// SPDX-License-Identifier: GPL-2.0 /* * KUnit test for the KUnit executor. * * Copyright (C) 2021, Google LLC. * Author: Daniel Latypov */ #include static void kfree_at_end(struct kunit *test, const void *to_free); static void free_subsuite_at_end(struct kunit *test, struct kunit_suite *const *to_free); static struct kunit_suite *alloc_fake_suite(struct kunit *test, const char *suite_name, struct kunit_case *test_cases); static void dummy_test(struct kunit *test) {} static struct kunit_case dummy_test_cases[] = { /* .run_case is not important, just needs to be non-NULL */ { .name = "test1", .run_case = dummy_test }, { .name = "test2", .run_case = dummy_test }, {}, }; static void parse_filter_test(struct kunit *test) { struct kunit_test_filter filter = {NULL, NULL}; kunit_parse_filter_glob(&filter, "suite"); KUNIT_EXPECT_STREQ(test, filter.suite_glob, "suite"); KUNIT_EXPECT_FALSE(test, filter.test_glob); kfree(filter.suite_glob); kfree(filter.test_glob); kunit_parse_filter_glob(&filter, "suite.test"); KUNIT_EXPECT_STREQ(test, filter.suite_glob, "suite"); KUNIT_EXPECT_STREQ(test, filter.test_glob, "test"); kfree(filter.suite_glob); kfree(filter.test_glob); } static void filter_subsuite_test(struct kunit *test) { struct kunit_suite *subsuite[3] = {NULL, NULL, NULL}; struct kunit_suite * const *filtered; struct kunit_test_filter filter = { .suite_glob = "suite2", .test_glob = NULL, }; subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases); subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases); /* Want: suite1, suite2, NULL -> suite2, NULL */ filtered = kunit_filter_subsuite(subsuite, &filter); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered); free_subsuite_at_end(test, filtered); /* Validate we just have suite2 */ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered[0]); KUNIT_EXPECT_STREQ(test, (const char *)filtered[0]->name, "suite2"); KUNIT_EXPECT_FALSE(test, filtered[1]); } static void filter_subsuite_test_glob_test(struct kunit *test) { struct 
kunit_suite *subsuite[3] = {NULL, NULL, NULL}; struct kunit_suite * const *filtered; struct kunit_test_filter filter = { .suite_glob = "suite2", .test_glob = "test2", }; subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases); subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases); /* Want: suite1, suite2, NULL -> suite2 (just test1), NULL */ filtered = kunit_filter_subsuite(subsuite, &filter); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered); free_subsuite_at_end(test, filtered); /* Validate we just have suite2 */ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered[0]); KUNIT_EXPECT_STREQ(test, (const char *)filtered[0]->name, "suite2"); KUNIT_EXPECT_FALSE(test, filtered[1]); /* Now validate we just have test2 */ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered[0]->test_cases); KUNIT_EXPECT_STREQ(test, (const char *)filtered[0]->test_cases[0].name, "test2"); KUNIT_EXPECT_FALSE(test, filtered[0]->test_cases[1].name); } static void filter_subsuite_to_empty_test(struct kunit *test) { struct kunit_suite *subsuite[3] = {NULL, NULL, NULL}; struct kunit_suite * const *filtered; struct kunit_test_filter filter = { .suite_glob = "not_found", .test_glob = NULL, }; subsuite[0] = alloc_fake_suite(test, "suite1", dummy_test_cases); subsuite[1] = alloc_fake_suite(test, "suite2", dummy_test_cases); filtered = kunit_filter_subsuite(subsuite, &filter); free_subsuite_at_end(test, filtered); /* just in case */ KUNIT_EXPECT_FALSE_MSG(test, filtered, "should be NULL to indicate no match"); } static void kfree_subsuites_at_end(struct kunit *test, struct suite_set *suite_set) { struct kunit_suite * const * const *suites; kfree_at_end(test, suite_set->start); for (suites = suite_set->start; suites < suite_set->end; suites++) free_subsuite_at_end(test, *suites); } static void filter_suites_test(struct kunit *test) { /* Suites per-file are stored as a NULL terminated array */ struct kunit_suite *subsuites[2][2] = { {NULL, NULL}, {NULL, NULL}, }; /* Match the memory layout of suite_set */ 
struct kunit_suite * const * const suites[2] = { subsuites[0], subsuites[1], }; const struct suite_set suite_set = { .start = suites, .end = suites + 2, }; struct suite_set filtered = {.start = NULL, .end = NULL}; /* Emulate two files, each having one suite */ subsuites[0][0] = alloc_fake_suite(test, "suite0", dummy_test_cases); subsuites[1][0] = alloc_fake_suite(test, "suite1", dummy_test_cases); /* Filter out suite1 */ filtered = kunit_filter_suites(&suite_set, "suite0"); kfree_subsuites_at_end(test, &filtered); /* let us use ASSERTs without leaking */ KUNIT_ASSERT_EQ(test, filtered.end - filtered.start, (ptrdiff_t)1); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered.start); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered.start[0]); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, filtered.start[0][0]); KUNIT_EXPECT_STREQ(test, (const char *)filtered.start[0][0]->name, "suite0"); } static struct kunit_case executor_test_cases[] = { KUNIT_CASE(parse_filter_test), KUNIT_CASE(filter_subsuite_test), KUNIT_CASE(filter_subsuite_test_glob_test), KUNIT_CASE(filter_subsuite_to_empty_test), KUNIT_CASE(filter_suites_test), {} }; static struct kunit_suite executor_test_suite = { .name = "kunit_executor_test", .test_cases = executor_test_cases, }; kunit_test_suites(&executor_test_suite); /* Test helpers */ static void kfree_res_free(struct kunit_resource *res) { kfree(res->data); } /* Use the resource API to register a call to kfree(to_free). * Since we never actually use the resource, it's safe to use on const data. */ static void kfree_at_end(struct kunit *test, const void *to_free) { /* kfree() handles NULL already, but avoid allocating a no-op cleanup. 
*/ if (IS_ERR_OR_NULL(to_free)) return; kunit_alloc_resource(test, NULL, kfree_res_free, GFP_KERNEL, (void *)to_free); } static void free_subsuite_res_free(struct kunit_resource *res) { kunit_free_subsuite(res->data); } static void free_subsuite_at_end(struct kunit *test, struct kunit_suite *const *to_free) { if (IS_ERR_OR_NULL(to_free)) return; kunit_alloc_resource(test, NULL, free_subsuite_res_free, GFP_KERNEL, (void *)to_free); } static struct kunit_suite *alloc_fake_suite(struct kunit *test, const char *suite_name, struct kunit_case *test_cases) { struct kunit_suite *suite; /* We normally never expect to allocate suites, hence the non-const cast. */ suite = kunit_kzalloc(test, sizeof(*suite), GFP_KERNEL); strncpy((char *)suite->name, suite_name, sizeof(suite->name) - 1); suite->test_cases = test_cases; return suite; }
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile13
-rw-r--r--tools/accounting/delaytop.c571
-rw-r--r--tools/arch/arm/include/uapi/asm/kvm.h315
-rw-r--r--tools/arch/arm64/include/asm/cputype.h34
-rw-r--r--tools/arch/arm64/include/asm/esr.h6
-rw-r--r--tools/arch/arm64/include/asm/gpr-num.h6
-rw-r--r--tools/arch/arm64/include/asm/sysreg.h15
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h2
-rw-r--r--tools/arch/loongarch/include/asm/inst.h12
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h13
-rw-r--r--tools/arch/riscv/include/asm/csr.h11
-rw-r--r--tools/arch/riscv/include/asm/vdso/processor.h4
-rw-r--r--tools/arch/s390/include/uapi/asm/bitsperlong.h4
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm_perf.h22
-rw-r--r--tools/arch/x86/include/asm/asm.h12
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h22
-rw-r--r--tools/arch/x86/include/asm/inat.h15
-rw-r--r--tools/arch/x86/include/asm/insn.h56
-rw-r--r--tools/arch/x86/include/asm/io.h101
-rw-r--r--tools/arch/x86/include/asm/msr-index.h41
-rw-r--r--tools/arch/x86/include/asm/special_insns.h27
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h42
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm_perf.h17
-rw-r--r--tools/arch/x86/include/uapi/asm/svm.h4
-rw-r--r--tools/arch/x86/include/uapi/asm/vmx.h7
-rw-r--r--tools/arch/x86/lib/inat.c13
-rw-r--r--tools/arch/x86/lib/insn.c35
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt111
-rw-r--r--tools/arch/x86/tools/gen-cpu-feature-names-x86.awk34
-rw-r--r--tools/arch/x86/tools/gen-insn-attr-x86.awk44
-rw-r--r--tools/bootconfig/main.c4
-rw-r--r--tools/bpf/Makefile13
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst13
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst16
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-token.rst64
-rw-r--r--tools/bpf/bpftool/Makefile6
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool37
-rw-r--r--tools/bpf/bpftool/btf_dumper.c4
-rw-r--r--tools/bpf/bpftool/cgroup.c4
-rw-r--r--tools/bpf/bpftool/common.c93
-rw-r--r--tools/bpf/bpftool/feature.c86
-rw-r--r--tools/bpf/bpftool/gen.c68
-rw-r--r--tools/bpf/bpftool/link.c54
-rw-r--r--tools/bpf/bpftool/main.c29
-rw-r--r--tools/bpf/bpftool/main.h21
-rw-r--r--tools/bpf/bpftool/map.c3
-rw-r--r--tools/bpf/bpftool/prog.c33
-rw-r--r--tools/bpf/bpftool/sign.c217
-rw-r--r--tools/bpf/bpftool/token.c210
-rw-r--r--tools/bpf/bpftool/tracelog.c11
-rw-r--r--tools/bpf/runqslower/Makefile91
-rw-r--r--tools/bpf/runqslower/runqslower.bpf.c106
-rw-r--r--tools/bpf/runqslower/runqslower.c171
-rw-r--r--tools/bpf/runqslower/runqslower.h13
-rw-r--r--tools/build/Build2
-rw-r--r--tools/build/Makefile21
-rw-r--r--tools/build/Makefile.feature12
-rw-r--r--tools/build/feature/Makefile26
-rw-r--r--tools/build/feature/test-all.c24
-rw-r--r--tools/build/feature/test-get_cpuid.c8
-rw-r--r--tools/build/feature/test-get_current_dir_name.c11
-rw-r--r--tools/build/feature/test-libbpf-strings.c10
-rw-r--r--tools/build/feature/test-libslang-include-subdir.c7
-rw-r--r--tools/dma/.gitignore3
-rw-r--r--tools/dma/Makefile55
-rw-r--r--tools/dma/config (renamed from tools/testing/selftests/dma/config)0
-rw-r--r--tools/dma/dma_map_benchmark.c (renamed from tools/testing/selftests/dma/dma_map_benchmark.c)3
-rwxr-xr-xtools/docs/check-variable-fonts.py37
-rwxr-xr-xtools/docs/checktransupdate.py307
-rwxr-xr-xtools/docs/documentation-file-ref-check245
-rwxr-xr-xtools/docs/features-refresh.sh98
-rwxr-xr-xtools/docs/find-unused-docs.sh62
-rwxr-xr-xtools/docs/gen-redirects.py54
-rwxr-xr-xtools/docs/gen-renames.py130
-rwxr-xr-xtools/docs/get_abi.py214
-rwxr-xr-xtools/docs/get_feat.py225
-rwxr-xr-xtools/docs/list-arch.sh11
-rwxr-xr-xtools/docs/parse-headers.py60
-rwxr-xr-xtools/docs/sphinx-build-wrapper864
-rwxr-xr-xtools/docs/sphinx-pre-install1543
-rwxr-xr-xtools/docs/test_doc_build.py513
-rw-r--r--tools/gpio/Makefile2
-rw-r--r--tools/iio/iio_event_monitor.c10
-rw-r--r--tools/include/asm-generic/bitops/__fls.h2
-rw-r--r--tools/include/asm-generic/bitops/fls.h2
-rw-r--r--tools/include/asm-generic/bitops/fls64.h4
-rw-r--r--tools/include/asm-generic/io.h482
-rw-r--r--tools/include/asm/io.h11
-rw-r--r--tools/include/linux/args.h28
-rw-r--r--tools/include/linux/atomic.h22
-rw-r--r--tools/include/linux/bitmap.h1
-rw-r--r--tools/include/linux/bits.h29
-rw-r--r--tools/include/linux/cfi_types.h29
-rw-r--r--tools/include/linux/compiler.h4
-rw-r--r--tools/include/linux/gfp_types.h393
-rw-r--r--tools/include/linux/interval_tree_generic.h10
-rw-r--r--tools/include/linux/io.h4
-rw-r--r--tools/include/linux/livepatch_external.h76
-rw-r--r--tools/include/linux/objtool_types.h3
l---------tools/include/linux/pci_ids.h1
-rw-r--r--tools/include/linux/slab.h165
-rw-r--r--tools/include/linux/static_call_types.h4
-rw-r--r--tools/include/linux/string.h14
-rw-r--r--tools/include/nolibc/Makefile22
-rw-r--r--tools/include/nolibc/arch-arm.h2
-rw-r--r--tools/include/nolibc/arch-arm64.h2
-rw-r--r--tools/include/nolibc/arch-loongarch.h2
-rw-r--r--tools/include/nolibc/arch-m68k.h2
-rw-r--r--tools/include/nolibc/arch-mips.h2
-rw-r--r--tools/include/nolibc/arch-powerpc.h2
-rw-r--r--tools/include/nolibc/arch-riscv.h2
-rw-r--r--tools/include/nolibc/arch-s390.h7
-rw-r--r--tools/include/nolibc/arch-sh.h2
-rw-r--r--tools/include/nolibc/arch-sparc.h2
-rw-r--r--tools/include/nolibc/arch-x86.h10
-rw-r--r--tools/include/nolibc/arch.h11
-rw-r--r--tools/include/nolibc/compiler.h4
-rw-r--r--tools/include/nolibc/crt.h3
-rw-r--r--tools/include/nolibc/dirent.h6
-rw-r--r--tools/include/nolibc/getopt.h2
-rw-r--r--tools/include/nolibc/inttypes.h3
-rw-r--r--tools/include/nolibc/nolibc.h3
-rw-r--r--tools/include/nolibc/poll.h4
-rw-r--r--tools/include/nolibc/stackprotector.h2
-rw-r--r--tools/include/nolibc/std.h6
-rw-r--r--tools/include/nolibc/stdio.h10
-rw-r--r--tools/include/nolibc/stdlib.h2
-rw-r--r--tools/include/nolibc/string.h15
-rw-r--r--tools/include/nolibc/sys.h165
-rw-r--r--tools/include/nolibc/sys/auxv.h3
-rw-r--r--tools/include/nolibc/sys/mman.h5
-rw-r--r--tools/include/nolibc/sys/random.h4
-rw-r--r--tools/include/nolibc/sys/reboot.h2
-rw-r--r--tools/include/nolibc/sys/select.h103
-rw-r--r--tools/include/nolibc/sys/timerfd.h8
-rw-r--r--tools/include/nolibc/sys/uio.h49
-rw-r--r--tools/include/nolibc/sys/wait.h35
-rw-r--r--tools/include/nolibc/time.h29
-rw-r--r--tools/include/nolibc/types.h47
-rw-r--r--tools/include/nolibc/unistd.h8
-rw-r--r--tools/include/uapi/asm-generic/unistd.h8
-rw-r--r--tools/include/uapi/drm/drm.h63
-rw-r--r--tools/include/uapi/linux/bpf.h58
-rw-r--r--tools/include/uapi/linux/genetlink.h103
-rw-r--r--tools/include/uapi/linux/if_addr.h79
-rw-r--r--tools/include/uapi/linux/kvm.h30
-rw-r--r--tools/include/uapi/linux/neighbour.h229
-rw-r--r--tools/include/uapi/linux/netdev.h2
-rw-r--r--tools/include/uapi/linux/netfilter.h80
-rw-r--r--tools/include/uapi/linux/netfilter_arp.h23
-rw-r--r--tools/include/uapi/linux/nsfs.h87
-rw-r--r--tools/include/uapi/linux/perf_event.h23
-rw-r--r--tools/include/uapi/linux/rtnetlink.h848
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/bpf.c8
-rw-r--r--tools/lib/bpf/bpf.h5
-rw-r--r--tools/lib/bpf/bpf_gen_internal.h2
-rw-r--r--tools/lib/bpf/bpf_helpers.h28
-rw-r--r--tools/lib/bpf/bpf_tracing.h2
-rw-r--r--tools/lib/bpf/btf.c76
-rw-r--r--tools/lib/bpf/btf.h8
-rw-r--r--tools/lib/bpf/btf_dump.c1
-rw-r--r--tools/lib/bpf/elf.c1
-rw-r--r--tools/lib/bpf/features.c1
-rw-r--r--tools/lib/bpf/gen_loader.c50
-rw-r--r--tools/lib/bpf/libbpf.c418
-rw-r--r--tools/lib/bpf/libbpf.h79
-rw-r--r--tools/lib/bpf/libbpf.map3
-rw-r--r--tools/lib/bpf/libbpf_errno.c75
-rw-r--r--tools/lib/bpf/libbpf_internal.h21
-rw-r--r--tools/lib/bpf/libbpf_probes.c4
-rw-r--r--tools/lib/bpf/libbpf_utils.c256
-rw-r--r--tools/lib/bpf/linker.c4
-rw-r--r--tools/lib/bpf/relo_core.c1
-rw-r--r--tools/lib/bpf/ringbuf.c1
-rw-r--r--tools/lib/bpf/skel_internal.h76
-rw-r--r--tools/lib/bpf/str_error.c104
-rw-r--r--tools/lib/bpf/str_error.h19
-rw-r--r--tools/lib/bpf/usdt.bpf.h44
-rw-r--r--tools/lib/bpf/usdt.c75
-rw-r--r--tools/lib/perf/cpumap.c39
-rw-r--r--tools/lib/perf/include/perf/core.h2
-rw-r--r--tools/lib/perf/include/perf/event.h14
-rw-r--r--tools/lib/perf/mmap.c2
-rw-r--r--tools/lib/python/__init__.py0
-rw-r--r--tools/lib/python/abi/__init__.py0
-rw-r--r--tools/lib/python/abi/abi_parser.py628
-rw-r--r--tools/lib/python/abi/abi_regex.py234
-rw-r--r--tools/lib/python/abi/helpers.py38
-rw-r--r--tools/lib/python/abi/system_symbols.py378
-rwxr-xr-xtools/lib/python/feat/parse_features.py494
-rwxr-xr-xtools/lib/python/jobserver.py149
-rw-r--r--tools/lib/python/kdoc/__init__.py0
-rw-r--r--tools/lib/python/kdoc/enrich_formatter.py70
-rw-r--r--tools/lib/python/kdoc/kdoc_files.py294
-rw-r--r--tools/lib/python/kdoc/kdoc_item.py43
-rw-r--r--tools/lib/python/kdoc/kdoc_output.py824
-rw-r--r--tools/lib/python/kdoc/kdoc_parser.py1670
-rw-r--r--tools/lib/python/kdoc/kdoc_re.py270
-rwxr-xr-xtools/lib/python/kdoc/latex_fonts.py167
-rwxr-xr-xtools/lib/python/kdoc/parse_data_structs.py482
-rw-r--r--tools/lib/python/kdoc/python_version.py178
-rw-r--r--tools/lib/subcmd/help.c3
-rw-r--r--tools/lib/thermal/Makefile9
-rw-r--r--tools/lib/thermal/libthermal.map5
-rw-r--r--tools/mm/page_owner_sort.c14
-rw-r--r--tools/mm/slabinfo.c7
-rw-r--r--tools/net/sunrpc/xdrgen/generators/__init__.py11
-rw-r--r--tools/net/sunrpc/xdrgen/generators/union.py34
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j24
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j24
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j22
-rw-r--r--tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j26
-rwxr-xr-xtools/net/sunrpc/xdrgen/xdrgen5
-rw-r--r--tools/net/ynl/Makefile29
-rw-r--r--tools/net/ynl/Makefile.deps1
-rw-r--r--tools/net/ynl/lib/ynl-priv.h14
-rw-r--r--tools/net/ynl/lib/ynl.c6
-rwxr-xr-xtools/net/ynl/pyynl/cli.py100
-rwxr-xr-xtools/net/ynl/pyynl/ethtool.py17
-rw-r--r--tools/net/ynl/pyynl/lib/__init__.py4
-rw-r--r--tools/net/ynl/pyynl/lib/doc_generator.py402
-rw-r--r--tools/net/ynl/pyynl/lib/nlspec.py2
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py93
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py175
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_rst.py384
-rw-r--r--tools/net/ynl/samples/.gitignore1
-rw-r--r--tools/net/ynl/samples/Makefile1
-rw-r--r--tools/net/ynl/samples/page-pool.c149
-rw-r--r--tools/net/ynl/samples/tc-filter-add.c335
-rw-r--r--tools/net/ynl/tests/Makefile32
-rw-r--r--tools/net/ynl/tests/config6
-rwxr-xr-xtools/net/ynl/tests/test_ynl_cli.sh327
-rwxr-xr-xtools/net/ynl/tests/test_ynl_ethtool.sh222
-rw-r--r--tools/net/ynl/ynltool/.gitignore2
-rw-r--r--tools/net/ynl/ynltool/Makefile55
-rw-r--r--tools/net/ynl/ynltool/json_writer.c288
-rw-r--r--tools/net/ynl/ynltool/json_writer.h75
-rw-r--r--tools/net/ynl/ynltool/main.c242
-rw-r--r--tools/net/ynl/ynltool/main.h66
-rw-r--r--tools/net/ynl/ynltool/page-pool.c461
-rw-r--r--tools/net/ynl/ynltool/qstats.c621
-rw-r--r--tools/objtool/.gitignore3
-rw-r--r--tools/objtool/Build8
-rw-r--r--tools/objtool/Makefile70
-rw-r--r--tools/objtool/arch/loongarch/decode.c62
-rw-r--r--tools/objtool/arch/loongarch/orc.c1
-rw-r--r--tools/objtool/arch/loongarch/special.c28
-rw-r--r--tools/objtool/arch/powerpc/decode.c31
-rw-r--r--tools/objtool/arch/powerpc/special.c5
-rw-r--r--tools/objtool/arch/x86/Build13
-rw-r--r--tools/objtool/arch/x86/decode.c123
-rw-r--r--tools/objtool/arch/x86/orc.c1
-rw-r--r--tools/objtool/arch/x86/special.c12
-rw-r--r--tools/objtool/builtin-check.c102
-rw-r--r--tools/objtool/builtin-klp.c53
-rw-r--r--tools/objtool/check.c1571
-rw-r--r--tools/objtool/disas.c1248
-rw-r--r--tools/objtool/elf.c822
-rw-r--r--tools/objtool/include/objtool/arch.h17
-rw-r--r--tools/objtool/include/objtool/builtin.h13
-rw-r--r--tools/objtool/include/objtool/check.h39
-rw-r--r--tools/objtool/include/objtool/checksum.h43
-rw-r--r--tools/objtool/include/objtool/checksum_types.h25
-rw-r--r--tools/objtool/include/objtool/disas.h81
-rw-r--r--tools/objtool/include/objtool/elf.h199
-rw-r--r--tools/objtool/include/objtool/endianness.h9
-rw-r--r--tools/objtool/include/objtool/klp.h35
-rw-r--r--tools/objtool/include/objtool/objtool.h6
-rw-r--r--tools/objtool/include/objtool/special.h4
-rw-r--r--tools/objtool/include/objtool/trace.h141
-rw-r--r--tools/objtool/include/objtool/util.h19
-rw-r--r--tools/objtool/include/objtool/warn.h66
-rw-r--r--tools/objtool/klp-diff.c1723
-rw-r--r--tools/objtool/klp-post-link.c168
-rw-r--r--tools/objtool/noreturns.h2
-rw-r--r--tools/objtool/objtool.c44
-rw-r--r--tools/objtool/orc_dump.c1
-rw-r--r--tools/objtool/orc_gen.c9
-rw-r--r--tools/objtool/signal.c135
-rw-r--r--tools/objtool/special.c16
-rwxr-xr-xtools/objtool/sync-check.sh2
-rw-r--r--tools/objtool/trace.c203
-rw-r--r--tools/objtool/weak.c7
-rw-r--r--tools/perf/Documentation/Build.txt15
-rw-r--r--tools/perf/Documentation/android.txt80
-rw-r--r--tools/perf/Documentation/intel-acr.txt53
-rw-r--r--tools/perf/Documentation/perf-annotate.txt1
-rw-r--r--tools/perf/Documentation/perf-arm-spe.txt118
-rw-r--r--tools/perf/Documentation/perf-bench.txt58
-rw-r--r--tools/perf/Documentation/perf-c2c.txt7
-rw-r--r--tools/perf/Documentation/perf-check.txt2
-rw-r--r--tools/perf/Documentation/perf-config.txt3
-rw-r--r--tools/perf/Documentation/perf-diff.txt2
-rw-r--r--tools/perf/Documentation/perf-list.txt3
-rw-r--r--tools/perf/Documentation/perf-record.txt4
-rw-r--r--tools/perf/Documentation/perf-script.txt5
-rw-r--r--tools/perf/Documentation/perf-timechart.txt3
-rw-r--r--tools/perf/Documentation/perf-trace.txt4
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt10
-rw-r--r--tools/perf/Makefile.config114
-rw-r--r--tools/perf/Makefile.perf37
-rw-r--r--tools/perf/arch/arm/annotate/instructions.c1
-rw-r--r--tools/perf/arch/arm/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/arm/util/Build2
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c1
-rw-r--r--tools/perf/arch/arm/util/pmu.c2
-rw-r--r--tools/perf/arch/arm64/annotate/instructions.c1
-rw-r--r--tools/perf/arch/arm64/util/Build19
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c6
-rw-r--r--tools/perf/arch/arm64/util/arm64_exception_types.h15
-rw-r--r--tools/perf/arch/arm64/util/hisi-ptt.c1
-rw-r--r--tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl2
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/powerpc/util/Build1
-rw-r--r--tools/perf/arch/powerpc/util/auxtrace.c103
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/s390/util/Build2
-rw-r--r--tools/perf/arch/s390/util/auxtrace.c1
-rw-r--r--tools/perf/arch/sh/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/sparc/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/x86/annotate/instructions.c187
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_32.tbl2
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl3
-rw-r--r--tools/perf/arch/x86/tests/Build4
-rw-r--r--tools/perf/arch/x86/tests/arch-tests.c4
-rw-r--r--tools/perf/arch/x86/tests/intel-pt-test.c6
-rw-r--r--tools/perf/arch/x86/tests/topdown.c2
-rw-r--r--tools/perf/arch/x86/util/Build6
-rw-r--r--tools/perf/arch/x86/util/evsel.c114
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c6
-rw-r--r--tools/perf/arch/x86/util/kvm-stat.c51
-rw-r--r--tools/perf/arch/x86/util/pmu.c2
-rw-r--r--tools/perf/arch/x86/util/topdown.c1
-rw-r--r--tools/perf/arch/xtensa/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/evlist-open-close.c1
-rw-r--r--tools/perf/bench/find-bit-bench.c2
-rw-r--r--tools/perf/bench/futex.c1
-rw-r--r--tools/perf/bench/futex.h1
-rw-r--r--tools/perf/bench/mem-functions.c390
-rw-r--r--tools/perf/bench/mem-memcpy-arch.h2
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h4
-rw-r--r--tools/perf/bench/mem-memset-arch.h2
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h4
-rw-r--r--tools/perf/bench/pmu-scan.c1
-rw-r--r--tools/perf/bench/synthesize.c1
-rw-r--r--tools/perf/builtin-annotate.c10
-rw-r--r--tools/perf/builtin-bench.c1
-rw-r--r--tools/perf/builtin-c2c.c195
-rw-r--r--tools/perf/builtin-check.c5
-rw-r--r--tools/perf/builtin-evlist.c3
-rw-r--r--tools/perf/builtin-inject.c48
-rw-r--r--tools/perf/builtin-kvm.c132
-rw-r--r--tools/perf/builtin-kwork.c27
-rw-r--r--tools/perf/builtin-list.c169
-rw-r--r--tools/perf/builtin-lock.c9
-rw-r--r--tools/perf/builtin-mem.c1
-rw-r--r--tools/perf/builtin-record.c163
-rw-r--r--tools/perf/builtin-report.c6
-rw-r--r--tools/perf/builtin-sched.c19
-rw-r--r--tools/perf/builtin-script.c410
-rw-r--r--tools/perf/builtin-stat.c470
-rw-r--r--tools/perf/builtin-timechart.c15
-rw-r--r--tools/perf/builtin-top.c8
-rw-r--r--tools/perf/builtin-trace.c39
-rwxr-xr-xtools/perf/check-headers.sh12
-rw-r--r--tools/perf/perf.h2
-rw-r--r--tools/perf/pmu-events/Build27
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json8
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json26
-rw-r--r--tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json2
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/ddrc.json9
-rw-r--r--tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/metrics.json450
-rw-r--r--tools/perf/pmu-events/arch/common/common/legacy-hardware.json72
-rw-r--r--tools/perf/pmu-events/arch/common/common/metrics.json151
-rw-r--r--tools/perf/pmu-events/arch/common/common/software.json6
-rw-r--r--tools/perf/pmu-events/arch/common/common/tool.json12
-rw-r--r--tools/perf/pmu-events/arch/riscv/mapfile.csv1
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z16/transaction.json8
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z17/transaction.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json104
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/cache.json151
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/floating-point.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/memory.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/other.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/pipeline.json169
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/cache.json16
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json6
-rw-r--r--tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json180
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/cache.json465
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json73
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/frontend.json112
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/memory.json92
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/other.json121
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json444
-rw-r--r--tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json113
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json33
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json139
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json12
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json163
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json143
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/other.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json167
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json104
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/grandridge/cache.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/cache.json231
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json131
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/memory.json33
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/other.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json167
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json1
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json143
-rw-r--r--tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json32
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json35
-rw-r--r--tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json96
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json155
-rw-r--r--tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json4
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json33
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/cache.json170
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/frontend.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json216
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/memory.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/other.json3
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json111
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/uncore-interconnect.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json8
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv24
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/cache.json145
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/memory.json15
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json103
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/other.json5
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/pipeline.json173
-rw-r--r--tools/perf/pmu-events/arch/x86/meteorlake/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/cache.json1163
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/counter.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/floating-point.json359
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/frontend.json535
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/memory.json115
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/other.json44
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/pipeline.json1903
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/uncore-memory.json26
-rw-r--r--tools/perf/pmu-events/arch/x86/pantherlake/virtual-memory.json248
-rw-r--r--tools/perf/pmu-events/arch/x86/rocketlake/rkl-metrics.json97
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json19
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/cache.json163
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/floating-point.json43
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/frontend.json42
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/memory.json30
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/other.json28
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/pipeline.json167
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/spr-metrics.json165
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-cache.json11
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-memory.json104
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/uncore-power.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/sapphirerapids/virtual-memory.json40
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/cache.json41
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/srf-metrics.json20
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/uncore-cache.json9
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/uncore-interconnect.json10
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/uncore-io.json1
-rw-r--r--tools/perf/pmu-events/arch/x86/sierraforest/uncore-memory.json103
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json101
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json101
-rw-r--r--tools/perf/pmu-events/arch/x86/tigerlake/tgl-metrics.json97
-rw-r--r--tools/perf/pmu-events/empty-pmu-events.c2854
-rwxr-xr-xtools/perf/pmu-events/jevents.py73
-rwxr-xr-xtools/perf/pmu-events/make_legacy_cache.py129
-rw-r--r--tools/perf/pmu-events/metric.py85
-rwxr-xr-xtools/perf/pmu-events/metric_test.py4
-rw-r--r--tools/perf/pmu-events/pmu-events.h3
-rwxr-xr-xtools/perf/python/ilist.py515
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/Build2
-rw-r--r--tools/perf/tests/Build3
-rw-r--r--tools/perf/tests/builtin-test.c5
-rw-r--r--tools/perf/tests/code-reading.c126
-rw-r--r--tools/perf/tests/hwmon_pmu.c1
-rw-r--r--tools/perf/tests/kallsyms-split.c156
-rw-r--r--tools/perf/tests/keep-tracking.c2
-rw-r--r--tools/perf/tests/make12
-rw-r--r--tools/perf/tests/maps.c82
-rw-r--r--tools/perf/tests/mmap-basic.c2
-rw-r--r--tools/perf/tests/parse-events.c2078
-rw-r--r--tools/perf/tests/parse-metric.c3
-rw-r--r--tools/perf/tests/perf-record.c40
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c4
-rw-r--r--tools/perf/tests/pfm.c1
-rw-r--r--tools/perf/tests/pmu-events.c26
-rw-r--r--tools/perf/tests/pmu.c3
-rw-r--r--tools/perf/tests/python-use.c27
-rwxr-xr-xtools/perf/tests/shell/amd-ibs-swfilt.sh51
-rw-r--r--tools/perf/tests/shell/attr/test-stat-default7
-rw-r--r--tools/perf/tests/shell/attr/test-stat-detailed-17
-rw-r--r--tools/perf/tests/shell/attr/test-stat-detailed-27
-rw-r--r--tools/perf/tests/shell/attr/test-stat-detailed-37
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_adding_blacklisted.sh20
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_adding_kernel.sh97
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_basic.sh31
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_invalid_options.sh14
-rwxr-xr-xtools/perf/tests/shell/base_probe/test_line_semantics.sh7
-rwxr-xr-xtools/perf/tests/shell/base_report/setup.sh10
-rwxr-xr-xtools/perf/tests/shell/base_report/test_basic.sh103
-rwxr-xr-xtools/perf/tests/shell/buildid.sh203
-rwxr-xr-xtools/perf/tests/shell/c2c.sh62
-rw-r--r--tools/perf/tests/shell/common/init.sh4
-rw-r--r--tools/perf/tests/shell/coresight/memcpy_thread/memcpy_thread.c2
-rw-r--r--tools/perf/tests/shell/coresight/thread_loop/thread_loop.c4
-rw-r--r--tools/perf/tests/shell/coresight/unroll_loop_thread/unroll_loop_thread.c4
-rwxr-xr-xtools/perf/tests/shell/evlist.sh79
-rwxr-xr-xtools/perf/tests/shell/jitdump-python.sh81
-rwxr-xr-xtools/perf/tests/shell/kallsyms.sh56
-rwxr-xr-xtools/perf/tests/shell/kvm.sh154
-rw-r--r--tools/perf/tests/shell/lib/perf_json_output_lint.py9
-rw-r--r--tools/perf/tests/shell/lib/stat_output.sh2
-rwxr-xr-xtools/perf/tests/shell/lock_contention.sh21
-rwxr-xr-xtools/perf/tests/shell/python-use.sh36
-rwxr-xr-xtools/perf/tests/shell/record.sh40
-rwxr-xr-xtools/perf/tests/shell/record_lbr.sh26
-rwxr-xr-xtools/perf/tests/shell/record_weak_term.sh37
-rwxr-xr-xtools/perf/tests/shell/script_dlfilter.sh107
-rwxr-xr-xtools/perf/tests/shell/stat+csv_output.sh2
-rwxr-xr-xtools/perf/tests/shell/stat+event_uniquifying.sh109
-rwxr-xr-xtools/perf/tests/shell/stat+json_output.sh2
-rwxr-xr-xtools/perf/tests/shell/stat+shadow_stat.sh4
-rwxr-xr-xtools/perf/tests/shell/stat+std_output.sh10
-rwxr-xr-xtools/perf/tests/shell/stat.sh45
-rwxr-xr-xtools/perf/tests/shell/stat_all_metricgroups.sh3
-rwxr-xr-xtools/perf/tests/shell/stat_all_metrics.sh30
-rwxr-xr-xtools/perf/tests/shell/test_bpf_metadata.sh2
-rwxr-xr-xtools/perf/tests/shell/test_brstack.sh106
-rwxr-xr-xtools/perf/tests/shell/test_event_open_fallback.sh71
-rwxr-xr-xtools/perf/tests/shell/timechart.sh67
-rwxr-xr-xtools/perf/tests/shell/top.sh74
-rwxr-xr-xtools/perf/tests/shell/trace_btf_enum.sh11
-rw-r--r--tools/perf/tests/switch-tracking.c2
-rw-r--r--tools/perf/tests/tests.h5
-rw-r--r--tools/perf/tests/workloads/Build2
-rw-r--r--tools/perf/tests/workloads/thloop.c45
-rw-r--r--tools/perf/tests/workloads/traploop.c31
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h5
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fcntl.h19
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h93
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/prctl.h19
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/vhost.h35
-rw-r--r--tools/perf/ui/browsers/annotate.c237
-rw-r--r--tools/perf/ui/browsers/hists.c2
-rw-r--r--tools/perf/ui/hist.c1
-rw-r--r--tools/perf/ui/libslang.h4
-rw-r--r--tools/perf/util/Build29
-rw-r--r--tools/perf/util/addr2line.c439
-rw-r--r--tools/perf/util/addr2line.h20
-rw-r--r--tools/perf/util/annotate-data.c97
-rw-r--r--tools/perf/util/annotate-data.h27
-rw-r--r--tools/perf/util/annotate.c205
-rw-r--r--tools/perf/util/annotate.h31
-rw-r--r--tools/perf/util/arm-spe-decoder/Build2
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c93
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h94
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c67
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h47
-rw-r--r--tools/perf/util/arm-spe.c293
-rw-r--r--tools/perf/util/arm-spe.h2
-rw-r--r--tools/perf/util/auxtrace.c34
-rw-r--r--tools/perf/util/auxtrace.h228
-rw-r--r--tools/perf/util/bpf-event.c39
-rw-r--r--tools/perf/util/bpf-filter.c5
-rw-r--r--tools/perf/util/bpf-filter.h2
-rw-r--r--tools/perf/util/bpf-trace-summary.c41
-rw-r--r--tools/perf/util/bpf-utils.c61
-rw-r--r--tools/perf/util/bpf-utils.h10
-rw-r--r--tools/perf/util/bpf_counter.c93
-rw-r--r--tools/perf/util/bpf_counter.h74
-rw-r--r--tools/perf/util/bpf_counter_cgroup.c84
-rw-r--r--tools/perf/util/bpf_ftrace.c4
-rw-r--r--tools/perf/util/bpf_lock_contention.c6
-rw-r--r--tools/perf/util/bpf_map.c1
-rw-r--r--tools/perf/util/bpf_off_cpu.c1
-rw-r--r--tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c4
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.bpf.c18
-rw-r--r--tools/perf/util/bpf_skel/bperf_cgroup.h15
-rw-r--r--tools/perf/util/bpf_skel/kwork_top.bpf.c2
-rw-r--r--tools/perf/util/bpf_skel/sample_filter.bpf.c2
-rw-r--r--tools/perf/util/build-id.c7
-rw-r--r--tools/perf/util/callchain.c51
-rw-r--r--tools/perf/util/callchain.h4
-rw-r--r--tools/perf/util/capstone.c471
-rw-r--r--tools/perf/util/capstone.h24
-rw-r--r--tools/perf/util/cgroup.c1
-rw-r--r--tools/perf/util/config.c5
-rw-r--r--tools/perf/util/cpumap.c9
-rw-r--r--tools/perf/util/cs-etm-decoder/Build2
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c44
-rw-r--r--tools/perf/util/cs-etm.c7
-rw-r--r--tools/perf/util/debuginfo.c8
-rw-r--r--tools/perf/util/disasm.c652
-rw-r--r--tools/perf/util/disasm.h6
-rw-r--r--tools/perf/util/disasm_bpf.c195
-rw-r--r--tools/perf/util/disasm_bpf.h12
-rw-r--r--tools/perf/util/drm_pmu.c7
-rw-r--r--tools/perf/util/dso.c112
-rw-r--r--tools/perf/util/dso.h25
-rw-r--r--tools/perf/util/dwarf-aux.c69
-rw-r--r--tools/perf/util/dwarf-aux.h2
-rw-r--r--tools/perf/util/env.c22
-rw-r--r--tools/perf/util/env.h2
-rw-r--r--tools/perf/util/event.c1
-rw-r--r--tools/perf/util/event.h20
-rw-r--r--tools/perf/util/evlist.c19
-rw-r--r--tools/perf/util/evlist.h2
-rw-r--r--tools/perf/util/evsel.c244
-rw-r--r--tools/perf/util/evsel.h8
-rw-r--r--tools/perf/util/evsel_config.h1
-rw-r--r--tools/perf/util/evsel_fprintf.c5
-rw-r--r--tools/perf/util/evswitch.c1
-rw-r--r--tools/perf/util/expr.c8
-rw-r--r--tools/perf/util/genelf.c32
-rw-r--r--tools/perf/util/get_current_dir_name.c18
-rw-r--r--tools/perf/util/get_current_dir_name.h8
-rw-r--r--tools/perf/util/header.c19
-rw-r--r--tools/perf/util/header.h6
-rw-r--r--tools/perf/util/hisi-ptt-decoder/Build2
-rw-r--r--tools/perf/util/hist.c6
-rw-r--r--tools/perf/util/hist.h20
-rw-r--r--tools/perf/util/hwmon_pmu.c5
-rw-r--r--tools/perf/util/hwmon_pmu.h2
-rw-r--r--tools/perf/util/include/linux/linkage.h2
-rw-r--r--tools/perf/util/intel-bts.c4
-rw-r--r--tools/perf/util/intel-pt-decoder/Build8
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c2
-rw-r--r--tools/perf/util/intel-pt.c4
-rw-r--r--tools/perf/util/intel-tpebs.c4
-rw-r--r--tools/perf/util/jitdump.c5
-rw-r--r--tools/perf/util/kvm-stat.h11
-rw-r--r--tools/perf/util/libbfd.c643
-rw-r--r--tools/perf/util/libbfd.h82
-rw-r--r--tools/perf/util/llvm.c273
-rw-r--r--tools/perf/util/llvm.h21
-rw-r--r--tools/perf/util/lzma.c2
-rw-r--r--tools/perf/util/machine.c1
-rw-r--r--tools/perf/util/map.c19
-rw-r--r--tools/perf/util/map.h6
-rw-r--r--tools/perf/util/maps.c31
-rw-r--r--tools/perf/util/mem-events.c5
-rw-r--r--tools/perf/util/metricgroup.c95
-rw-r--r--tools/perf/util/metricgroup.h2
-rw-r--r--tools/perf/util/mmap.c1
-rw-r--r--tools/perf/util/mutex.c14
-rw-r--r--tools/perf/util/mutex.h2
-rw-r--r--tools/perf/util/namespaces.c7
-rw-r--r--tools/perf/util/parse-events.c455
-rw-r--r--tools/perf/util/parse-events.h25
-rw-r--r--tools/perf/util/parse-events.l78
-rw-r--r--tools/perf/util/parse-events.y114
-rw-r--r--tools/perf/util/perf_api_probe.c27
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c2
-rw-r--r--tools/perf/util/pfm.c1
-rw-r--r--tools/perf/util/pmu.c322
-rw-r--r--tools/perf/util/pmu.h33
-rw-r--r--tools/perf/util/powerpc-vpadtl.c733
-rw-r--r--tools/perf/util/powerpc-vpadtl.h23
-rw-r--r--tools/perf/util/print-events.c112
-rw-r--r--tools/perf/util/print-events.h4
-rw-r--r--tools/perf/util/print_insn.c117
-rw-r--r--tools/perf/util/probe-event.c12
-rw-r--r--tools/perf/util/python.c560
-rw-r--r--tools/perf/util/s390-sample-raw.c55
-rw-r--r--tools/perf/util/sample.h2
-rw-r--r--tools/perf/util/scripting-engines/Build2
-rw-r--r--tools/perf/util/session.c184
-rw-r--r--tools/perf/util/session.h3
-rw-r--r--tools/perf/util/setup.py14
-rw-r--r--tools/perf/util/srcline.c772
-rw-r--r--tools/perf/util/srcline.h9
-rw-r--r--tools/perf/util/stat-display.c68
-rw-r--r--tools/perf/util/stat-shadow.c547
-rw-r--r--tools/perf/util/stat.c59
-rw-r--r--tools/perf/util/stat.h32
-rw-r--r--tools/perf/util/symbol-elf.c103
-rw-r--r--tools/perf/util/symbol-minimal.c62
-rw-r--r--tools/perf/util/symbol.c164
-rw-r--r--tools/perf/util/synthetic-events.c2
-rw-r--r--tools/perf/util/synthetic-events.h15
-rw-r--r--tools/perf/util/tool.c222
-rw-r--r--tools/perf/util/tool.h23
-rw-r--r--tools/perf/util/tool_pmu.c105
-rw-r--r--tools/perf/util/tool_pmu.h10
-rw-r--r--tools/perf/util/tp_pmu.c2
-rw-r--r--tools/perf/util/trace.h4
-rw-r--r--tools/perf/util/zlib.c2
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c4
-rw-r--r--tools/power/acpi/tools/acpidump/apdump.c3
-rw-r--r--tools/power/acpi/tools/acpidump/apfiles.c2
-rw-r--r--tools/power/acpi/tools/pfrut/pfrut.c7
-rw-r--r--tools/power/cpupower/.gitignore3
-rw-r--r--tools/power/cpupower/Makefile32
-rw-r--r--tools/power/cpupower/lib/cpuidle.c5
-rw-r--r--tools/power/cpupower/lib/cpupower.c2
-rw-r--r--tools/power/cpupower/man/cpupower-set.17
-rw-r--r--tools/power/cpupower/utils/cpufreq-info.c16
-rw-r--r--tools/power/cpupower/utils/cpupower-set.c5
-rw-r--r--tools/power/cpupower/utils/helpers/helpers.h14
-rw-r--r--tools/power/cpupower/utils/helpers/misc.c76
-rwxr-xr-xtools/power/x86/amd_pstate_tracer/amd_pstate_trace.py2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c2
-rw-r--r--tools/power/x86/intel-speed-select/isst-core-tpmi.c46
-rw-r--r--tools/power/x86/turbostat/turbostat.827
-rw-r--r--tools/power/x86/turbostat/turbostat.c1209
-rw-r--r--tools/power/x86/x86_energy_perf_policy/Makefile29
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.815
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c178
-rw-r--r--tools/sched_ext/Makefile4
-rw-r--r--tools/sched_ext/include/scx/bpf_arena_common.bpf.h175
-rw-r--r--tools/sched_ext/include/scx/bpf_arena_common.h33
-rw-r--r--tools/sched_ext/include/scx/common.bpf.h119
-rw-r--r--tools/sched_ext/include/scx/common.h5
-rw-r--r--tools/sched_ext/include/scx/compat.bpf.h330
-rw-r--r--tools/sched_ext/include/scx/compat.h14
-rw-r--r--tools/sched_ext/include/scx/user_exit_info.bpf.h40
-rw-r--r--tools/sched_ext/include/scx/user_exit_info.h49
-rw-r--r--tools/sched_ext/include/scx/user_exit_info_common.h30
-rw-r--r--tools/sched_ext/scx_central.bpf.c2
-rw-r--r--tools/sched_ext/scx_central.c1
-rw-r--r--tools/sched_ext/scx_cpu0.bpf.c88
-rw-r--r--tools/sched_ext/scx_cpu0.c106
-rw-r--r--tools/sched_ext/scx_flatcg.bpf.c12
-rw-r--r--tools/sched_ext/scx_flatcg.c2
-rw-r--r--tools/sched_ext/scx_qmap.bpf.c150
-rw-r--r--tools/sched_ext/scx_qmap.c12
-rw-r--r--tools/sched_ext/scx_simple.c2
-rw-r--r--tools/scripts/syscall.tbl2
-rw-r--r--tools/testing/cxl/Kbuild10
-rw-r--r--tools/testing/cxl/cxl_core_exports.c22
-rw-r--r--tools/testing/cxl/exports.h13
-rw-r--r--tools/testing/cxl/test/Kbuild1
-rw-r--r--tools/testing/cxl/test/cxl.c169
-rw-r--r--tools/testing/cxl/test/cxl_translate.c445
-rw-r--r--tools/testing/cxl/test/mem.c11
-rw-r--r--tools/testing/cxl/test/mock.c148
-rw-r--r--tools/testing/cxl/test/mock.h13
-rwxr-xr-xtools/testing/ktest/config-bisect.pl4
-rw-r--r--tools/testing/kunit/configs/arch_uml.config5
-rwxr-xr-xtools/testing/kunit/kunit.py4
-rw-r--r--tools/testing/kunit/kunit_parser.py8
-rw-r--r--tools/testing/kunit/qemu_configs/mips.py18
-rw-r--r--tools/testing/kunit/qemu_configs/mips64.py19
-rw-r--r--tools/testing/kunit/qemu_configs/mips64el.py19
-rw-r--r--tools/testing/kunit/qemu_configs/mipsel.py18
-rw-r--r--tools/testing/kunit/test_data/test_is_test_passed-kselftest.log3
-rw-r--r--tools/testing/nvdimm/test/ndtest.c13
-rw-r--r--tools/testing/nvdimm/test/nfit.c7
-rw-r--r--tools/testing/radix-tree/idr-test.c16
-rw-r--r--tools/testing/radix-tree/maple.c524
-rw-r--r--tools/testing/scatterlist/linux/mm.h1
-rw-r--r--tools/testing/selftests/Makefile5
-rw-r--r--tools/testing/selftests/acct/acct_syscall.c2
-rw-r--r--tools/testing/selftests/alsa/conf.c4
-rw-r--r--tools/testing/selftests/alsa/mixer-test.c10
-rw-r--r--tools/testing/selftests/alsa/pcm-test.c10
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c2
-rw-r--r--tools/testing/selftests/alsa/utimer-test.c2
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c24
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.c2
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c14
-rw-r--r--tools/testing/selftests/arm64/bti/assembler.h1
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c13
-rw-r--r--tools/testing/selftests/arm64/fp/fp-stress.c8
-rw-r--r--tools/testing/selftests/arm64/fp/kernel-test.c4
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c2
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c167
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c3
-rw-r--r--tools/testing/selftests/arm64/fp/za-ptrace.c2
-rw-r--r--tools/testing/selftests/arm64/fp/zt-ptrace.c3
-rw-r--r--tools/testing/selftests/arm64/fp/zt-test.S2
-rw-r--r--tools/testing/selftests/arm64/gcs/Makefile6
-rw-r--r--tools/testing/selftests/arm64/gcs/basic-gcs.c12
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-locking.c1
-rw-r--r--tools/testing/selftests/arm64/gcs/gcs-stress.c4
-rw-r--r--tools/testing/selftests/arm64/pauth/exec_target.c7
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c2
-rw-r--r--tools/testing/selftests/arm64/tags/tags_test.c2
-rw-r--r--tools/testing/selftests/bpf/.gitignore3
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile85
-rw-r--r--tools/testing/selftests/bpf/bench.c22
-rw-r--r--tools/testing/selftests/bpf/bench.h1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c555
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c65
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_sockmap.c5
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c65
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh4
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh4
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h6
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_strsearch.h128
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h56
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h15
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h3
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c20
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/config9
-rw-r--r--tools/testing/selftests/bpf/config.aarch6412
-rw-r--r--tools/testing/selftests/bpf/config.ppc64el1
-rw-r--r--tools/testing/selftests/bpf/config.riscv641
-rw-r--r--tools/testing/selftests/bpf/config.s390x11
-rw-r--r--tools/testing/selftests/bpf/config.x86_645
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c54
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_strsearch.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_gotox.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c504
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_split.c87
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c122
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/file_reader.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/free_timer.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_update.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kernel_flag.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c247
-rw-r--r--tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_excl.c54
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c140
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_htab.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/res_spin_lock.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sha256.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/socket_helpers.h9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stream.c131
-rw-r--r--tools/testing/selftests/bpf/prog_tests/string_kfuncs.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_data.h386
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_work_stress.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c390
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_local_data.c297
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_work.c157
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_edt.c145
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c714
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.c2596
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.h298
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_crash.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe.c156
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c484
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c121
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/wq.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c265
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c179
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xsk.c151
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c9
-rw-r--r--tools/testing/selftests/bpf/progs/arena_spin_lock.c5
-rw-r--r--tools/testing/selftests/bpf/progs/arena_strsearch.c146
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cc_cubic.c11
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c7
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_gotox.c448
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h28
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_smc.c117
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_test_utils.h18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h14
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_read_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c12
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c21
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c46
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c258
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c67
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c34
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader.c145
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader_fail.c52
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/htab_update.c19
-rw-r--r--tools/testing/selftests/bpf/progs/ip_check_defrag.c5
-rw-r--r--tools/testing/selftests/bpf/progs/iters_looping.c53
-rw-r--r--tools/testing/selftests/bpf/progs/iters_state_safety.c6
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_failure.c4
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod.c46
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod_seq.c6
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_write_ctx.c22
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c5
-rw-r--r--tools/testing/selftests/bpf/progs/livepatch_trampoline.c30
-rw-r--r--tools/testing/selftests/bpf/progs/loop1.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop2.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop6.c21
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie.h30
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_bench.c230
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_map.c19
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c8
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_tailcall.c8
-rw-r--r--tools/testing/selftests/bpf/progs/map_excl.c34
-rw-r--r--tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c4
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sockmap.c43
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_subflow.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_search.c2
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c40
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c60
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c11
-rw-r--r--tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c104
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_ips.c49
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_map.c (renamed from tools/testing/selftests/bpf/progs/test_stacktrace_map.c)2
-rw-r--r--tools/testing/selftests/bpf/progs/stream.c158
-rw-r--r--tools/testing/selftests/bpf/progs/stream_fail.c6
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c18
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c3
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_success.c23
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h6
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c3
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_data.bpf.h237
-rw-r--r--tools/testing/selftests/bpf/progs/task_work.c107
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_fail.c96
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_stress.c73
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_branches.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_devmap.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_htab.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c98
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_local_data.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_edt.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c95
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe.c38
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c637
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_pull_data.c48
-rw-r--r--tools/testing/selftests/bpf/progs/timer_interrupt.c48
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c33
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c18
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall.c4
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c60
-rw-r--r--tools/testing/selftests/bpf/progs/uretprobe_stack.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_async_cb_context.c181
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c233
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c27
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c32
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c59
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c18
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotox.c389
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c178
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_live_stack.c344
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c21
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_lsm.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ptr.c7
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_1.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_mul.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c5
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c16
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock.c87
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c40
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c59
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c47
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_var_off.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_accept.c2
-rw-r--r--tools/testing/selftests/bpf/progs/wq.c17
-rw-r--r--tools/testing/selftests/bpf/progs/wq_failures.c23
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh4
-rw-r--r--tools/testing/selftests/bpf/test_kmods/Makefile2
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c393
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c196
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.h6
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h4
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c2
-rw-r--r--tools/testing/selftests/bpf/test_loader.c329
-rw-r--r--tools/testing/selftests/bpf/test_maps.c3
-rw-r--r--tools/testing/selftests/bpf/test_progs.c13
-rw-r--r--tools/testing/selftests/bpf/test_progs.h17
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c2
-rw-r--r--tools/testing/selftests/bpf/test_tag.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh100
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh320
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c20
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh2
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c14
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c234
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/usdt.h545
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_st_mem.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c8
-rwxr-xr-xtools/testing/selftests/bpf/verify_sig_setup.sh11
-rw-r--r--tools/testing/selftests/bpf/veristat.c56
-rw-r--r--tools/testing/selftests/bpf/xdping.c2
-rw-r--r--tools/testing/selftests/bpf/xsk.h4
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c2526
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h156
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c2
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c2
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c2
-rw-r--r--tools/testing/selftests/cachestat/.gitignore1
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c6
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c2
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c2
-rw-r--r--tools/testing/selftests/cgroup/lib/cgroup_util.c12
-rw-r--r--tools/testing/selftests/cgroup/lib/include/cgroup_util.h21
-rw-r--r--tools/testing/selftests/cgroup/test_core.c9
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c27
-rw-r--r--tools/testing/selftests/cgroup/test_cpuset.c9
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c672
-rw-r--r--tools/testing/selftests/cgroup/test_hugetlb_memcg.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c9
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c9
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c9
-rw-r--r--tools/testing/selftests/cgroup/test_pids.c5
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c9
-rw-r--r--tools/testing/selftests/clone3/clone3.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h2
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c2
-rw-r--r--tools/testing/selftests/connector/proc_filter.c2
-rw-r--r--tools/testing/selftests/core/close_range_test.c2
-rw-r--r--tools/testing/selftests/core/unshare_test.c2
-rw-r--r--tools/testing/selftests/coredump/.gitignore4
-rw-r--r--tools/testing/selftests/coredump/Makefile8
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_protocol_test.c1568
-rw-r--r--tools/testing/selftests/coredump/coredump_socket_test.c742
-rw-r--r--tools/testing/selftests/coredump/coredump_test.h59
-rw-r--r--tools/testing/selftests/coredump/coredump_test_helpers.c383
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c1667
-rw-r--r--tools/testing/selftests/damon/Makefile3
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py11
-rw-r--r--tools/testing/selftests/damon/access_memory_even.c1
-rwxr-xr-xtools/testing/selftests/damon/drgn_dump_damon_status.py9
-rwxr-xr-xtools/testing/selftests/damon/sysfs.py71
-rwxr-xr-xtools/testing/selftests/damon/sysfs_no_op_commit_break.py72
-rw-r--r--tools/testing/selftests/dma/Makefile7
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c2
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c2
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore2
-rw-r--r--tools/testing/selftests/drivers/net/Makefile18
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile20
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_ipsec_offload.sh156
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_lacp_prio.sh108
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh197
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_passive_lacp.sh105
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh3
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh2
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config12
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh361
-rw-r--r--tools/testing/selftests/drivers/net/config7
-rw-r--r--tools/testing/selftests/drivers/net/dsa/Makefile12
-rw-r--r--tools/testing/selftests/drivers/net/gro.c (renamed from tools/testing/selftests/net/gro.c)75
-rwxr-xr-xtools/testing/selftests/drivers/net/gro.py164
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py42
-rw-r--r--tools/testing/selftests/drivers/net/hw/.gitignore1
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile36
-rw-r--r--tools/testing/selftests/drivers/net/hw/config6
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py4
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py174
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devmem.py14
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py47
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c856
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/nic_timestamp.py113
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py36
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py18
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_flow_label.py167
-rw-r--r--tools/testing/selftests/drivers/net/hw/toeplitz.c (renamed from tools/testing/selftests/net/toeplitz.c)72
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/toeplitz.py211
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/tso.py11
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py50
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py47
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py84
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh90
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh12
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh12
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_threaded.py44
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_cmdline.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_overflow.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_torture.sh130
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile9
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh116
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh85
-rwxr-xr-xtools/testing/selftests/drivers/net/psp.py640
-rw-r--r--tools/testing/selftests/drivers/net/psp_responder.c483
-rwxr-xr-xtools/testing/selftests/drivers/net/ring_reconfig.py167
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py40
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile11
-rw-r--r--tools/testing/selftests/drivers/net/team/config1
-rwxr-xr-xtools/testing/selftests/drivers/net/team/options.sh188
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/Makefile13
-rwxr-xr-xtools/testing/selftests/drivers/net/xdp.py163
-rw-r--r--tools/testing/selftests/drivers/ntsync/ntsync.c2
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c2
-rw-r--r--tools/testing/selftests/exec/check-exec.c2
-rw-r--r--tools/testing/selftests/exec/execveat.c2
-rw-r--r--tools/testing/selftests/exec/load_address.c2
-rw-r--r--tools/testing/selftests/exec/non-regular.c2
-rw-r--r--tools/testing/selftests/exec/null-argv.c2
-rw-r--r--tools/testing/selftests/exec/recursion-depth.c2
-rw-r--r--tools/testing/selftests/fchmodat2/fchmodat2_test.c2
-rw-r--r--tools/testing/selftests/filelock/ofdlocks.c2
-rw-r--r--tools/testing/selftests/filesystems/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/anon_inode_test.c2
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c3
-rw-r--r--tools/testing/selftests/filesystems/devpts_pts.c2
-rw-r--r--tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c2
-rw-r--r--tools/testing/selftests/filesystems/eventfd/eventfd_test.c2
-rw-r--r--tools/testing/selftests/filesystems/fclog.c130
-rw-r--r--tools/testing/selftests/filesystems/file_stressor.c2
-rw-r--r--tools/testing/selftests/filesystems/fuse/.gitignore3
-rw-r--r--tools/testing/selftests/filesystems/fuse/Makefile21
-rw-r--r--tools/testing/selftests/filesystems/fuse/fuse_mnt.c146
-rw-r--r--tools/testing/selftests/filesystems/fuse/fusectl_test.c140
-rw-r--r--tools/testing/selftests/filesystems/kernfs_test.c2
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c19
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c20
-rw-r--r--tools/testing/selftests/filesystems/nsfs/iterate_mntns.c2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/listmount_test.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c2
-rw-r--r--tools/testing/selftests/filesystems/utils.c4
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest34
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/trace_marker_raw.tc107
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/enable_disable_tprobe.tc40
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions6
-rw-r--r--tools/testing/selftests/futex/functional/Makefile8
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa.c3
-rw-r--r--tools/testing/selftests/futex/functional/futex_numa_mpol.c100
-rw-r--r--tools/testing/selftests/futex/functional/futex_priv_hash.c67
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c266
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c86
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c129
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c103
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c83
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c139
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c76
-rw-r--r--tools/testing/selftests/futex/functional/futex_waitv.c99
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh62
-rw-r--r--tools/testing/selftests/futex/include/futextest.h11
-rw-r--r--tools/testing/selftests/futex/include/logging.h148
-rw-r--r--tools/testing/selftests/hid/hid_common.h8
-rw-r--r--tools/testing/selftests/hid/hidraw.c473
-rw-r--r--tools/testing/selftests/hid/tests/test_multitouch.py55
-rw-r--r--tools/testing/selftests/hid/tests/test_tablet.py71
-rwxr-xr-xtools/testing/selftests/hid/vmtest.sh668
-rw-r--r--tools/testing/selftests/intel_pstate/aperf.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd.c105
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h66
-rw-r--r--tools/testing/selftests/ipc/msgque.c2
-rw-r--r--tools/testing/selftests/ir/ir_loopback.c2
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c2
-rw-r--r--tools/testing/selftests/kexec/.gitignore2
-rw-r--r--tools/testing/selftests/kho/init.c13
-rwxr-xr-xtools/testing/selftests/kho/vmtest.sh29
-rw-r--r--tools/testing/selftests/kselftest.h22
-rw-r--r--tools/testing/selftests/kselftest/runner.sh14
-rw-r--r--tools/testing/selftests/kselftest_harness.h19
-rw-r--r--tools/testing/selftests/kselftest_harness/Makefile1
-rw-r--r--tools/testing/selftests/kselftest_harness/harness-selftest.c2
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/Makefile.kvm22
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c1
-rw-r--r--tools/testing/selftests/kvm/arm64/aarch32_id_regs.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c15
-rw-r--r--tools/testing/selftests/kvm/arm64/at.c166
-rw-r--r--tools/testing/selftests/kvm/arm64/debug-exceptions.c12
-rw-r--r--tools/testing/selftests/kvm/arm64/external_aborts.c85
-rw-r--r--tools/testing/selftests/kvm/arm64/get-reg-list.c102
-rw-r--r--tools/testing/selftests/kvm/arm64/hello_el2.c71
-rw-r--r--tools/testing/selftests/kvm/arm64/hypercalls.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/kvm-uuid.c70
-rw-r--r--tools/testing/selftests/kvm/arm64/no-vgic-v3.c6
-rw-r--r--tools/testing/selftests/kvm/arm64/page_fault_test.c6
-rw-r--r--tools/testing/selftests/kvm/arm64/psci_test.c13
-rw-r--r--tools/testing/selftests/kvm/arm64/sea_to_user.c331
-rw-r--r--tools/testing/selftests/kvm/arm64/set_id_regs.c64
-rw-r--r--tools/testing/selftests/kvm/arm64/smccc_filter.c17
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_init.c2
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_irq.c291
-rw-r--r--tools/testing/selftests/kvm/arm64/vgic_lpi_stress.c15
-rw-r--r--tools/testing/selftests/kvm/arm64/vpmu_counter_access.c77
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c35
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c1
-rw-r--r--tools/testing/selftests/kvm/get-reg-list.c9
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c367
-rw-r--r--tools/testing/selftests/kvm/include/arm64/arch_timer.h24
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic.h1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/gic_v3_its.h1
-rw-r--r--tools/testing/selftests/kvm/include/arm64/kvm_util_arch.h5
-rw-r--r--tools/testing/selftests/kvm/include/arm64/processor.h84
-rw-r--r--tools/testing/selftests/kvm/include/arm64/vgic.h3
-rw-r--r--tools/testing/selftests/kvm/include/kvm_syscalls.h81
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h46
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/arch_timer.h85
-rw-r--r--tools/testing/selftests/kvm/include/loongarch/processor.h81
-rw-r--r--tools/testing/selftests/kvm/include/numaif.h110
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h1
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h19
-rw-r--r--tools/testing/selftests/kvm/include/x86/pmu.h26
-rw-r--r--tools/testing/selftests/kvm/include/x86/processor.h42
-rw-r--r--tools/testing/selftests/kvm/include/x86/vmx.h3
-rw-r--r--tools/testing/selftests/kvm/irqfd_test.c14
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c4
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic.c6
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_private.h1
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3.c22
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c19
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/processor.c117
-rw-r--r--tools/testing/selftests/kvm/lib/arm64/vgic.c64
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c251
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/exception.S6
-rw-r--r--tools/testing/selftests/kvm/lib/loongarch/processor.c47
-rw-r--r--tools/testing/selftests/kvm/lib/s390/processor.c5
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c7
-rw-r--r--tools/testing/selftests/kvm/lib/x86/memstress.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86/pmu.c49
-rw-r--r--tools/testing/selftests/kvm/lib/x86/processor.c128
-rw-r--r--tools/testing/selftests/kvm/lib/x86/vmx.c9
-rw-r--r--tools/testing/selftests/kvm/loongarch/arch_timer.c200
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c1
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c1
-rw-r--r--tools/testing/selftests/kvm/mmu_stress_test.c15
-rw-r--r--tools/testing/selftests/kvm/pre_fault_memory_test.c157
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c64
-rw-r--r--tools/testing/selftests/kvm/s390/cmma_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390/ucontrol_test.c16
-rw-r--r--tools/testing/selftests/kvm/s390/user_operexec.c140
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c17
-rw-r--r--tools/testing/selftests/kvm/steal_time.c2
-rw-r--r--tools/testing/selftests/kvm/x86/fastops_test.c82
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_cpuid.c2
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_features.c18
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_ipi.c18
-rw-r--r--tools/testing/selftests/kvm/x86/hyperv_tlb_flush.c2
-rw-r--r--tools/testing/selftests/kvm/x86/monitor_mwait_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86/msrs_test.c489
-rw-r--r--tools/testing/selftests/kvm/x86/nested_close_kvm_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_close_while_nested_test.c)42
-rw-r--r--tools/testing/selftests/kvm/x86/nested_invalid_cr3_test.c116
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_adjust_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_tsc_adjust_test.c)79
-rw-r--r--tools/testing/selftests/kvm/x86/nested_tsc_scaling_test.c (renamed from tools/testing/selftests/kvm/x86/vmx_nested_tsc_scaling_test.c)48
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_counters_test.c75
-rw-r--r--tools/testing/selftests/kvm/x86/pmu_event_filter_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/private_mem_conversions_test.c9
-rw-r--r--tools/testing/selftests/kvm/x86/sev_smoke_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/state_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/userspace_io_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_dirty_log_test.c12
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_nested_la57_state_test.c132
-rw-r--r--tools/testing/selftests/kvm/x86/vmx_pmu_caps_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_ipi_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86/xapic_state_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86/xcr0_cpuid_test.c12
-rw-r--r--tools/testing/selftests/landlock/Makefile2
-rw-r--r--tools/testing/selftests/landlock/audit.h6
-rw-r--r--tools/testing/selftests/landlock/common.h6
-rw-r--r--tools/testing/selftests/landlock/fs_test.c1474
-rw-r--r--tools/testing/selftests/lib.mk8
-rw-r--r--tools/testing/selftests/livepatch/functions.sh6
-rw-r--r--tools/testing/selftests/liveupdate/.gitignore9
-rw-r--r--tools/testing/selftests/liveupdate/Makefile34
-rw-r--r--tools/testing/selftests/liveupdate/config11
-rwxr-xr-xtools/testing/selftests/liveupdate/do_kexec.sh16
-rw-r--r--tools/testing/selftests/liveupdate/liveupdate.c348
-rw-r--r--tools/testing/selftests/liveupdate/luo_kexec_simple.c89
-rw-r--r--tools/testing/selftests/liveupdate/luo_multi_session.c162
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.c266
-rw-r--r--tools/testing/selftests/liveupdate/luo_test_utils.h44
-rw-r--r--tools/testing/selftests/lsm/lsm_get_self_attr_test.c2
-rw-r--r--tools/testing/selftests/lsm/lsm_list_modules_test.c2
-rw-r--r--tools/testing/selftests/lsm/lsm_set_self_attr_test.c2
-rw-r--r--tools/testing/selftests/media_tests/media_device_open.c2
-rw-r--r--tools/testing/selftests/media_tests/media_device_test.c2
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_impl.h2
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c4
-rw-r--r--tools/testing/selftests/mm/.gitignore2
-rw-r--r--tools/testing/selftests/mm/Makefile5
-rw-r--r--tools/testing/selftests/mm/compaction_test.c2
-rw-r--r--tools/testing/selftests/mm/cow.c19
-rw-r--r--tools/testing/selftests/mm/droppable.c2
-rw-r--r--tools/testing/selftests/mm/guard-regions.c189
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c2
-rw-r--r--tools/testing/selftests/mm/gup_test.c28
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c926
-rw-r--r--tools/testing/selftests/mm/hugepage-mmap.c2
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c18
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c6
-rw-r--r--tools/testing/selftests/mm/hugetlb-read-hwpoison.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb-soft-offline.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_dio.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_fault_after_madv.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb_madv_vs_map.c2
-rw-r--r--tools/testing/selftests/mm/khugepaged.c2
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c258
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c2
-rw-r--r--tools/testing/selftests/mm/madv_populate.c23
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c2
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c2
-rw-r--r--tools/testing/selftests/mm/map_populate.c2
-rw-r--r--tools/testing/selftests/mm/mdwe_test.c2
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c2
-rw-r--r--tools/testing/selftests/mm/merge.c2
-rw-r--r--tools/testing/selftests/mm/migration.c4
-rw-r--r--tools/testing/selftests/mm/mkdirty.c2
-rw-r--r--tools/testing/selftests/mm/mlock-random-test.c2
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c2
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c2
-rw-r--r--tools/testing/selftests/mm/mremap_dontunmap.c2
-rw-r--r--tools/testing/selftests/mm/mremap_test.c271
-rw-r--r--tools/testing/selftests/mm/mseal_test.c2
-rw-r--r--tools/testing/selftests/mm/on-fault-limit.c2
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c28
-rw-r--r--tools/testing/selftests/mm/pfnmap.c50
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h5
-rw-r--r--tools/testing/selftests/mm/pkey_sighandler_tests.c2
-rw-r--r--tools/testing/selftests/mm/prctl_thp_disable.c291
-rw-r--r--tools/testing/selftests/mm/process_madv.c2
-rw-r--r--tools/testing/selftests/mm/protection_keys.c6
-rw-r--r--tools/testing/selftests/mm/rmap.c433
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh31
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c132
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c478
-rwxr-xr-xtools/testing/selftests/mm/test_vmalloc.sh6
-rw-r--r--tools/testing/selftests/mm/thp_settings.c9
-rw-r--r--tools/testing/selftests/mm/thp_settings.h1
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c13
-rw-r--r--tools/testing/selftests/mm/transhuge-stress.c2
-rw-r--r--tools/testing/selftests/mm/uffd-common.c293
-rw-r--r--tools/testing/selftests/mm/uffd-common.h80
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c245
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c578
-rw-r--r--tools/testing/selftests/mm/uffd-wp-mremap.c31
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c6
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh37
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c15
-rw-r--r--tools/testing/selftests/mm/vm_util.c174
-rw-r--r--tools/testing/selftests/mm/vm_util.h23
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c79
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c2
-rw-r--r--tools/testing/selftests/mqueue/mq_open_tests.c2
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c2
-rw-r--r--tools/testing/selftests/mseal_system_mappings/sysmap_is_sealed.c4
-rw-r--r--tools/testing/selftests/namespaces/.gitignore12
-rw-r--r--tools/testing/selftests/namespaces/Makefile29
-rw-r--r--tools/testing/selftests/namespaces/config7
-rw-r--r--tools/testing/selftests/namespaces/cred_change_test.c814
-rw-r--r--tools/testing/selftests/namespaces/file_handle_test.c1429
-rw-r--r--tools/testing/selftests/namespaces/init_ino_test.c61
-rw-r--r--tools/testing/selftests/namespaces/listns_efault_test.c530
-rw-r--r--tools/testing/selftests/namespaces/listns_pagination_bug.c138
-rw-r--r--tools/testing/selftests/namespaces/listns_permissions_test.c759
-rw-r--r--tools/testing/selftests/namespaces/listns_test.c679
-rw-r--r--tools/testing/selftests/namespaces/ns_active_ref_test.c2672
-rw-r--r--tools/testing/selftests/namespaces/nsid_test.c981
-rw-r--r--tools/testing/selftests/namespaces/regression_pidfd_setns_test.c113
-rw-r--r--tools/testing/selftests/namespaces/siocgskns_test.c1824
-rw-r--r--tools/testing/selftests/namespaces/stress_test.c626
-rw-r--r--tools/testing/selftests/namespaces/wrappers.h35
-rw-r--r--tools/testing/selftests/nci/nci_dev.c2
-rw-r--r--tools/testing/selftests/net/.gitignore10
-rw-r--r--tools/testing/selftests/net/Makefile303
-rw-r--r--tools/testing/selftests/net/af_unix/.gitignore8
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile14
-rw-r--r--tools/testing/selftests/net/af_unix/config2
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c2
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c2
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c28
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c4
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c30
-rw-r--r--tools/testing/selftests/net/af_unix/so_peek_off.c162
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connect.c2
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connreset.c180
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh2
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh2
-rw-r--r--tools/testing/selftests/net/bind_bhash.c4
-rw-r--r--tools/testing/selftests/net/bind_timewait.c2
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c2
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py4
-rwxr-xr-xtools/testing/selftests/net/broadcast_ether_dst.sh83
-rwxr-xr-xtools/testing/selftests/net/busy_poll_test.sh24
-rw-r--r--tools/testing/selftests/net/busy_poller.c16
-rw-r--r--tools/testing/selftests/net/can/config3
-rw-r--r--tools/testing/selftests/net/can/test_raw_filter.c2
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c12
-rw-r--r--tools/testing/selftests/net/config139
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv4.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv6.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-other.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh435
-rwxr-xr-xtools/testing/selftests/net/fdb_notify.sh26
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh52
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh66
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile57
-rw-r--r--tools/testing/selftests/net/forwarding/README15
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_activity_notify.sh170
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh387
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh100
-rw-r--r--tools/testing/selftests/net/forwarding/config30
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh50
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh1
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_core.sh9
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh12
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh141
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_reserved.sh33
-rwxr-xr-xtools/testing/selftests/net/gro.sh105
-rw-r--r--tools/testing/selftests/net/hsr/Makefile6
-rw-r--r--tools/testing/selftests/net/hsr/config4
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c24
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c2
-rw-r--r--tools/testing/selftests/net/ipsec.c2
-rw-r--r--tools/testing/selftests/net/ipv6_fragmentation.c114
-rw-r--r--tools/testing/selftests/net/lib.sh74
-rw-r--r--tools/testing/selftests/net/lib/Makefile15
-rwxr-xr-xtools/testing/selftests/net/lib/ksft_setup_loopback.sh111
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py32
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py115
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py2
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py65
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py5
-rw-r--r--tools/testing/selftests/net/lib/sh/defer.sh20
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c103
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile32
-rw-r--r--tools/testing/selftests/net/mptcp/config44
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c44
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh154
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c14
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh582
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh81
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c30
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh45
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh6
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c25
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh46
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh19
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile89
-rw-r--r--tools/testing/selftests/net/netfilter/config51
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh2
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c2
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh5
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh58
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh56
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh13
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh213
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh4
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c3
-rw-r--r--tools/testing/selftests/net/netfilter/udpclash.c2
-rw-r--r--tools/testing/selftests/net/netlink-dumps.c46
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh88
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py2
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile12
-rw-r--r--tools/testing/selftests/net/ovpn/config12
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c5
-rw-r--r--tools/testing/selftests/net/packetdrill/Makefile10
-rw-r--r--tools/testing/selftests/net/packetdrill/config4
-rwxr-xr-xtools/testing/selftests/net/packetdrill/defaults.sh3
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt49
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt74
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt4
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt6
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt2
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh9
-rw-r--r--tools/testing/selftests/net/proc_net_pktgen.c2
-rw-r--r--tools/testing/selftests/net/psock_fanout.c2
-rw-r--r--tools/testing/selftests/net/psock_lib.h4
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c6
-rw-r--r--tools/testing/selftests/net/rds/Makefile10
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c2
-rwxr-xr-xtools/testing/selftests/net/route_hint.sh79
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh12
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh37
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c2
-rw-r--r--tools/testing/selftests/net/sctp_hello.c17
-rwxr-xr-xtools/testing/selftests/net/sctp_vrf.sh73
-rw-r--r--tools/testing/selftests/net/setup_loopback.sh120
-rw-r--r--tools/testing/selftests/net/setup_veth.sh45
-rw-r--r--tools/testing/selftests/net/sk_so_peek_off.c2
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c2
-rw-r--r--tools/testing/selftests/net/so_txtime.c2
-rw-r--r--tools/testing/selftests/net/socket.c13
-rw-r--r--tools/testing/selftests/net/tap.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/config2
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c2
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c2
-rw-r--r--tools/testing/selftests/net/tcp_port_share.c258
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh31
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_fdb_changelink.sh8
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nh.sh223
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh2
-rw-r--r--tools/testing/selftests/net/tls.c590
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh561
-rw-r--r--tools/testing/selftests/net/tun.c2
-rw-r--r--tools/testing/selftests/net/txtimestamp.c2
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c2
-rwxr-xr-xtools/testing/selftests/net/vlan_bridge_binding.sh46
-rw-r--r--tools/testing/selftests/net/ynl.mk5
-rw-r--r--tools/testing/selftests/nolibc/Makefile.nolibc23
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c19
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh8
-rw-r--r--tools/testing/selftests/openat2/helpers.h2
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c2
-rw-r--r--tools/testing/selftests/openat2/rename_attack_test.c2
-rw-r--r--tools/testing/selftests/openat2/resolve_test.c2
-rw-r--r--tools/testing/selftests/pci_endpoint/pci_endpoint_test.c6
-rw-r--r--tools/testing/selftests/perf_events/mmap.c2
-rw-r--r--tools/testing/selftests/perf_events/remove_on_exec.c2
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c2
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c4
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c2
-rw-r--r--tools/testing/selftests/pid_namespace/regression_enomem.c2
-rw-r--r--tools/testing/selftests/pidfd/config1
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h17
-rw-r--r--tools/testing/selftests/pidfd/pidfd_bind_mount.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_file_handle_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_info_test.c75
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setattr_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_xattr_test.c2
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h2
-rw-r--r--tools/testing/selftests/prctl/set-anon-vma-name-test.c2
-rw-r--r--tools/testing/selftests/prctl/set-process-name.c2
-rw-r--r--tools/testing/selftests/proc/.gitignore2
-rw-r--r--tools/testing/selftests/proc/Makefile2
-rw-r--r--tools/testing/selftests/proc/proc-maps-race.c73
-rw-r--r--tools/testing/selftests/proc/proc-net-dev-lseek.c68
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c14
-rw-r--r--tools/testing/selftests/proc/proc-pidns.c211
-rw-r--r--tools/testing/selftests/ptrace/get_set_sud.c2
-rw-r--r--tools/testing/selftests/ptrace/get_syscall_info.c2
-rw-r--r--tools/testing/selftests/ptrace/set_syscall_info.c2
-rw-r--r--tools/testing/selftests/ptrace/vmaccess.c2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh27
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh56
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-series.sh116
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE041
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h2
-rw-r--r--tools/testing/selftests/ring-buffer/map_test.c2
-rw-r--r--tools/testing/selftests/riscv/README24
-rw-r--r--tools/testing/selftests/riscv/abi/pointer_masking.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c167
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.c2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/which-cpus.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_bottomup.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_default.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_test.h2
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c2
-rw-r--r--tools/testing/selftests/riscv/vector/Makefile5
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval.c2
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c2
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_ptrace.c134
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c2
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h3
-rw-r--r--tools/testing/selftests/rseq/rseq-s390.h39
-rw-r--r--tools/testing/selftests/rseq/rseq.c10
-rw-r--r--tools/testing/selftests/rtc/rtctest.c2
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh14
-rw-r--r--tools/testing/selftests/sched_ext/Makefile1
-rw-r--r--tools/testing/selftests/sched_ext/hotplug.c1
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.bpf.c251
-rw-r--r--tools/testing/selftests/sched_ext/peek_dsq.c224
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c240
-rw-r--r--tools/testing/selftests/sgx/main.c2
-rw-r--r--tools/testing/selftests/signal/mangle_uc_sigmask.c2
-rw-r--r--tools/testing/selftests/signal/sas.c2
-rw-r--r--tools/testing/selftests/sparc64/drivers/adi-test.c2
-rw-r--r--tools/testing/selftests/sync/sync_test.c2
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_test.c2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json2
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json270
-rw-r--r--tools/testing/selftests/tdx/tdx_guest_test.c2
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c2
-rw-r--r--tools/testing/selftests/timens/timens.h2
-rw-r--r--tools/testing/selftests/timers/adjtick.c2
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c2
-rw-r--r--tools/testing/selftests/timers/change_skew.c2
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c2
-rw-r--r--tools/testing/selftests/timers/freq-step.c2
-rw-r--r--tools/testing/selftests/timers/inconsistency-check.c2
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c2
-rw-r--r--tools/testing/selftests/timers/leapcrash.c2
-rw-r--r--tools/testing/selftests/timers/mqueue-lat.c2
-rw-r--r--tools/testing/selftests/timers/nanosleep.c57
-rw-r--r--tools/testing/selftests/timers/nsleep-lat.c2
-rw-r--r--tools/testing/selftests/timers/posix_timers.c34
-rw-r--r--tools/testing/selftests/timers/raw_skew.c2
-rw-r--r--tools/testing/selftests/timers/rtcpie.c2
-rw-r--r--tools/testing/selftests/timers/set-2038.c2
-rw-r--r--tools/testing/selftests/timers/set-tai.c2
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c2
-rw-r--r--tools/testing/selftests/timers/set-tz.c2
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c2
-rw-r--r--tools/testing/selftests/timers/threadtest.c2
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c2
-rw-r--r--tools/testing/selftests/tmpfs/bug-link-o-tmpfile.c2
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py4
-rw-r--r--tools/testing/selftests/tty/.gitignore1
-rw-r--r--tools/testing/selftests/tty/Makefile6
-rw-r--r--tools/testing/selftests/tty/config1
-rw-r--r--tools/testing/selftests/tty/tty_tiocsti_test.c650
-rw-r--r--tools/testing/selftests/tty/tty_tstamp_update.c2
-rw-r--r--tools/testing/selftests/ublk/Makefile1
-rw-r--r--tools/testing/selftests/ublk/file_backed.c10
-rw-r--r--tools/testing/selftests/ublk/kublk.c142
-rw-r--r--tools/testing/selftests/ublk/kublk.h54
-rw-r--r--tools/testing/selftests/ublk/null.c4
-rw-r--r--tools/testing/selftests/ublk/stripe.c4
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_01.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_02.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_12.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_13.sh20
-rwxr-xr-xtools/testing/selftests/ublk/test_null_01.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_null_02.sh4
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh6
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh4
-rw-r--r--tools/testing/selftests/ublk/utils.h2
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c2
-rw-r--r--tools/testing/selftests/user_events/abi_test.c2
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c2
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c2
-rw-r--r--tools/testing/selftests/user_events/perf_test.c4
-rw-r--r--tools/testing/selftests/user_events/user_events_selftests.h2
-rw-r--r--tools/testing/selftests/vDSO/.gitignore1
-rw-r--r--tools/testing/selftests/vDSO/Makefile2
-rw-r--r--tools/testing/selftests/vDSO/vdso_call.h7
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h4
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c103
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c123
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c2
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c2
-rw-r--r--tools/testing/selftests/verification/.gitignore (renamed from tools/bpf/runqslower/.gitignore)2
-rw-r--r--tools/testing/selftests/verification/Makefile8
-rw-r--r--tools/testing/selftests/verification/config1
-rw-r--r--tools/testing/selftests/verification/settings1
-rw-r--r--tools/testing/selftests/verification/test.d/functions39
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitor_enable_disable.tc75
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitor_reactor.tc68
-rw-r--r--tools/testing/selftests/verification/test.d/rv_monitors_available.tc18
-rw-r--r--tools/testing/selftests/verification/test.d/rv_wwnr_printk.tc30
-rwxr-xr-xtools/testing/selftests/verification/verificationtest-ktap8
-rw-r--r--tools/testing/selftests/vfio/.gitignore10
-rw-r--r--tools/testing/selftests/vfio/Makefile29
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/dsa/dsa.c416
l---------tools/testing/selftests/vfio/lib/drivers/dsa/registers.h1
l---------tools/testing/selftests/vfio/lib/drivers/ioat/hw.h1
-rw-r--r--tools/testing/selftests/vfio/lib/drivers/ioat/ioat.c235
l---------tools/testing/selftests/vfio/lib/drivers/ioat/registers.h1
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio.h26
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/assert.h54
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iommu.h76
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/iova_allocator.h23
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_device.h125
-rw-r--r--tools/testing/selftests/vfio/lib/include/libvfio/vfio_pci_driver.h97
-rw-r--r--tools/testing/selftests/vfio/lib/iommu.c465
-rw-r--r--tools/testing/selftests/vfio/lib/iova_allocator.c94
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.c78
-rw-r--r--tools/testing/selftests/vfio/lib/libvfio.mk29
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_device.c378
-rw-r--r--tools/testing/selftests/vfio/lib/vfio_pci_driver.c112
-rwxr-xr-xtools/testing/selftests/vfio/scripts/cleanup.sh41
-rwxr-xr-xtools/testing/selftests/vfio/scripts/lib.sh42
-rwxr-xr-xtools/testing/selftests/vfio/scripts/run.sh16
-rwxr-xr-xtools/testing/selftests/vfio/scripts/setup.sh48
-rw-r--r--tools/testing/selftests/vfio/vfio_dma_mapping_test.c312
-rw-r--r--tools/testing/selftests/vfio/vfio_iommufd_setup_test.c127
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_init_perf_test.c168
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_device_test.c182
-rw-r--r--tools/testing/selftests/vfio/vfio_pci_driver_test.c263
-rwxr-xr-xtools/testing/selftests/vsock/vmtest.sh354
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c6
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config10
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c2
-rw-r--r--tools/testing/selftests/x86/helpers.h2
-rw-r--r--tools/testing/selftests/x86/lam.c2
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c2
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c2
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c23
-rw-r--r--tools/testing/selftests/x86/xstate.h2
-rw-r--r--tools/testing/selftests/zram/README1
-rw-r--r--tools/testing/shared/linux.c120
-rw-r--r--tools/testing/shared/linux/idr.h4
-rw-r--r--tools/testing/shared/linux/maple_tree.h6
-rw-r--r--tools/testing/shared/maple-shared.h11
-rw-r--r--tools/testing/shared/maple-shim.c7
-rw-r--r--tools/testing/shared/shared.mk6
-rw-r--r--tools/testing/vma/linux/atomic.h17
-rw-r--r--tools/testing/vma/vma.c112
-rw-r--r--tools/testing/vma/vma_internal.h933
-rw-r--r--tools/testing/vsock/util.c1
-rw-r--r--tools/testing/vsock/vsock_test.c7
-rw-r--r--tools/thermal/thermal-engine/thermal-engine.c2
-rw-r--r--tools/tracing/latency/Makefile.config8
-rw-r--r--tools/tracing/latency/latency-collector.c2
-rw-r--r--tools/tracing/rtla/Makefile.config8
-rw-r--r--tools/tracing/rtla/Makefile.rtla2
-rw-r--r--tools/tracing/rtla/src/Build1
-rw-r--r--tools/tracing/rtla/src/actions.c12
-rw-r--r--tools/tracing/rtla/src/actions.h2
-rw-r--r--tools/tracing/rtla/src/common.c350
-rw-r--r--tools/tracing/rtla/src/common.h158
-rw-r--r--tools/tracing/rtla/src/osnoise.c101
-rw-r--r--tools/tracing/rtla/src/osnoise.h114
-rw-r--r--tools/tracing/rtla/src/osnoise_hist.c463
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c379
-rw-r--r--tools/tracing/rtla/src/timerlat.bpf.c3
-rw-r--r--tools/tracing/rtla/src/timerlat.c206
-rw-r--r--tools/tracing/rtla/src/timerlat.h55
-rw-r--r--tools/tracing/rtla/src/timerlat_bpf.c22
-rw-r--r--tools/tracing/rtla/src/timerlat_hist.c768
-rw-r--r--tools/tracing/rtla/src/timerlat_top.c678
-rw-r--r--tools/tracing/rtla/src/timerlat_u.c12
-rw-r--r--tools/tracing/rtla/src/trace.h3
-rw-r--r--tools/tracing/rtla/src/utils.c41
-rw-r--r--tools/tracing/rtla/src/utils.h2
-rw-r--r--tools/tracing/rtla/tests/engine.sh26
-rw-r--r--tools/tracing/rtla/tests/osnoise.t27
-rw-r--r--tools/tracing/rtla/tests/timerlat.t10
-rw-r--r--tools/usb/usbip/src/usbipd.c4
-rw-r--r--tools/virtio/linux/compiler.h2
-rw-r--r--tools/virtio/linux/kmsan.h2
1882 files changed, 121201 insertions, 29420 deletions
diff --git a/tools/Makefile b/tools/Makefile
index c31cbbd12c45..cb40961a740f 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -14,6 +14,7 @@ help:
@echo ' counter - counter tools'
@echo ' cpupower - a tool for all things x86 CPU power'
@echo ' debugging - tools for debugging'
+ @echo ' dma - tools for DMA mapping'
@echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer'
@echo ' firmware - Firmware tools'
@echo ' freefall - laptop accelerometer program for disk protection'
@@ -69,7 +70,7 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-counter firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi firmware debugging tracing: FORCE
+counter dma firewire hv guest bootconfig spi usb virtio mm bpf iio gpio objtool leds wmi firmware debugging tracing: FORCE
$(call descend,$@)
bpf/%: FORCE
@@ -122,7 +123,7 @@ kvm_stat: FORCE
ynl: FORCE
$(call descend,net/ynl)
-all: acpi counter cpupower gpio hv firewire \
+all: acpi counter cpupower dma gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
@@ -134,7 +135,7 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
-counter_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install debugging_install tracing_install:
+counter_install dma_install firewire_install gpio_install hv_install iio_install perf_install bootconfig_install spi_install usb_install virtio_install mm_install bpf_install objtool_install wmi_install debugging_install tracing_install:
$(call descend,$(@:_install=),install)
selftests_install:
@@ -164,7 +165,7 @@ kvm_stat_install:
ynl_install:
$(call descend,net/$(@:_install=),install)
-install: acpi_install counter_install cpupower_install gpio_install \
+install: acpi_install counter_install cpupower_install dma_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install mm_install bpf_install x86_energy_perf_policy_install \
@@ -178,7 +179,7 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-counter_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean firmware_clean debugging_clean tracing_clean:
+counter_clean dma_clean hv_clean firewire_clean bootconfig_clean spi_clean usb_clean virtio_clean mm_clean wmi_clean bpf_clean iio_clean gpio_clean objtool_clean leds_clean firmware_clean debugging_clean tracing_clean:
$(call descend,$(@:_clean=),clean)
libapi_clean:
@@ -224,7 +225,7 @@ build_clean:
ynl_clean:
$(call descend,net/$(@:_clean=),clean)
-clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
+clean: acpi_clean counter_clean cpupower_clean dma_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
diff --git a/tools/accounting/delaytop.c b/tools/accounting/delaytop.c
index 9afb1ffc00ba..72cc500b44b1 100644
--- a/tools/accounting/delaytop.c
+++ b/tools/accounting/delaytop.c
@@ -42,14 +42,13 @@
#include <linux/genetlink.h>
#include <linux/taskstats.h>
#include <linux/cgroupstats.h>
+#include <stddef.h>
-#define PSI_CPU_SOME "/proc/pressure/cpu"
-#define PSI_CPU_FULL "/proc/pressure/cpu"
-#define PSI_MEMORY_SOME "/proc/pressure/memory"
-#define PSI_MEMORY_FULL "/proc/pressure/memory"
-#define PSI_IO_SOME "/proc/pressure/io"
-#define PSI_IO_FULL "/proc/pressure/io"
-#define PSI_IRQ_FULL "/proc/pressure/irq"
+#define PSI_PATH "/proc/pressure"
+#define PSI_CPU_PATH "/proc/pressure/cpu"
+#define PSI_MEMORY_PATH "/proc/pressure/memory"
+#define PSI_IO_PATH "/proc/pressure/io"
+#define PSI_IRQ_PATH "/proc/pressure/irq"
#define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
@@ -61,24 +60,28 @@
#define TASK_COMM_LEN 16
#define MAX_MSG_SIZE 1024
#define MAX_TASKS 1000
+#define MAX_BUF_LEN 256
#define SET_TASK_STAT(task_count, field) tasks[task_count].field = stats.field
#define BOOL_FPRINT(stream, fmt, ...) \
({ \
int ret = fprintf(stream, fmt, ##__VA_ARGS__); \
ret >= 0; \
})
+#define TASK_AVG(task, field) average_ms((task).field##_delay_total, (task).field##_count)
#define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
-
-/* Program settings structure */
-struct config {
- int delay; /* Update interval in seconds */
- int iterations; /* Number of iterations, 0 == infinite */
- int max_processes; /* Maximum number of processes to show */
- char sort_field; /* Field to sort by */
- int output_one_time; /* Output once and exit */
- int monitor_pid; /* Monitor specific PID */
- char *container_path; /* Path to container cgroup */
-};
+#define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n"
+#define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n"
+#define SORT_FIELD(name, cmd, modes) \
+ {#name, #cmd, \
+ offsetof(struct task_info, name##_delay_total), \
+ offsetof(struct task_info, name##_count), \
+ modes}
+#define END_FIELD {NULL, 0, 0}
+
+/* Display mode types */
+#define MODE_TYPE_ALL (0xFFFFFFFF)
+#define MODE_DEFAULT (1 << 0)
+#define MODE_MEMVERBOSE (1 << 1)
/* PSI statistics structure */
struct psi_stats {
@@ -119,6 +122,8 @@ struct task_info {
unsigned long long wpcopy_delay_total;
unsigned long long irq_count;
unsigned long long irq_delay_total;
+ unsigned long long mem_count;
+ unsigned long long mem_delay_total;
};
/* Container statistics structure */
@@ -130,6 +135,27 @@ struct container_stats {
int nr_io_wait; /* Number of processes in IO wait */
};
+/* Delay field structure */
+struct field_desc {
+ const char *name; /* Field name for cmdline argument */
+ const char *cmd_char; /* Interactive command */
+ unsigned long total_offset; /* Offset of total delay in task_info */
+ unsigned long count_offset; /* Offset of count in task_info */
+ size_t supported_modes; /* Supported display modes */
+};
+
+/* Program settings structure */
+struct config {
+ int delay; /* Update interval in seconds */
+ int iterations; /* Number of iterations, 0 == infinite */
+ int max_processes; /* Maximum number of processes to show */
+ int output_one_time; /* Output once and exit */
+ int monitor_pid; /* Monitor specific PID */
+ char *container_path; /* Path to container cgroup */
+ const struct field_desc *sort_field; /* Current sort field */
+ size_t display_mode; /* Current display mode */
+};
+
/* Global variables */
static struct config cfg;
static struct psi_stats psi;
@@ -137,6 +163,19 @@ static struct task_info tasks[MAX_TASKS];
static int task_count;
static int running = 1;
static struct container_stats container_stats;
+static const struct field_desc sort_fields[] = {
+ SORT_FIELD(cpu, c, MODE_DEFAULT),
+ SORT_FIELD(blkio, i, MODE_DEFAULT),
+ SORT_FIELD(irq, q, MODE_DEFAULT),
+ SORT_FIELD(mem, m, MODE_DEFAULT | MODE_MEMVERBOSE),
+ SORT_FIELD(swapin, s, MODE_MEMVERBOSE),
+ SORT_FIELD(freepages, r, MODE_MEMVERBOSE),
+ SORT_FIELD(thrashing, t, MODE_MEMVERBOSE),
+ SORT_FIELD(compact, p, MODE_MEMVERBOSE),
+ SORT_FIELD(wpcopy, w, MODE_MEMVERBOSE),
+ END_FIELD
+};
+static int sort_selected;
/* Netlink socket variables */
static int nl_sd = -1;
@@ -158,18 +197,75 @@ static void disable_raw_mode(void)
tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
}
+/* Find field descriptor by command line */
+static const struct field_desc *get_field_by_cmd_char(char ch)
+{
+ const struct field_desc *field;
+
+ for (field = sort_fields; field->name != NULL; field++) {
+ if (field->cmd_char[0] == ch)
+ return field;
+ }
+
+ return NULL;
+}
+
+/* Find field descriptor by name with string comparison */
+static const struct field_desc *get_field_by_name(const char *name)
+{
+ const struct field_desc *field;
+ size_t field_len;
+
+ for (field = sort_fields; field->name != NULL; field++) {
+ field_len = strlen(field->name);
+ if (field_len != strlen(name))
+ continue;
+ if (strncmp(field->name, name, field_len) == 0)
+ return field;
+ }
+
+ return NULL;
+}
+
+/* Find display name for a field descriptor */
+static const char *get_name_by_field(const struct field_desc *field)
+{
+ return field ? field->name : "UNKNOWN";
+}
+
+/* Generate string of available field names */
+static void display_available_fields(size_t mode)
+{
+ const struct field_desc *field;
+ char buf[MAX_BUF_LEN];
+
+ buf[0] = '\0';
+
+ for (field = sort_fields; field->name != NULL; field++) {
+ if (!(field->supported_modes & mode))
+ continue;
+ strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1);
+ strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1);
+ buf[MAX_BUF_LEN - 1] = '\0';
+ }
+
+ fprintf(stderr, "Available fields: %s\n", buf);
+}
+
/* Display usage information and command line options */
static void usage(void)
{
printf("Usage: delaytop [Options]\n"
"Options:\n"
- " -h, --help Show this help message and exit\n"
- " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
- " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
- " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
- " -o, --once Display once and exit\n"
- " -p, --pid=PID Monitor only the specified PID\n"
- " -C, --container=PATH Monitor the container at specified cgroup path\n");
+ " -h, --help Show this help message and exit\n"
+ " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
+ " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
+ " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
+ " -o, --once Display once and exit\n"
+ " -p, --pid=PID Monitor only the specified PID\n"
+ " -C, --container=PATH Monitor the container at specified cgroup path\n"
+ " -s, --sort=FIELD Sort by delay field (default: cpu)\n"
+ " -M, --memverbose Display memory detailed information\n");
exit(0);
}
@@ -177,6 +273,7 @@ static void usage(void)
static void parse_args(int argc, char **argv)
{
int c;
+ const struct field_desc *field;
struct option long_options[] = {
{"help", no_argument, 0, 'h'},
{"delay", required_argument, 0, 'd'},
@@ -184,7 +281,9 @@ static void parse_args(int argc, char **argv)
{"pid", required_argument, 0, 'p'},
{"once", no_argument, 0, 'o'},
{"processes", required_argument, 0, 'P'},
+ {"sort", required_argument, 0, 's'},
{"container", required_argument, 0, 'C'},
+ {"memverbose", no_argument, 0, 'M'},
{0, 0, 0, 0}
};
@@ -192,15 +291,16 @@ static void parse_args(int argc, char **argv)
cfg.delay = 2;
cfg.iterations = 0;
cfg.max_processes = 20;
- cfg.sort_field = 'c'; /* Default sort by CPU delay */
+ cfg.sort_field = &sort_fields[0]; /* Default sorted by CPU delay */
cfg.output_one_time = 0;
cfg.monitor_pid = 0; /* 0 means monitor all PIDs */
cfg.container_path = NULL;
+ cfg.display_mode = MODE_DEFAULT;
while (1) {
int option_index = 0;
- c = getopt_long(argc, argv, "hd:n:p:oP:C:", long_options, &option_index);
+ c = getopt_long(argc, argv, "hd:n:p:oP:C:s:M", long_options, &option_index);
if (c == -1)
break;
@@ -247,6 +347,26 @@ static void parse_args(int argc, char **argv)
case 'C':
cfg.container_path = strdup(optarg);
break;
+ case 's':
+ if (strlen(optarg) == 0) {
+ fprintf(stderr, "Error: empty sort field\n");
+ exit(1);
+ }
+
+ field = get_field_by_name(optarg);
+ /* Show available fields if invalid option provided */
+ if (!field) {
+ fprintf(stderr, "Error: invalid sort field '%s'\n", optarg);
+ display_available_fields(MODE_TYPE_ALL);
+ exit(1);
+ }
+
+ cfg.sort_field = field;
+ break;
+ case 'M':
+ cfg.display_mode = MODE_MEMVERBOSE;
+ cfg.sort_field = get_field_by_name("mem");
+ break;
default:
fprintf(stderr, "Try 'delaytop --help' for more information.\n");
exit(1);
@@ -254,6 +374,25 @@ static void parse_args(int argc, char **argv)
}
}
+/* Calculate average delay in milliseconds for overall memory */
+static void set_mem_delay_total(struct task_info *t)
+{
+ t->mem_delay_total = t->swapin_delay_total +
+ t->freepages_delay_total +
+ t->thrashing_delay_total +
+ t->compact_delay_total +
+ t->wpcopy_delay_total;
+}
+
+static void set_mem_count(struct task_info *t)
+{
+ t->mem_count = t->swapin_count +
+ t->freepages_count +
+ t->thrashing_count +
+ t->compact_count +
+ t->wpcopy_count;
+}
+
/* Create a raw netlink socket and bind */
static int create_nl_socket(void)
{
@@ -358,87 +497,134 @@ static int get_family_id(int sd)
return id;
}
-static void read_psi_stats(void)
+static int read_psi_stats(void)
{
FILE *fp;
char line[256];
int ret = 0;
+ int error_count = 0;
+
+ /* Check if PSI path exists */
+ if (access(PSI_PATH, F_OK) != 0) {
+ fprintf(stderr, "Error: PSI interface not found at %s\n", PSI_PATH);
+ fprintf(stderr, "Please ensure your kernel supports PSI (Pressure Stall Information)\n");
+ return -1;
+ }
+
/* Zero all fields */
memset(&psi, 0, sizeof(psi));
+
/* CPU pressure */
- fp = fopen(PSI_CPU_SOME, "r");
+ fp = fopen(PSI_CPU_PATH, "r");
if (fp) {
while (fgets(line, sizeof(line), fp)) {
if (strncmp(line, "some", 4) == 0) {
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.cpu_some_avg10, &psi.cpu_some_avg60,
&psi.cpu_some_avg300, &psi.cpu_some_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse CPU some PSI data\n");
+ error_count++;
+ }
} else if (strncmp(line, "full", 4) == 0) {
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.cpu_full_avg10, &psi.cpu_full_avg60,
&psi.cpu_full_avg300, &psi.cpu_full_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse CPU full PSI data\n");
+ error_count++;
+ }
}
}
fclose(fp);
+ } else {
+ fprintf(stderr, "Warning: Failed to open %s\n", PSI_CPU_PATH);
+ error_count++;
}
+
/* Memory pressure */
- fp = fopen(PSI_MEMORY_SOME, "r");
+ fp = fopen(PSI_MEMORY_PATH, "r");
if (fp) {
while (fgets(line, sizeof(line), fp)) {
if (strncmp(line, "some", 4) == 0) {
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.memory_some_avg10, &psi.memory_some_avg60,
&psi.memory_some_avg300, &psi.memory_some_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse Memory some PSI data\n");
+ error_count++;
+ }
} else if (strncmp(line, "full", 4) == 0) {
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.memory_full_avg10, &psi.memory_full_avg60,
&psi.memory_full_avg300, &psi.memory_full_total);
- }
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse Memory full PSI data\n");
+ error_count++;
+ }
+ }
}
fclose(fp);
+ } else {
+ fprintf(stderr, "Warning: Failed to open %s\n", PSI_MEMORY_PATH);
+ error_count++;
}
+
/* IO pressure */
- fp = fopen(PSI_IO_SOME, "r");
+ fp = fopen(PSI_IO_PATH, "r");
if (fp) {
while (fgets(line, sizeof(line), fp)) {
if (strncmp(line, "some", 4) == 0) {
ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.io_some_avg10, &psi.io_some_avg60,
&psi.io_some_avg300, &psi.io_some_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse IO some PSI data\n");
+ error_count++;
+ }
} else if (strncmp(line, "full", 4) == 0) {
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.io_full_avg10, &psi.io_full_avg60,
&psi.io_full_avg300, &psi.io_full_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse IO full PSI data\n");
+ error_count++;
+ }
}
}
fclose(fp);
+ } else {
+ fprintf(stderr, "Warning: Failed to open %s\n", PSI_IO_PATH);
+ error_count++;
}
+
/* IRQ pressure (only full) */
- fp = fopen(PSI_IRQ_FULL, "r");
+ fp = fopen(PSI_IRQ_PATH, "r");
if (fp) {
while (fgets(line, sizeof(line), fp)) {
if (strncmp(line, "full", 4) == 0) {
ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
&psi.irq_full_avg10, &psi.irq_full_avg60,
&psi.irq_full_avg300, &psi.irq_full_total);
- if (ret != 4)
+ if (ret != 4) {
fprintf(stderr, "Failed to parse IRQ full PSI data\n");
+ error_count++;
+ }
}
}
fclose(fp);
+ } else {
+ fprintf(stderr, "Warning: Failed to open %s\n", PSI_IRQ_PATH);
+ error_count++;
+ }
+
+ /* Return error count: 0 means success, >0 means warnings, -1 means fatal error */
+ if (error_count > 0) {
+ fprintf(stderr, "PSI stats reading completed with %d warnings\n", error_count);
+ return error_count;
}
+
+ return 0;
}
static int read_comm(int pid, char *comm_buf, size_t buf_size)
@@ -527,6 +713,8 @@ static void fetch_and_fill_task_info(int pid, const char *comm)
SET_TASK_STAT(task_count, wpcopy_delay_total);
SET_TASK_STAT(task_count, irq_count);
SET_TASK_STAT(task_count, irq_delay_total);
+ set_mem_count(&tasks[task_count]);
+ set_mem_delay_total(&tasks[task_count]);
task_count++;
}
break;
@@ -587,19 +775,23 @@ static int compare_tasks(const void *a, const void *b)
{
const struct task_info *t1 = (const struct task_info *)a;
const struct task_info *t2 = (const struct task_info *)b;
+ unsigned long long total1;
+ unsigned long long total2;
+ unsigned long count1;
+ unsigned long count2;
double avg1, avg2;
- switch (cfg.sort_field) {
- case 'c': /* CPU */
- avg1 = average_ms(t1->cpu_delay_total, t1->cpu_count);
- avg2 = average_ms(t2->cpu_delay_total, t2->cpu_count);
- if (avg1 != avg2)
- return avg2 > avg1 ? 1 : -1;
- return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
+ total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset);
+ total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset);
+ count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset);
+ count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset);
- default:
- return t2->cpu_delay_total > t1->cpu_delay_total ? 1 : -1;
- }
+ avg1 = average_ms(total1, count1);
+ avg2 = average_ms(total2, count2);
+ if (avg1 != avg2)
+ return avg2 > avg1 ? 1 : -1;
+
+ return 0;
}
/* Sort tasks by selected field */
@@ -673,7 +865,7 @@ static void get_container_stats(void)
}
/* Display results to stdout or log file */
-static void display_results(void)
+static void display_results(int psi_ret)
{
time_t now = time(NULL);
struct tm *tm_now = localtime(&now);
@@ -686,49 +878,53 @@ static void display_results(void)
suc &= BOOL_FPRINT(out, "\033[H\033[J");
/* PSI output (one-line, no cat style) */
- suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n");
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "CPU some:",
- psi.cpu_some_avg10,
- psi.cpu_some_avg60,
- psi.cpu_some_avg300,
- psi.cpu_some_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "CPU full:",
- psi.cpu_full_avg10,
- psi.cpu_full_avg60,
- psi.cpu_full_avg300,
- psi.cpu_full_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "Memory full:",
- psi.memory_full_avg10,
- psi.memory_full_avg60,
- psi.memory_full_avg300,
- psi.memory_full_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "Memory some:",
- psi.memory_some_avg10,
- psi.memory_some_avg60,
- psi.memory_some_avg300,
- psi.memory_some_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "IO full:",
- psi.io_full_avg10,
- psi.io_full_avg60,
- psi.io_full_avg300,
- psi.io_full_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "IO some:",
- psi.io_some_avg10,
- psi.io_some_avg60,
- psi.io_some_avg300,
- psi.io_some_total / 1000);
- suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
- "IRQ full:",
- psi.irq_full_avg10,
- psi.irq_full_avg60,
- psi.irq_full_avg300,
- psi.irq_full_total / 1000);
+ suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60/avg300/total)\n");
+ if (psi_ret) {
+ suc &= BOOL_FPRINT(out, " PSI not found: check if psi=1 enabled in cmdline\n");
+ } else {
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "CPU some:",
+ psi.cpu_some_avg10,
+ psi.cpu_some_avg60,
+ psi.cpu_some_avg300,
+ psi.cpu_some_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "CPU full:",
+ psi.cpu_full_avg10,
+ psi.cpu_full_avg60,
+ psi.cpu_full_avg300,
+ psi.cpu_full_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "Memory full:",
+ psi.memory_full_avg10,
+ psi.memory_full_avg60,
+ psi.memory_full_avg300,
+ psi.memory_full_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "Memory some:",
+ psi.memory_some_avg10,
+ psi.memory_some_avg60,
+ psi.memory_some_avg300,
+ psi.memory_some_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "IO full:",
+ psi.io_full_avg10,
+ psi.io_full_avg60,
+ psi.io_full_avg300,
+ psi.io_full_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "IO some:",
+ psi.io_some_avg10,
+ psi.io_some_avg60,
+ psi.io_some_avg300,
+ psi.io_some_total / 1000);
+ suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
+ "IRQ full:",
+ psi.irq_full_avg10,
+ psi.irq_full_avg60,
+ psi.irq_full_avg300,
+ psi.irq_full_total / 1000);
+ }
if (cfg.container_path) {
suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path);
@@ -738,29 +934,59 @@ static void display_results(void)
container_stats.nr_stopped, container_stats.nr_uninterruptible,
container_stats.nr_io_wait);
}
- suc &= BOOL_FPRINT(out, "Top %d processes (sorted by CPU delay):\n",
- cfg.max_processes);
- suc &= BOOL_FPRINT(out, "%5s %5s %-17s", "PID", "TGID", "COMMAND");
- suc &= BOOL_FPRINT(out, "%7s %7s %7s %7s %7s %7s %7s %7s\n",
- "CPU(ms)", "IO(ms)", "SWAP(ms)", "RCL(ms)",
- "THR(ms)", "CMP(ms)", "WP(ms)", "IRQ(ms)");
- suc &= BOOL_FPRINT(out, "-----------------------------------------------");
- suc &= BOOL_FPRINT(out, "----------------------------------------------\n");
+ /* Interactive command */
+ suc &= BOOL_FPRINT(out, "[o]sort [M]memverbose [q]quit\n");
+ if (sort_selected) {
+ if (cfg.display_mode == MODE_MEMVERBOSE)
+ suc &= BOOL_FPRINT(out,
+ "sort selection: [m]MEM [r]RCL [t]THR [p]CMP [w]WP\n");
+ else
+ suc &= BOOL_FPRINT(out,
+ "sort selection: [c]CPU [i]IO [m]MEM [q]IRQ\n");
+ }
+
+ /* Task delay output */
+ suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n",
+ cfg.max_processes, get_name_by_field(cfg.sort_field));
+
+ suc &= BOOL_FPRINT(out, "%8s %8s %-17s", "PID", "TGID", "COMMAND");
+ if (cfg.display_mode == MODE_MEMVERBOSE) {
+ suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s %8s %8s\n",
+ "MEM(ms)", "SWAP(ms)", "RCL(ms)",
+ "THR(ms)", "CMP(ms)", "WP(ms)");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "---------------------\n");
+ } else {
+ suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s\n",
+ "CPU(ms)", "IO(ms)", "IRQ(ms)", "MEM(ms)");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "-----------------------");
+ suc &= BOOL_FPRINT(out, "--------------------------\n");
+ }
+
count = task_count < cfg.max_processes ? task_count : cfg.max_processes;
for (i = 0; i < count; i++) {
- suc &= BOOL_FPRINT(out, "%5d %5d %-15s",
+ suc &= BOOL_FPRINT(out, "%8d %8d %-15s",
tasks[i].pid, tasks[i].tgid, tasks[i].command);
- suc &= BOOL_FPRINT(out, "%7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f %7.2f\n",
- average_ms(tasks[i].cpu_delay_total, tasks[i].cpu_count),
- average_ms(tasks[i].blkio_delay_total, tasks[i].blkio_count),
- average_ms(tasks[i].swapin_delay_total, tasks[i].swapin_count),
- average_ms(tasks[i].freepages_delay_total, tasks[i].freepages_count),
- average_ms(tasks[i].thrashing_delay_total, tasks[i].thrashing_count),
- average_ms(tasks[i].compact_delay_total, tasks[i].compact_count),
- average_ms(tasks[i].wpcopy_delay_total, tasks[i].wpcopy_count),
- average_ms(tasks[i].irq_delay_total, tasks[i].irq_count));
+ if (cfg.display_mode == MODE_MEMVERBOSE) {
+ suc &= BOOL_FPRINT(out, DELAY_FMT_MEMVERBOSE,
+ TASK_AVG(tasks[i], mem),
+ TASK_AVG(tasks[i], swapin),
+ TASK_AVG(tasks[i], freepages),
+ TASK_AVG(tasks[i], thrashing),
+ TASK_AVG(tasks[i], compact),
+ TASK_AVG(tasks[i], wpcopy));
+ } else {
+ suc &= BOOL_FPRINT(out, DELAY_FMT_DEFAULT,
+ TASK_AVG(tasks[i], cpu),
+ TASK_AVG(tasks[i], blkio),
+ TASK_AVG(tasks[i], irq),
+ TASK_AVG(tasks[i], mem));
+ }
}
suc &= BOOL_FPRINT(out, "\n");
@@ -769,11 +995,79 @@ static void display_results(void)
perror("Error writing to output");
}
+/* Check for keyboard input with timeout based on cfg.delay */
+static char check_for_keypress(void)
+{
+ struct timeval tv = {cfg.delay, 0};
+ fd_set readfds;
+ char ch = 0;
+
+ FD_ZERO(&readfds);
+ FD_SET(STDIN_FILENO, &readfds);
+ int r = select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv);
+
+ if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
+ read(STDIN_FILENO, &ch, 1);
+ return ch;
+ }
+
+ return 0;
+}
+
+#define MAX_MODE_SIZE 2
+static void toggle_display_mode(void)
+{
+ static const size_t modes[MAX_MODE_SIZE] = {MODE_DEFAULT, MODE_MEMVERBOSE};
+ static size_t cur_index;
+
+ cur_index = (cur_index + 1) % MAX_MODE_SIZE;
+ cfg.display_mode = modes[cur_index];
+}
+
+/* Handle keyboard input: sorting selection, mode toggle, or quit */
+static void handle_keypress(char ch, int *running)
+{
+ const struct field_desc *field;
+
+ /* Change sort field */
+ if (sort_selected) {
+ field = get_field_by_cmd_char(ch);
+ if (field && (field->supported_modes & cfg.display_mode))
+ cfg.sort_field = field;
+
+ sort_selected = 0;
+ /* Handle mode changes or quit */
+ } else {
+ switch (ch) {
+ case 'o':
+ sort_selected = 1;
+ break;
+ case 'M':
+ toggle_display_mode();
+ for (field = sort_fields; field->name != NULL; field++) {
+ if (field->supported_modes & cfg.display_mode) {
+ cfg.sort_field = field;
+ break;
+ }
+ }
+ break;
+ case 'q':
+ case 'Q':
+ *running = 0;
+ break;
+ default:
+ break;
+ }
+ }
+}
+
/* Main function */
int main(int argc, char **argv)
{
+ const struct field_desc *field;
int iterations = 0;
- int use_q_quit = 0;
+ int psi_ret = 0;
+ char keypress;
/* Parse command line arguments */
parse_args(argc, argv);
@@ -793,17 +1087,24 @@ int main(int argc, char **argv)
exit(1);
}
- if (!cfg.output_one_time) {
- use_q_quit = 1;
- enable_raw_mode();
- printf("Press 'q' to quit.\n");
- fflush(stdout);
- }
+ /* Set terminal to non-canonical mode for interaction */
+ enable_raw_mode();
/* Main loop */
while (running) {
+ /* Auto-switch sort field when not matching display mode */
+ if (!(cfg.sort_field->supported_modes & cfg.display_mode)) {
+ for (field = sort_fields; field->name != NULL; field++) {
+ if (field->supported_modes & cfg.display_mode) {
+ cfg.sort_field = field;
+ printf("Auto-switched sort field to: %s\n", field->name);
+ break;
+ }
+ }
+ }
+
/* Read PSI statistics */
- read_psi_stats();
+ psi_ret = read_psi_stats();
/* Get container stats if container path provided */
if (cfg.container_path)
@@ -816,7 +1117,7 @@ int main(int argc, char **argv)
sort_tasks();
/* Display results to stdout or log file */
- display_results();
+ display_results(psi_ret);
/* Check for iterations */
if (cfg.iterations > 0 && ++iterations >= cfg.iterations)
@@ -826,32 +1127,14 @@ int main(int argc, char **argv)
if (cfg.output_one_time)
break;
- /* Check for 'q' key to quit */
- if (use_q_quit) {
- struct timeval tv = {cfg.delay, 0};
- fd_set readfds;
-
- FD_ZERO(&readfds);
- FD_SET(STDIN_FILENO, &readfds);
- int r = select(STDIN_FILENO+1, &readfds, NULL, NULL, &tv);
-
- if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
- char ch = 0;
-
- read(STDIN_FILENO, &ch, 1);
- if (ch == 'q' || ch == 'Q') {
- running = 0;
- break;
- }
- }
- } else {
- sleep(cfg.delay);
- }
+ /* Keypress for interactive usage */
+ keypress = check_for_keypress();
+ if (keypress)
+ handle_keypress(keypress, &running);
}
/* Restore terminal mode */
- if (use_q_quit)
- disable_raw_mode();
+ disable_raw_mode();
/* Cleanup */
close(nl_sd);
diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h
deleted file mode 100644
index d5dd96902817..000000000000
--- a/tools/arch/arm/include/uapi/asm/kvm.h
+++ /dev/null
@@ -1,315 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Copyright (C) 2012 - Virtual Open Systems and Columbia University
- * Author: Christoffer Dall <c.dall@virtualopensystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- */
-
-#ifndef __ARM_KVM_H__
-#define __ARM_KVM_H__
-
-#include <linux/types.h>
-#include <linux/psci.h>
-#include <asm/ptrace.h>
-
-#define __KVM_HAVE_GUEST_DEBUG
-#define __KVM_HAVE_IRQ_LINE
-#define __KVM_HAVE_READONLY_MEM
-#define __KVM_HAVE_VCPU_EVENTS
-
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-
-#define KVM_REG_SIZE(id) \
- (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
-
-/* Valid for svc_regs, abt_regs, und_regs, irq_regs in struct kvm_regs */
-#define KVM_ARM_SVC_sp svc_regs[0]
-#define KVM_ARM_SVC_lr svc_regs[1]
-#define KVM_ARM_SVC_spsr svc_regs[2]
-#define KVM_ARM_ABT_sp abt_regs[0]
-#define KVM_ARM_ABT_lr abt_regs[1]
-#define KVM_ARM_ABT_spsr abt_regs[2]
-#define KVM_ARM_UND_sp und_regs[0]
-#define KVM_ARM_UND_lr und_regs[1]
-#define KVM_ARM_UND_spsr und_regs[2]
-#define KVM_ARM_IRQ_sp irq_regs[0]
-#define KVM_ARM_IRQ_lr irq_regs[1]
-#define KVM_ARM_IRQ_spsr irq_regs[2]
-
-/* Valid only for fiq_regs in struct kvm_regs */
-#define KVM_ARM_FIQ_r8 fiq_regs[0]
-#define KVM_ARM_FIQ_r9 fiq_regs[1]
-#define KVM_ARM_FIQ_r10 fiq_regs[2]
-#define KVM_ARM_FIQ_fp fiq_regs[3]
-#define KVM_ARM_FIQ_ip fiq_regs[4]
-#define KVM_ARM_FIQ_sp fiq_regs[5]
-#define KVM_ARM_FIQ_lr fiq_regs[6]
-#define KVM_ARM_FIQ_spsr fiq_regs[7]
-
-struct kvm_regs {
- struct pt_regs usr_regs; /* R0_usr - R14_usr, PC, CPSR */
- unsigned long svc_regs[3]; /* SP_svc, LR_svc, SPSR_svc */
- unsigned long abt_regs[3]; /* SP_abt, LR_abt, SPSR_abt */
- unsigned long und_regs[3]; /* SP_und, LR_und, SPSR_und */
- unsigned long irq_regs[3]; /* SP_irq, LR_irq, SPSR_irq */
- unsigned long fiq_regs[8]; /* R8_fiq - R14_fiq, SPSR_fiq */
-};
-
-/* Supported Processor Types */
-#define KVM_ARM_TARGET_CORTEX_A15 0
-#define KVM_ARM_TARGET_CORTEX_A7 1
-#define KVM_ARM_NUM_TARGETS 2
-
-/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
-#define KVM_ARM_DEVICE_TYPE_SHIFT 0
-#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
-#define KVM_ARM_DEVICE_ID_SHIFT 16
-#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
-
-/* Supported device IDs */
-#define KVM_ARM_DEVICE_VGIC_V2 0
-
-/* Supported VGIC address types */
-#define KVM_VGIC_V2_ADDR_TYPE_DIST 0
-#define KVM_VGIC_V2_ADDR_TYPE_CPU 1
-
-#define KVM_VGIC_V2_DIST_SIZE 0x1000
-#define KVM_VGIC_V2_CPU_SIZE 0x2000
-
-/* Supported VGICv3 address types */
-#define KVM_VGIC_V3_ADDR_TYPE_DIST 2
-#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3
-#define KVM_VGIC_ITS_ADDR_TYPE 4
-#define KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION 5
-
-#define KVM_VGIC_V3_DIST_SIZE SZ_64K
-#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K)
-#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K)
-
-#define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */
-#define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */
-
-struct kvm_vcpu_init {
- __u32 target;
- __u32 features[7];
-};
-
-struct kvm_sregs {
-};
-
-struct kvm_fpu {
-};
-
-struct kvm_guest_debug_arch {
-};
-
-struct kvm_debug_exit_arch {
-};
-
-struct kvm_sync_regs {
- /* Used with KVM_CAP_ARM_USER_IRQ */
- __u64 device_irq_level;
-};
-
-struct kvm_arch_memory_slot {
-};
-
-/* for KVM_GET/SET_VCPU_EVENTS */
-struct kvm_vcpu_events {
- struct {
- __u8 serror_pending;
- __u8 serror_has_esr;
- __u8 ext_dabt_pending;
- /* Align it to 8 bytes */
- __u8 pad[5];
- __u64 serror_esr;
- } exception;
- __u32 reserved[12];
-};
-
-/* If you need to interpret the index values, here is the key: */
-#define KVM_REG_ARM_COPROC_MASK 0x000000000FFF0000
-#define KVM_REG_ARM_COPROC_SHIFT 16
-#define KVM_REG_ARM_32_OPC2_MASK 0x0000000000000007
-#define KVM_REG_ARM_32_OPC2_SHIFT 0
-#define KVM_REG_ARM_OPC1_MASK 0x0000000000000078
-#define KVM_REG_ARM_OPC1_SHIFT 3
-#define KVM_REG_ARM_CRM_MASK 0x0000000000000780
-#define KVM_REG_ARM_CRM_SHIFT 7
-#define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800
-#define KVM_REG_ARM_32_CRN_SHIFT 11
-/*
- * For KVM currently all guest registers are nonsecure, but we reserve a bit
- * in the encoding to distinguish secure from nonsecure for AArch32 system
- * registers that are banked by security. This is 1 for the secure banked
- * register, and 0 for the nonsecure banked register or if the register is
- * not banked by security.
- */
-#define KVM_REG_ARM_SECURE_MASK 0x0000000010000000
-#define KVM_REG_ARM_SECURE_SHIFT 28
-
-#define ARM_CP15_REG_SHIFT_MASK(x,n) \
- (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
-
-#define __ARM_CP15_REG(op1,crn,crm,op2) \
- (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
- ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
- ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
- ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
- ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
-
-#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
-
-#define __ARM_CP15_REG64(op1,crm) \
- (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
-#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
-
-/* PL1 Physical Timer Registers */
-#define KVM_REG_ARM_PTIMER_CTL ARM_CP15_REG32(0, 14, 2, 1)
-#define KVM_REG_ARM_PTIMER_CNT ARM_CP15_REG64(0, 14)
-#define KVM_REG_ARM_PTIMER_CVAL ARM_CP15_REG64(2, 14)
-
-/* Virtual Timer Registers */
-#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1)
-#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14)
-#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14)
-
-/* Normal registers are mapped as coprocessor 16. */
-#define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4)
-
-/* Some registers need more space to represent values. */
-#define KVM_REG_ARM_DEMUX (0x0011 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_DEMUX_ID_MASK 0x000000000000FF00
-#define KVM_REG_ARM_DEMUX_ID_SHIFT 8
-#define KVM_REG_ARM_DEMUX_ID_CCSIDR (0x00 << KVM_REG_ARM_DEMUX_ID_SHIFT)
-#define KVM_REG_ARM_DEMUX_VAL_MASK 0x00000000000000FF
-#define KVM_REG_ARM_DEMUX_VAL_SHIFT 0
-
-/* VFP registers: we could overload CP10 like ARM does, but that's ugly. */
-#define KVM_REG_ARM_VFP (0x0012 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_VFP_MASK 0x000000000000FFFF
-#define KVM_REG_ARM_VFP_BASE_REG 0x0
-#define KVM_REG_ARM_VFP_FPSID 0x1000
-#define KVM_REG_ARM_VFP_FPSCR 0x1001
-#define KVM_REG_ARM_VFP_MVFR1 0x1006
-#define KVM_REG_ARM_VFP_MVFR0 0x1007
-#define KVM_REG_ARM_VFP_FPEXC 0x1008
-#define KVM_REG_ARM_VFP_FPINST 0x1009
-#define KVM_REG_ARM_VFP_FPINST2 0x100A
-
-/* KVM-as-firmware specific pseudo-registers */
-#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT)
-#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
- KVM_REG_ARM_FW | ((r) & 0xffff))
-#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1)
- /* Higher values mean better protection. */
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2)
- /* Higher values mean better protection. */
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
-#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
-
-/* Device Control API: ARM VGIC */
-#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
-#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
-#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2
-#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32
-#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
-#define KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT 32
-#define KVM_DEV_ARM_VGIC_V3_MPIDR_MASK \
- (0xffffffffULL << KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT)
-#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0
-#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
-#define KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK (0xffff)
-#define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
-#define KVM_DEV_ARM_VGIC_GRP_CTRL 4
-#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
-#define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6
-#define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7
-#define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8
-#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9
-#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10
-#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \
- (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT)
-#define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff
-#define VGIC_LEVEL_INFO_LINE_LEVEL 0
-
-/* Device Control API on vcpu fd */
-#define KVM_ARM_VCPU_PMU_V3_CTRL 0
-#define KVM_ARM_VCPU_PMU_V3_IRQ 0
-#define KVM_ARM_VCPU_PMU_V3_INIT 1
-#define KVM_ARM_VCPU_TIMER_CTRL 1
-#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
-#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
-
-#define KVM_DEV_ARM_VGIC_CTRL_INIT 0
-#define KVM_DEV_ARM_ITS_SAVE_TABLES 1
-#define KVM_DEV_ARM_ITS_RESTORE_TABLES 2
-#define KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES 3
-#define KVM_DEV_ARM_ITS_CTRL_RESET 4
-
-/* KVM_IRQ_LINE irq field index values */
-#define KVM_ARM_IRQ_VCPU2_SHIFT 28
-#define KVM_ARM_IRQ_VCPU2_MASK 0xf
-#define KVM_ARM_IRQ_TYPE_SHIFT 24
-#define KVM_ARM_IRQ_TYPE_MASK 0xf
-#define KVM_ARM_IRQ_VCPU_SHIFT 16
-#define KVM_ARM_IRQ_VCPU_MASK 0xff
-#define KVM_ARM_IRQ_NUM_SHIFT 0
-#define KVM_ARM_IRQ_NUM_MASK 0xffff
-
-/* irq_type field */
-#define KVM_ARM_IRQ_TYPE_CPU 0
-#define KVM_ARM_IRQ_TYPE_SPI 1
-#define KVM_ARM_IRQ_TYPE_PPI 2
-
-/* out-of-kernel GIC cpu interrupt injection irq_number field */
-#define KVM_ARM_IRQ_CPU_IRQ 0
-#define KVM_ARM_IRQ_CPU_FIQ 1
-
-/*
- * This used to hold the highest supported SPI, but it is now obsolete
- * and only here to provide source code level compatibility with older
- * userland. The highest SPI number can be set via KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
- */
-#ifndef __KERNEL__
-#define KVM_ARM_IRQ_GIC_MAX 127
-#endif
-
-/* One single KVM irqchip, ie. the VGIC */
-#define KVM_NR_IRQCHIPS 1
-
-/* PSCI interface */
-#define KVM_PSCI_FN_BASE 0x95c1ba5e
-#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
-
-#define KVM_PSCI_FN_CPU_SUSPEND KVM_PSCI_FN(0)
-#define KVM_PSCI_FN_CPU_OFF KVM_PSCI_FN(1)
-#define KVM_PSCI_FN_CPU_ON KVM_PSCI_FN(2)
-#define KVM_PSCI_FN_MIGRATE KVM_PSCI_FN(3)
-
-#define KVM_PSCI_RET_SUCCESS PSCI_RET_SUCCESS
-#define KVM_PSCI_RET_NI PSCI_RET_NOT_SUPPORTED
-#define KVM_PSCI_RET_INVAL PSCI_RET_INVALID_PARAMS
-#define KVM_PSCI_RET_DENIED PSCI_RET_DENIED
-
-#endif /* __ARM_KVM_H__ */
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 9a5d85cfd1fb..f898c47e551f 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -75,11 +75,13 @@
#define ARM_CPU_PART_CORTEX_A76 0xD0B
#define ARM_CPU_PART_NEOVERSE_N1 0xD0C
#define ARM_CPU_PART_CORTEX_A77 0xD0D
+#define ARM_CPU_PART_CORTEX_A76AE 0xD0E
#define ARM_CPU_PART_NEOVERSE_V1 0xD40
#define ARM_CPU_PART_CORTEX_A78 0xD41
#define ARM_CPU_PART_CORTEX_A78AE 0xD42
#define ARM_CPU_PART_CORTEX_X1 0xD44
#define ARM_CPU_PART_CORTEX_A510 0xD46
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
#define ARM_CPU_PART_CORTEX_A520 0xD80
#define ARM_CPU_PART_CORTEX_A710 0xD47
#define ARM_CPU_PART_CORTEX_A715 0xD4D
@@ -94,6 +96,7 @@
#define ARM_CPU_PART_NEOVERSE_V3 0xD84
#define ARM_CPU_PART_CORTEX_X925 0xD85
#define ARM_CPU_PART_CORTEX_A725 0xD87
+#define ARM_CPU_PART_CORTEX_A720AE 0xD89
#define ARM_CPU_PART_NEOVERSE_N3 0xD8E
#define APM_CPU_PART_XGENE 0x000
@@ -119,9 +122,11 @@
#define QCOM_CPU_PART_KRYO 0x200
#define QCOM_CPU_PART_KRYO_2XX_GOLD 0x800
#define QCOM_CPU_PART_KRYO_2XX_SILVER 0x801
+#define QCOM_CPU_PART_KRYO_3XX_GOLD 0x802
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
+#define QCOM_CPU_PART_ORYON_X1 0x001
#define NVIDIA_CPU_PART_DENVER 0x003
#define NVIDIA_CPU_PART_CARMEL 0x004
@@ -129,6 +134,7 @@
#define FUJITSU_CPU_PART_A64FX 0x001
#define HISI_CPU_PART_TSV110 0xD01
+#define HISI_CPU_PART_HIP09 0xD02
#define HISI_CPU_PART_HIP12 0xD06
#define APPLE_CPU_PART_M1_ICESTORM 0x022
@@ -159,11 +165,13 @@
#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76)
#define MIDR_NEOVERSE_N1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N1)
#define MIDR_CORTEX_A77 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A77)
+#define MIDR_CORTEX_A76AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76AE)
#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1)
#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78)
#define MIDR_CORTEX_A78AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78AE)
#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1)
#define MIDR_CORTEX_A510 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A510)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
#define MIDR_CORTEX_A520 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A520)
#define MIDR_CORTEX_A710 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A710)
#define MIDR_CORTEX_A715 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A715)
@@ -178,6 +186,7 @@
#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3)
#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925)
#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725)
+#define MIDR_CORTEX_A720AE MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720AE)
#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
@@ -196,13 +205,26 @@
#define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO)
#define MIDR_QCOM_KRYO_2XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_GOLD)
#define MIDR_QCOM_KRYO_2XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_2XX_SILVER)
+#define MIDR_QCOM_KRYO_3XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_GOLD)
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
+#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
+
+/*
+ * NOTES:
+ * - Qualcomm Kryo 5XX Prime / Gold ID themselves as MIDR_CORTEX_A77
+ * - Qualcomm Kryo 5XX Silver IDs itself as MIDR_QCOM_KRYO_4XX_SILVER
+ * - Qualcomm Kryo 6XX Prime IDs itself as MIDR_CORTEX_X1
+ * - Qualcomm Kryo 6XX Gold IDs itself as ARM_CPU_PART_CORTEX_A78
+ * - Qualcomm Kryo 6XX Silver IDs itself as MIDR_CORTEX_A55
+ */
+
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
+#define MIDR_HISI_HIP09 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP09)
#define MIDR_HISI_HIP12 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_HIP12)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
@@ -225,7 +247,7 @@
#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0))
#define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/sysreg.h>
@@ -291,6 +313,14 @@ static inline u32 __attribute_const__ read_cpuid_id(void)
return read_cpuid(MIDR_EL1);
}
+struct target_impl_cpu {
+ u64 midr;
+ u64 revidr;
+ u64 aidr;
+};
+
+bool cpu_errata_set_target_impl(u64 num, void *impl_cpus);
+
static inline u64 __attribute_const__ read_cpuid_mpidr(void)
{
return read_cpuid(MPIDR_EL1);
@@ -310,6 +340,6 @@ static inline u32 __attribute_const__ read_cpuid_cachetype(void)
{
return read_cpuid(CTR_EL0);
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif
diff --git a/tools/arch/arm64/include/asm/esr.h b/tools/arch/arm64/include/asm/esr.h
index bd592ca81571..f3c6403e5ef2 100644
--- a/tools/arch/arm64/include/asm/esr.h
+++ b/tools/arch/arm64/include/asm/esr.h
@@ -141,6 +141,8 @@
#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT)
#define ESR_ELx_AR_SHIFT (14)
#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT)
+#define ESR_ELx_VNCR_SHIFT (13)
+#define ESR_ELx_VNCR (UL(1) << ESR_ELx_VNCR_SHIFT)
#define ESR_ELx_CM_SHIFT (8)
#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT)
@@ -385,7 +387,7 @@
#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5)
#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0)
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm/types.h>
static inline unsigned long esr_brk_comment(unsigned long esr)
@@ -450,6 +452,6 @@ static inline bool esr_iss_is_eretab(unsigned long esr)
}
const char *esr_get_class_string(unsigned long esr);
-#endif /* __ASSEMBLY */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_ESR_H */
diff --git a/tools/arch/arm64/include/asm/gpr-num.h b/tools/arch/arm64/include/asm/gpr-num.h
index 05da4a7c5788..a114e4f8209b 100644
--- a/tools/arch/arm64/include/asm/gpr-num.h
+++ b/tools/arch/arm64/include/asm/gpr-num.h
@@ -2,7 +2,7 @@
#ifndef __ASM_GPR_NUM_H
#define __ASM_GPR_NUM_H
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
.equ .L__gpr_num_x\num, \num
@@ -11,7 +11,7 @@
.equ .L__gpr_num_xzr, 31
.equ .L__gpr_num_wzr, 31
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define __DEFINE_ASM_GPR_NUMS \
" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" \
@@ -21,6 +21,6 @@
" .equ .L__gpr_num_xzr, 31\n" \
" .equ .L__gpr_num_wzr, 31\n"
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_GPR_NUM_H */
diff --git a/tools/arch/arm64/include/asm/sysreg.h b/tools/arch/arm64/include/asm/sysreg.h
index 690b6ebd118f..178b7322bf04 100644
--- a/tools/arch/arm64/include/asm/sysreg.h
+++ b/tools/arch/arm64/include/asm/sysreg.h
@@ -51,7 +51,7 @@
#ifndef CONFIG_BROKEN_GAS_INST
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
// The space separator is omitted so that __emit_inst(x) can be parsed as
// either an assembler directive or an assembler macro argument.
#define __emit_inst(x) .inst(x)
@@ -70,11 +70,11 @@
(((x) >> 24) & 0x000000ff))
#endif /* CONFIG_CPU_BIG_ENDIAN */
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __emit_inst(x) .long __INSTR_BSWAP(x)
-#else /* __ASSEMBLY__ */
+#else /* __ASSEMBLER__ */
#define __emit_inst(x) ".long " __stringify(__INSTR_BSWAP(x)) "\n\t"
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* CONFIG_BROKEN_GAS_INST */
@@ -1078,12 +1078,7 @@
#define GCS_CAP(x) ((((unsigned long)x) & GCS_CAP_ADDR_MASK) | \
GCS_CAP_VALID_TOKEN)
-#define ARM64_FEATURE_FIELD_BITS 4
-
-/* Defined for compatibility only, do not add new users. */
-#define ARM64_FEATURE_MASK(x) (x##_MASK)
-
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
.macro mrs_s, rt, sreg
__emit_inst(0xd5200000|(\sreg)|(.L__gpr_num_\rt))
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index ed5f3892674c..a792a599b9d6 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -31,7 +31,7 @@
#define KVM_SPSR_FIQ 4
#define KVM_NR_SPSR 5
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <linux/psci.h>
#include <linux/types.h>
#include <asm/ptrace.h>
diff --git a/tools/arch/loongarch/include/asm/inst.h b/tools/arch/loongarch/include/asm/inst.h
index c25b5853181d..d68fad63c8b7 100644
--- a/tools/arch/loongarch/include/asm/inst.h
+++ b/tools/arch/loongarch/include/asm/inst.h
@@ -51,6 +51,10 @@ enum reg2i16_op {
bgeu_op = 0x1b,
};
+enum reg3_op {
+ amswapw_op = 0x70c0,
+};
+
struct reg0i15_format {
unsigned int immediate : 15;
unsigned int opcode : 17;
@@ -96,6 +100,13 @@ struct reg2i16_format {
unsigned int opcode : 6;
};
+struct reg3_format {
+ unsigned int rd : 5;
+ unsigned int rj : 5;
+ unsigned int rk : 5;
+ unsigned int opcode : 17;
+};
+
union loongarch_instruction {
unsigned int word;
struct reg0i15_format reg0i15_format;
@@ -105,6 +116,7 @@ union loongarch_instruction {
struct reg2i12_format reg2i12_format;
struct reg2i14_format reg2i14_format;
struct reg2i16_format reg2i16_format;
+ struct reg3_format reg3_format;
};
#define LOONGARCH_INSN_SIZE sizeof(union loongarch_instruction)
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index eaeda001784e..077c5437f521 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -1,18 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License, version 2, as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- *
* Copyright IBM Corp. 2007
*
* Authors: Hollis Blanchard <hollisb@us.ibm.com>
diff --git a/tools/arch/riscv/include/asm/csr.h b/tools/arch/riscv/include/asm/csr.h
index 0dfc09254f99..21d8cee04638 100644
--- a/tools/arch/riscv/include/asm/csr.h
+++ b/tools/arch/riscv/include/asm/csr.h
@@ -167,7 +167,8 @@
#define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT)
#define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \
(_AC(1, UL) << IRQ_S_TIMER) | \
- (_AC(1, UL) << IRQ_S_EXT))
+ (_AC(1, UL) << IRQ_S_EXT) | \
+ (_AC(1, UL) << IRQ_PMU_OVF))
/* AIA CSR bits */
#define TOPI_IID_SHIFT 16
@@ -280,7 +281,7 @@
#define CSR_HPMCOUNTER30H 0xc9e
#define CSR_HPMCOUNTER31H 0xc9f
-#define CSR_SSCOUNTOVF 0xda0
+#define CSR_SCOUNTOVF 0xda0
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
@@ -468,13 +469,13 @@
#define IE_TIE (_AC(0x1, UL) << RV_IRQ_TIMER)
#define IE_EIE (_AC(0x1, UL) << RV_IRQ_EXT)
-#ifdef __ASSEMBLY__
+#ifdef __ASSEMBLER__
#define __ASM_STR(x) x
#else
#define __ASM_STR(x) #x
#endif
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#define csr_swap(csr, val) \
({ \
@@ -536,6 +537,6 @@
: "memory"); \
})
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* _ASM_RISCV_CSR_H */
diff --git a/tools/arch/riscv/include/asm/vdso/processor.h b/tools/arch/riscv/include/asm/vdso/processor.h
index 662aca039848..0665b117f30f 100644
--- a/tools/arch/riscv/include/asm/vdso/processor.h
+++ b/tools/arch/riscv/include/asm/vdso/processor.h
@@ -2,7 +2,7 @@
#ifndef __ASM_VDSO_PROCESSOR_H
#define __ASM_VDSO_PROCESSOR_H
-#ifndef __ASSEMBLY__
+#ifndef __ASSEMBLER__
#include <asm-generic/barrier.h>
@@ -27,6 +27,6 @@ static inline void cpu_relax(void)
barrier();
}
-#endif /* __ASSEMBLY__ */
+#endif /* __ASSEMBLER__ */
#endif /* __ASM_VDSO_PROCESSOR_H */
diff --git a/tools/arch/s390/include/uapi/asm/bitsperlong.h b/tools/arch/s390/include/uapi/asm/bitsperlong.h
index d2bb620119bf..a226a1686a53 100644
--- a/tools/arch/s390/include/uapi/asm/bitsperlong.h
+++ b/tools/arch/s390/include/uapi/asm/bitsperlong.h
@@ -2,11 +2,7 @@
#ifndef __ASM_S390_BITSPERLONG_H
#define __ASM_S390_BITSPERLONG_H
-#ifndef __s390x__
-#define __BITS_PER_LONG 32
-#else
#define __BITS_PER_LONG 64
-#endif
#include <asm-generic/bitsperlong.h>
diff --git a/tools/arch/s390/include/uapi/asm/kvm_perf.h b/tools/arch/s390/include/uapi/asm/kvm_perf.h
deleted file mode 100644
index 84606b8cc49e..000000000000
--- a/tools/arch/s390/include/uapi/asm/kvm_perf.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-/*
- * Definitions for perf-kvm on s390
- *
- * Copyright 2014 IBM Corp.
- * Author(s): Alexander Yarygin <yarygin@linux.vnet.ibm.com>
- */
-
-#ifndef __LINUX_KVM_PERF_S390_H
-#define __LINUX_KVM_PERF_S390_H
-
-#include <asm/sie.h>
-
-#define DECODE_STR_LEN 40
-
-#define VCPU_ID "id"
-
-#define KVM_ENTRY_TRACE "kvm:kvm_s390_sie_enter"
-#define KVM_EXIT_TRACE "kvm:kvm_s390_sie_exit"
-#define KVM_EXIT_REASON "icptcode"
-
-#endif
diff --git a/tools/arch/x86/include/asm/asm.h b/tools/arch/x86/include/asm/asm.h
index dbe39b44256b..6e1b357c374b 100644
--- a/tools/arch/x86/include/asm/asm.h
+++ b/tools/arch/x86/include/asm/asm.h
@@ -108,18 +108,6 @@
#endif
-/*
- * Macros to generate condition code outputs from inline assembly,
- * The output operand must be type "bool".
- */
-#ifdef __GCC_ASM_FLAG_OUTPUTS__
-# define CC_SET(c) "\n\t/* output condition code " #c "*/\n"
-# define CC_OUT(c) "=@cc" #c
-#else
-# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n"
-# define CC_OUT(c) [_cc_ ## c] "=qm"
-#endif
-
#ifdef __KERNEL__
/* Exception table entry */
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index ee176236c2be..ccc01ad6ff7c 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -218,6 +218,7 @@
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */
#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */
#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */
+#define X86_FEATURE_COHERENCY_SFW_NO ( 8*32+ 4) /* SNP cache coherency software work around not needed */
#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */
#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
@@ -319,7 +320,7 @@
#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */
#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */
#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */
-#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */
+#define X86_FEATURE_LKGS (12*32+18) /* Like MOV_GS except MSR_KERNEL_GS_BASE = GS.base */
#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */
@@ -406,9 +407,12 @@
#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */
#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */
-/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
+/*
+ * Linux-defined word for use with scattered/synthetic bits.
+ */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
+
#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
@@ -443,6 +447,7 @@
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */
#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_SNP_SECURE_TSC (19*32+ 8) /* SEV-SNP Secure TSC */
#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */
#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */
@@ -456,10 +461,14 @@
#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_VERW_CLEAR (20*32+ 5) /* The memory form of VERW mitigates TSA */
#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
+
#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
+#define X86_FEATURE_GP_ON_USER_CPUID (20*32+17) /* User CPUID faulting */
+
#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */
#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
@@ -487,6 +496,12 @@
#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */
#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */
#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */
+#define X86_FEATURE_TSA_SQ_NO (21*32+11) /* AMD CPU not vulnerable to TSA-SQ */
+#define X86_FEATURE_TSA_L1_NO (21*32+12) /* AMD CPU not vulnerable to TSA-L1 */
+#define X86_FEATURE_CLEAR_CPU_BUF_VM (21*32+13) /* Clear CPU buffers using VERW before VMRUN */
+#define X86_FEATURE_IBPB_EXIT_TO_USER (21*32+14) /* Use IBPB on exit-to-userspace, see VMSCAPE bug */
+#define X86_FEATURE_ABMC (21*32+15) /* Assignable Bandwidth Monitoring Counters */
+#define X86_FEATURE_MSR_IMM (21*32+16) /* MSR immediate form instructions */
/*
* BUG word(s)
@@ -542,5 +557,6 @@
#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */
#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */
#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */
-
+#define X86_BUG_TSA X86_BUG( 1*32+ 9) /* "tsa" CPU is affected by Transient Scheduler Attacks */
+#define X86_BUG_VMSCAPE X86_BUG( 1*32+10) /* "vmscape" CPU is affected by VMSCAPE attacks from guests */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h
index 183aa662b165..099e926595bd 100644
--- a/tools/arch/x86/include/asm/inat.h
+++ b/tools/arch/x86/include/asm/inat.h
@@ -37,6 +37,8 @@
#define INAT_PFX_EVEX 15 /* EVEX prefix */
/* x86-64 REX2 prefix */
#define INAT_PFX_REX2 16 /* 0xD5 */
+/* AMD XOP prefix */
+#define INAT_PFX_XOP 17 /* 0x8F */
#define INAT_LSTPFX_MAX 3
#define INAT_LGCPFX_MAX 11
@@ -77,6 +79,7 @@
#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_XOPOK INAT_VEXOK
#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
#define INAT_EVEXONLY (1 << (INAT_FLAG_OFFS + 7))
#define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8))
@@ -111,6 +114,8 @@ extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
insn_byte_t vex_m,
insn_byte_t vex_pp);
+extern insn_attr_t inat_get_xop_attribute(insn_byte_t opcode,
+ insn_byte_t map_select);
/* Attribute checking functions */
static inline int inat_is_legacy_prefix(insn_attr_t attr)
@@ -164,6 +169,11 @@ static inline int inat_is_vex3_prefix(insn_attr_t attr)
return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
}
+static inline int inat_is_xop_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_XOP;
+}
+
static inline int inat_is_escape(insn_attr_t attr)
{
return attr & INAT_ESC_MASK;
@@ -229,6 +239,11 @@ static inline int inat_accept_vex(insn_attr_t attr)
return attr & INAT_VEXOK;
}
+static inline int inat_accept_xop(insn_attr_t attr)
+{
+ return attr & INAT_XOPOK;
+}
+
static inline int inat_must_vex(insn_attr_t attr)
{
return attr & (INAT_VEXONLY | INAT_EVEXONLY);
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index 0e5abd896ad4..8f10f2943370 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -71,7 +71,10 @@ struct insn {
* prefixes.bytes[3]: last prefix
*/
struct insn_field rex_prefix; /* REX prefix */
- struct insn_field vex_prefix; /* VEX prefix */
+ union {
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field xop_prefix; /* XOP prefix */
+ };
struct insn_field opcode; /*
* opcode.bytes[0]: opcode1
* opcode.bytes[1]: opcode2
@@ -135,6 +138,17 @@ struct insn {
#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+/* XOP bit fields */
+#define X86_XOP_R(xop) ((xop) & 0x80) /* XOP Byte2 */
+#define X86_XOP_X(xop) ((xop) & 0x40) /* XOP Byte2 */
+#define X86_XOP_B(xop) ((xop) & 0x20) /* XOP Byte2 */
+#define X86_XOP_M(xop) ((xop) & 0x1f) /* XOP Byte2 */
+#define X86_XOP_W(xop) ((xop) & 0x80) /* XOP Byte3 */
+#define X86_XOP_V(xop) ((xop) & 0x78) /* XOP Byte3 */
+#define X86_XOP_L(xop) ((xop) & 0x04) /* XOP Byte3 */
+#define X86_XOP_P(xop) ((xop) & 0x03) /* XOP Byte3 */
+#define X86_XOP_M_MIN 0x08 /* Min of XOP.M */
+#define X86_XOP_M_MAX 0x1f /* Max of XOP.M */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
extern int insn_get_prefixes(struct insn *insn);
@@ -178,7 +192,7 @@ static inline insn_byte_t insn_rex2_m_bit(struct insn *insn)
return X86_REX2_M(insn->rex_prefix.bytes[1]);
}
-static inline int insn_is_avx(struct insn *insn)
+static inline int insn_is_avx_or_xop(struct insn *insn)
{
if (!insn->prefixes.got)
insn_get_prefixes(insn);
@@ -192,6 +206,22 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
+/* If we already know this is AVX/XOP encoded */
+static inline int avx_insn_is_xop(struct insn *insn)
+{
+ insn_attr_t attr = inat_get_opcode_attribute(insn->vex_prefix.bytes[0]);
+
+ return inat_is_xop_prefix(attr);
+}
+
+static inline int insn_is_xop(struct insn *insn)
+{
+ if (!insn_is_avx_or_xop(insn))
+ return 0;
+
+ return avx_insn_is_xop(insn);
+}
+
static inline int insn_has_emulate_prefix(struct insn *insn)
{
return !!insn->emulate_prefix_size;
@@ -222,11 +252,26 @@ static inline insn_byte_t insn_vex_w_bit(struct insn *insn)
return X86_VEX_W(insn->vex_prefix.bytes[2]);
}
+static inline insn_byte_t insn_xop_map_bits(struct insn *insn)
+{
+ if (insn->xop_prefix.nbytes < 3) /* XOP is 3 bytes */
+ return 0;
+ return X86_XOP_M(insn->xop_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_xop_p_bits(struct insn *insn)
+{
+ return X86_XOP_P(insn->vex_prefix.bytes[2]);
+}
+
/* Get the last prefix id from last prefix or VEX prefix */
static inline int insn_last_prefix_id(struct insn *insn)
{
- if (insn_is_avx(insn))
+ if (insn_is_avx_or_xop(insn)) {
+ if (avx_insn_is_xop(insn))
+ return insn_xop_p_bits(insn);
return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+ }
if (insn->prefixes.bytes[3])
return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
@@ -267,7 +312,6 @@ static inline int insn_offset_immediate(struct insn *insn)
/**
* for_each_insn_prefix() -- Iterate prefixes in the instruction
* @insn: Pointer to struct insn.
- * @idx: Index storage.
* @prefix: Prefix byte.
*
* Iterate prefix bytes of given @insn. Each prefix byte is stored in @prefix
@@ -276,8 +320,8 @@ static inline int insn_offset_immediate(struct insn *insn)
* Since prefixes.nbytes can be bigger than 4 if some prefixes
* are repeated, it cannot be used for looping over the prefixes.
*/
-#define for_each_insn_prefix(insn, idx, prefix) \
- for (idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
+#define for_each_insn_prefix(insn, prefix) \
+ for (int idx = 0; idx < ARRAY_SIZE(insn->prefixes.bytes) && (prefix = insn->prefixes.bytes[idx]) != 0; idx++)
#define POP_SS_OPCODE 0x1f
#define MOV_SREG_OPCODE 0x8e
diff --git a/tools/arch/x86/include/asm/io.h b/tools/arch/x86/include/asm/io.h
new file mode 100644
index 000000000000..ecad61a3ea52
--- /dev/null
+++ b/tools/arch/x86/include/asm/io.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_X86_IO_H
+#define _TOOLS_ASM_X86_IO_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "special_insns.h"
+
+#define build_mmio_read(name, size, type, reg, barrier) \
+static inline type name(const volatile void __iomem *addr) \
+{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
+:"m" (*(volatile type __force *)addr) barrier); return ret; }
+
+#define build_mmio_write(name, size, type, reg, barrier) \
+static inline void name(type val, volatile void __iomem *addr) \
+{ asm volatile("mov" size " %0,%1": :reg (val), \
+"m" (*(volatile type __force *)addr) barrier); }
+
+build_mmio_read(readb, "b", unsigned char, "=q", :"memory")
+build_mmio_read(readw, "w", unsigned short, "=r", :"memory")
+build_mmio_read(readl, "l", unsigned int, "=r", :"memory")
+
+build_mmio_read(__readb, "b", unsigned char, "=q", )
+build_mmio_read(__readw, "w", unsigned short, "=r", )
+build_mmio_read(__readl, "l", unsigned int, "=r", )
+
+build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
+build_mmio_write(writew, "w", unsigned short, "r", :"memory")
+build_mmio_write(writel, "l", unsigned int, "r", :"memory")
+
+build_mmio_write(__writeb, "b", unsigned char, "q", )
+build_mmio_write(__writew, "w", unsigned short, "r", )
+build_mmio_write(__writel, "l", unsigned int, "r", )
+
+#define readb readb
+#define readw readw
+#define readl readl
+#define readb_relaxed(a) __readb(a)
+#define readw_relaxed(a) __readw(a)
+#define readl_relaxed(a) __readl(a)
+#define __raw_readb __readb
+#define __raw_readw __readw
+#define __raw_readl __readl
+
+#define writeb writeb
+#define writew writew
+#define writel writel
+#define writeb_relaxed(v, a) __writeb(v, a)
+#define writew_relaxed(v, a) __writew(v, a)
+#define writel_relaxed(v, a) __writel(v, a)
+#define __raw_writeb __writeb
+#define __raw_writew __writew
+#define __raw_writel __writel
+
+#ifdef __x86_64__
+
+build_mmio_read(readq, "q", u64, "=r", :"memory")
+build_mmio_read(__readq, "q", u64, "=r", )
+build_mmio_write(writeq, "q", u64, "r", :"memory")
+build_mmio_write(__writeq, "q", u64, "r", )
+
+#define readq_relaxed(a) __readq(a)
+#define writeq_relaxed(v, a) __writeq(v, a)
+
+#define __raw_readq __readq
+#define __raw_writeq __writeq
+
+/* Let people know that we have them */
+#define readq readq
+#define writeq writeq
+
+#endif /* __x86_64__ */
+
+#include <asm-generic/io.h>
+
+/**
+ * iosubmit_cmds512 - copy data to single MMIO location, in 512-bit units
+ * @dst: destination, in MMIO space (must be 512-bit aligned)
+ * @src: source
+ * @count: number of 512 bits quantities to submit
+ *
+ * Submit data from kernel space to MMIO space, in units of 512 bits at a
+ * time. Order of access is not guaranteed, nor is a memory barrier
+ * performed afterwards.
+ *
+ * Warning: Do not use this helper unless your driver has checked that the CPU
+ * instruction is supported on the platform.
+ */
+static inline void iosubmit_cmds512(void __iomem *dst, const void *src,
+ size_t count)
+{
+ const u8 *from = src;
+ const u8 *end = from + count * 64;
+
+ while (from < end) {
+ movdir64b(dst, from);
+ from += 64;
+ }
+}
+
+#endif /* _TOOLS_ASM_X86_IO_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 5cfb5d74dd5f..9e1720d73244 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -315,12 +315,17 @@
#define PERF_CAP_PT_IDX 16
#define MSR_PEBS_LD_LAT_THRESHOLD 0x000003f6
-#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
-#define PERF_CAP_ARCH_REG BIT_ULL(7)
-#define PERF_CAP_PEBS_FORMAT 0xf00
-#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
-#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
- PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE)
+
+#define PERF_CAP_LBR_FMT 0x3f
+#define PERF_CAP_PEBS_TRAP BIT_ULL(6)
+#define PERF_CAP_ARCH_REG BIT_ULL(7)
+#define PERF_CAP_PEBS_FORMAT 0xf00
+#define PERF_CAP_FW_WRITES BIT_ULL(13)
+#define PERF_CAP_PEBS_BASELINE BIT_ULL(14)
+#define PERF_CAP_PEBS_TIMING_INFO BIT_ULL(17)
+#define PERF_CAP_PEBS_MASK (PERF_CAP_PEBS_TRAP | PERF_CAP_ARCH_REG | \
+ PERF_CAP_PEBS_FORMAT | PERF_CAP_PEBS_BASELINE | \
+ PERF_CAP_PEBS_TIMING_INFO)
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
@@ -419,6 +424,7 @@
#define DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1UL << 12)
#define DEBUGCTLMSR_FREEZE_IN_SMM_BIT 14
#define DEBUGCTLMSR_FREEZE_IN_SMM (1UL << DEBUGCTLMSR_FREEZE_IN_SMM_BIT)
+#define DEBUGCTLMSR_RTM_DEBUG BIT(15)
#define MSR_PEBS_FRONTEND 0x000003f7
@@ -630,6 +636,11 @@
#define MSR_AMD_PPIN 0xc00102f1
#define MSR_AMD64_CPUID_FN_7 0xc0011002
#define MSR_AMD64_CPUID_FN_1 0xc0011004
+
+#define MSR_AMD64_CPUID_EXT_FEAT 0xc0011005
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT 54
+#define MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT BIT_ULL(MSR_AMD64_CPUID_EXT_FEAT_TOPOEXT_BIT)
+
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_TW_CFG 0xc0011023
@@ -698,8 +709,15 @@
#define MSR_AMD64_SNP_VMSA_REG_PROT BIT_ULL(MSR_AMD64_SNP_VMSA_REG_PROT_BIT)
#define MSR_AMD64_SNP_SMT_PROT_BIT 17
#define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT)
-#define MSR_AMD64_SNP_RESV_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC_BIT 18
+#define MSR_AMD64_SNP_SECURE_AVIC BIT_ULL(MSR_AMD64_SNP_SECURE_AVIC_BIT)
+#define MSR_AMD64_SNP_RESV_BIT 19
#define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT)
+#define MSR_AMD64_SAVIC_CONTROL 0xc0010138
+#define MSR_AMD64_SAVIC_EN_BIT 0
+#define MSR_AMD64_SAVIC_EN BIT_ULL(MSR_AMD64_SAVIC_EN_BIT)
+#define MSR_AMD64_SAVIC_ALLOWEDNMI_BIT 1
+#define MSR_AMD64_SAVIC_ALLOWEDNMI BIT_ULL(MSR_AMD64_SAVIC_ALLOWEDNMI_BIT)
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
#define MSR_AMD64_RMP_CFG 0xc0010136
@@ -732,6 +750,12 @@
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300
#define MSR_AMD64_PERF_CNTR_GLOBAL_CTL 0xc0000301
#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR 0xc0000302
+#define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET 0xc0000303
+
+/* AMD Hardware Feedback Support MSRs */
+#define MSR_AMD_WORKLOAD_CLASS_CONFIG 0xc0000500
+#define MSR_AMD_WORKLOAD_CLASS_ID 0xc0000501
+#define MSR_AMD_WORKLOAD_HRST 0xc0000502
/* AMD Last Branch Record MSRs */
#define MSR_AMD64_LBR_SELECT 0xc000010e
@@ -831,6 +855,7 @@
#define MSR_K7_HWCR_SMMLOCK BIT_ULL(MSR_K7_HWCR_SMMLOCK_BIT)
#define MSR_K7_HWCR_IRPERF_EN_BIT 30
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
+#define MSR_K7_HWCR_CPUID_USER_DIS_BIT 35
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
#define MSR_K7_HWCR_CPB_DIS_BIT 25
@@ -1216,6 +1241,8 @@
/* - AMD: */
#define MSR_IA32_MBA_BW_BASE 0xc0000200
#define MSR_IA32_SMBA_BW_BASE 0xc0000280
+#define MSR_IA32_L3_QOS_ABMC_CFG 0xc00003fd
+#define MSR_IA32_L3_QOS_EXT_CFG 0xc00003ff
#define MSR_IA32_EVT_CFG_BASE 0xc0000400
/* AMD-V MSRs */
diff --git a/tools/arch/x86/include/asm/special_insns.h b/tools/arch/x86/include/asm/special_insns.h
new file mode 100644
index 000000000000..04af42a99c38
--- /dev/null
+++ b/tools/arch/x86/include/asm/special_insns.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_X86_SPECIAL_INSNS_H
+#define _TOOLS_ASM_X86_SPECIAL_INSNS_H
+
+/* The dst parameter must be 64-bytes aligned */
+static inline void movdir64b(void *dst, const void *src)
+{
+ const struct { char _[64]; } *__src = src;
+ struct { char _[64]; } *__dst = dst;
+
+ /*
+ * MOVDIR64B %(rdx), rax.
+ *
+ * Both __src and __dst must be memory constraints in order to tell the
+ * compiler that no other memory accesses should be reordered around
+ * this one.
+ *
+ * Also, both must be supplied as lvalues because this tells
+ * the compiler what the object is (its size) the instruction accesses.
+ * I.e., not the pointers but what they point to, thus the deref'ing '*'.
+ */
+ asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
+ : "+m" (*__dst)
+ : "m" (*__src), "a" (__dst), "d" (__src));
+}
+
+#endif /* _TOOLS_ASM_X86_SPECIAL_INSNS_H */
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 6f3499507c5e..d420c9c066d4 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -35,6 +35,11 @@
#define MC_VECTOR 18
#define XM_VECTOR 19
#define VE_VECTOR 20
+#define CP_VECTOR 21
+
+#define HV_VECTOR 28
+#define VC_VECTOR 29
+#define SX_VECTOR 30
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_PIT
@@ -411,6 +416,35 @@ struct kvm_xcrs {
__u64 padding[16];
};
+#define KVM_X86_REG_TYPE_MSR 2
+#define KVM_X86_REG_TYPE_KVM 3
+
+#define KVM_X86_KVM_REG_SIZE(reg) \
+({ \
+ reg == KVM_REG_GUEST_SSP ? KVM_REG_SIZE_U64 : 0; \
+})
+
+#define KVM_X86_REG_TYPE_SIZE(type, reg) \
+({ \
+ __u64 type_size = (__u64)type << 32; \
+ \
+ type_size |= type == KVM_X86_REG_TYPE_MSR ? KVM_REG_SIZE_U64 : \
+ type == KVM_X86_REG_TYPE_KVM ? KVM_X86_KVM_REG_SIZE(reg) : \
+ 0; \
+ type_size; \
+})
+
+#define KVM_X86_REG_ID(type, index) \
+ (KVM_REG_X86 | KVM_X86_REG_TYPE_SIZE(type, index) | index)
+
+#define KVM_X86_REG_MSR(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_MSR, index)
+#define KVM_X86_REG_KVM(index) \
+ KVM_X86_REG_ID(KVM_X86_REG_TYPE_KVM, index)
+
+/* KVM-defined registers starting from 0 */
+#define KVM_REG_GUEST_SSP 0
+
#define KVM_SYNC_X86_REGS (1UL << 0)
#define KVM_SYNC_X86_SREGS (1UL << 1)
#define KVM_SYNC_X86_EVENTS (1UL << 2)
@@ -965,7 +999,13 @@ struct kvm_tdx_cmd {
struct kvm_tdx_capabilities {
__u64 supported_attrs;
__u64 supported_xfam;
- __u64 reserved[254];
+
+ __u64 kernel_tdvmcallinfo_1_r11;
+ __u64 user_tdvmcallinfo_1_r11;
+ __u64 kernel_tdvmcallinfo_1_r12;
+ __u64 user_tdvmcallinfo_1_r12;
+
+ __u64 reserved[250];
/* Configurable CPUID bits for userspace */
struct kvm_cpuid2 cpuid;
diff --git a/tools/arch/x86/include/uapi/asm/kvm_perf.h b/tools/arch/x86/include/uapi/asm/kvm_perf.h
deleted file mode 100644
index 125cf5cdf6c5..000000000000
--- a/tools/arch/x86/include/uapi/asm/kvm_perf.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_X86_KVM_PERF_H
-#define _ASM_X86_KVM_PERF_H
-
-#include <asm/svm.h>
-#include <asm/vmx.h>
-#include <asm/kvm.h>
-
-#define DECODE_STR_LEN 20
-
-#define VCPU_ID "vcpu_id"
-
-#define KVM_ENTRY_TRACE "kvm:kvm_entry"
-#define KVM_EXIT_TRACE "kvm:kvm_exit"
-#define KVM_EXIT_REASON "exit_reason"
-
-#endif /* _ASM_X86_KVM_PERF_H */
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index 9c640a521a67..650e3256ea7d 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -118,6 +118,10 @@
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
+#define SVM_VMGEXIT_SAVIC 0x8000001a
+#define SVM_VMGEXIT_SAVIC_REGISTER_GPA 0
+#define SVM_VMGEXIT_SAVIC_UNREGISTER_GPA 1
+#define SVM_VMGEXIT_SAVIC_SELF_GPA ~0ULL
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h
index f0f4a4cf84a7..1baa86dfe029 100644
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -93,7 +93,10 @@
#define EXIT_REASON_TPAUSE 68
#define EXIT_REASON_BUS_LOCK 74
#define EXIT_REASON_NOTIFY 75
+#define EXIT_REASON_SEAMCALL 76
#define EXIT_REASON_TDCALL 77
+#define EXIT_REASON_MSR_READ_IMM 84
+#define EXIT_REASON_MSR_WRITE_IMM 85
#define VMX_EXIT_REASONS \
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -158,7 +161,9 @@
{ EXIT_REASON_TPAUSE, "TPAUSE" }, \
{ EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \
{ EXIT_REASON_NOTIFY, "NOTIFY" }, \
- { EXIT_REASON_TDCALL, "TDCALL" }
+ { EXIT_REASON_TDCALL, "TDCALL" }, \
+ { EXIT_REASON_MSR_READ_IMM, "MSR_READ_IMM" }, \
+ { EXIT_REASON_MSR_WRITE_IMM, "MSR_WRITE_IMM" }
#define VMX_EXIT_REASON_FLAGS \
{ VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" }
diff --git a/tools/arch/x86/lib/inat.c b/tools/arch/x86/lib/inat.c
index dfbcc6405941..ffcb0e27453b 100644
--- a/tools/arch/x86/lib/inat.c
+++ b/tools/arch/x86/lib/inat.c
@@ -81,3 +81,16 @@ insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
return table[opcode];
}
+/*
+ * Look up the attribute of @opcode in the XOP opcode map selected by
+ * @map_select. Returns 0 when @map_select lies outside
+ * [X86_XOP_M_MIN, X86_XOP_M_MAX] or when no table exists for that map.
+ */
+insn_attr_t inat_get_xop_attribute(insn_byte_t opcode, insn_byte_t map_select)
+{
+ const insn_attr_t *table;
+
+ if (map_select < X86_XOP_M_MIN || map_select > X86_XOP_M_MAX)
+ return 0;
+ /* inat_xop_tables[] is indexed from zero, so rebase the map number */
+ map_select -= X86_XOP_M_MIN;
+ /* inat_xop_tables[] holds a single table per XOP map */
+ table = inat_xop_tables[map_select];
+ if (!table)
+ return 0;
+ return table[opcode];
+}
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index bce69c6bfa69..1d1c57c74d1f 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -200,12 +200,15 @@ found:
}
insn->rex_prefix.got = 1;
- /* Decode VEX prefix */
+ /* Decode VEX/XOP prefix */
b = peek_next(insn_byte_t, insn);
- attr = inat_get_opcode_attribute(b);
- if (inat_is_vex_prefix(attr)) {
+ if (inat_is_vex_prefix(attr) || inat_is_xop_prefix(attr)) {
insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
- if (!insn->x86_64) {
+
+ if (inat_is_xop_prefix(attr) && X86_MODRM_REG(b2) == 0) {
+ /* Grp1A.0 is always POP Ev */
+ goto vex_end;
+ } else if (!insn->x86_64) {
/*
* In 32-bits mode, if the [7:6] bits (mod bits of
* ModRM) on the second byte are not 11b, it is
@@ -226,13 +229,13 @@ found:
if (insn->x86_64 && X86_VEX_W(b2))
/* VEX.W overrides opnd_size */
insn->opnd_bytes = 8;
- } else if (inat_is_vex3_prefix(attr)) {
+ } else if (inat_is_vex3_prefix(attr) || inat_is_xop_prefix(attr)) {
b2 = peek_nbyte_next(insn_byte_t, insn, 2);
insn_set_byte(&insn->vex_prefix, 2, b2);
insn->vex_prefix.nbytes = 3;
insn->next_byte += 3;
if (insn->x86_64 && X86_VEX_W(b2))
- /* VEX.W overrides opnd_size */
+ /* VEX.W/XOP.W overrides opnd_size */
insn->opnd_bytes = 8;
} else {
/*
@@ -288,9 +291,22 @@ int insn_get_opcode(struct insn *insn)
insn_set_byte(opcode, 0, op);
opcode->nbytes = 1;
- /* Check if there is VEX prefix or not */
- if (insn_is_avx(insn)) {
+ /* Check if there is VEX/XOP prefix or not */
+ if (insn_is_avx_or_xop(insn)) {
insn_byte_t m, p;
+
+ /* XOP prefix has different encoding */
+ if (unlikely(avx_insn_is_xop(insn))) {
+ m = insn_xop_map_bits(insn);
+ insn->attr = inat_get_xop_attribute(op, m);
+ if (!inat_accept_xop(insn->attr)) {
+ insn->attr = 0;
+ return -EINVAL;
+ }
+ /* XOP has only 1 byte for opcode */
+ goto end;
+ }
+
m = insn_vex_m_bits(insn);
p = insn_vex_p_bits(insn);
insn->attr = inat_get_avx_attribute(op, m, p);
@@ -383,7 +399,8 @@ int insn_get_modrm(struct insn *insn)
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
insn->attr);
- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
+ if (insn_is_avx_or_xop(insn) && !inat_accept_vex(insn->attr) &&
+ !inat_accept_xop(insn->attr)) {
/* Bad insn */
insn->attr = 0;
return -EINVAL;
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index 262f7ca1fb95..2a4e69ecc2de 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -27,6 +27,11 @@
# (evo): this opcode is changed by EVEX prefix (EVEX opcode)
# (v): this opcode requires VEX prefix.
# (v1): this opcode only supports 128bit VEX.
+# (xop): this opcode accepts XOP prefix.
+#
+# XOP Superscripts
+# (W=0): this opcode requires XOP.W == 0
+# (W=1): this opcode requires XOP.W == 1
#
# Last Prefix Superscripts
# - (66): the last prefix is 0x66
@@ -194,7 +199,7 @@ AVXcode:
8c: MOV Ev,Sw
8d: LEA Gv,M
8e: MOV Sw,Ew
-8f: Grp1A (1A) | POP Ev (d64)
+8f: Grp1A (1A) | POP Ev (d64) | XOP (Prefix)
# 0x90 - 0x9f
90: NOP | PAUSE (F3) | XCHG r8,rAX
91: XCHG rCX/r9,rAX
@@ -1106,6 +1111,84 @@ AVXcode: 7
f8: URDMSR Rq,Id (F2),(v1),(11B) | UWRMSR Id,Rq (F3),(v1),(11B)
EndTable
+# From AMD64 Architecture Programmer's Manual Vol3, Appendix A.1.5
+Table: XOP map 8h
+Referrer:
+XOPcode: 0
+85: VPMACSSWW Vo,Ho,Wo,Lo
+86: VPMACSSWD Vo,Ho,Wo,Lo
+87: VPMACSSDQL Vo,Ho,Wo,Lo
+8e: VPMACSSDD Vo,Ho,Wo,Lo
+8f: VPMACSSDQH Vo,Ho,Wo,Lo
+95: VPMACSWW Vo,Ho,Wo,Lo
+96: VPMACSWD Vo,Ho,Wo,Lo
+97: VPMACSDQL Vo,Ho,Wo,Lo
+9e: VPMACSDD Vo,Ho,Wo,Lo
+9f: VPMACSDQH Vo,Ho,Wo,Lo
+a2: VPCMOV Vx,Hx,Wx,Lx (W=0) | VPCMOV Vx,Hx,Lx,Wx (W=1)
+a3: VPPERM Vo,Ho,Wo,Lo (W=0) | VPPERM Vo,Ho,Lo,Wo (W=1)
+a6: VPMADCSSWD Vo,Ho,Wo,Lo
+b6: VPMADCSWD Vo,Ho,Wo,Lo
+c0: VPROTB Vo,Wo,Ib
+c1: VPROTW Vo,Wo,Ib
+c2: VPROTD Vo,Wo,Ib
+c3: VPROTQ Vo,Wo,Ib
+cc: VPCOMccB Vo,Ho,Wo,Ib
+cd: VPCOMccW Vo,Ho,Wo,Ib
+ce: VPCOMccD Vo,Ho,Wo,Ib
+cf: VPCOMccQ Vo,Ho,Wo,Ib
+ec: VPCOMccUB Vo,Ho,Wo,Ib
+ed: VPCOMccUW Vo,Ho,Wo,Ib
+ee: VPCOMccUD Vo,Ho,Wo,Ib
+ef: VPCOMccUQ Vo,Ho,Wo,Ib
+EndTable
+
+Table: XOP map 9h
+Referrer:
+XOPcode: 1
+01: GrpXOP1
+02: GrpXOP2
+12: GrpXOP3
+80: VFRCZPS Vx,Wx
+81: VFRCZPD Vx,Wx
+82: VFRCZSS Vq,Wss
+83: VFRCZSD Vq,Wsd
+90: VPROTB Vo,Wo,Ho (W=0) | VPROTB Vo,Ho,Wo (W=1)
+91: VPROTW Vo,Wo,Ho (W=0) | VPROTW Vo,Ho,Wo (W=1)
+92: VPROTD Vo,Wo,Ho (W=0) | VPROTD Vo,Ho,Wo (W=1)
+93: VPROTQ Vo,Wo,Ho (W=0) | VPROTQ Vo,Ho,Wo (W=1)
+94: VPSHLB Vo,Wo,Ho (W=0) | VPSHLB Vo,Ho,Wo (W=1)
+95: VPSHLW Vo,Wo,Ho (W=0) | VPSHLW Vo,Ho,Wo (W=1)
+96: VPSHLD Vo,Wo,Ho (W=0) | VPSHLD Vo,Ho,Wo (W=1)
+97: VPSHLQ Vo,Wo,Ho (W=0) | VPSHLQ Vo,Ho,Wo (W=1)
+98: VPSHAB Vo,Wo,Ho (W=0) | VPSHAB Vo,Ho,Wo (W=1)
+99: VPSHAW Vo,Wo,Ho (W=0) | VPSHAW Vo,Ho,Wo (W=1)
+9a: VPSHAD Vo,Wo,Ho (W=0) | VPSHAD Vo,Ho,Wo (W=1)
+9b: VPSHAQ Vo,Wo,Ho (W=0) | VPSHAQ Vo,Ho,Wo (W=1)
+c1: VPHADDBW Vo,Wo
+c2: VPHADDBD Vo,Wo
+c3: VPHADDBQ Vo,Wo
+c6: VPHADDWD Vo,Wo
+c7: VPHADDWQ Vo,Wo
+cb: VPHADDDQ Vo,Wo
+d1: VPHADDUBW Vo,Wo
+d2: VPHADDUBD Vo,Wo
+d3: VPHADDUBQ Vo,Wo
+d6: VPHADDUWD Vo,Wo
+d7: VPHADDUWQ Vo,Wo
+db: VPHADDUDQ Vo,Wo
+e1: VPHSUBBW Vo,Wo
+e2: VPHSUBWD Vo,Wo
+e3: VPHSUBDQ Vo,Wo
+EndTable
+
+Table: XOP map Ah
+Referrer:
+XOPcode: 2
+10: BEXTR Gy,Ey,Id
+12: GrpXOP4
+EndTable
+
GrpTable: Grp1
0: ADD
1: OR
@@ -1320,3 +1403,29 @@ GrpTable: GrpRNG
4: xcrypt-cfb
5: xcrypt-ofb
EndTable
+
+# GrpXOP1-4 is shown in AMD APM Vol.3 Appendix A as XOP group #1-4
+GrpTable: GrpXOP1
+1: BLCFILL By,Ey (xop)
+2: BLSFILL By,Ey (xop)
+3: BLCS By,Ey (xop)
+4: TZMSK By,Ey (xop)
+5: BLCIC By,Ey (xop)
+6: BLSIC By,Ey (xop)
+7: T1MSKC By,Ey (xop)
+EndTable
+
+GrpTable: GrpXOP2
+1: BLCMSK By,Ey (xop)
+6: BLCI By,Ey (xop)
+EndTable
+
+GrpTable: GrpXOP3
+0: LLWPCB Ry (xop)
+1: SLWPCB Ry (xop)
+EndTable
+
+GrpTable: GrpXOP4
+0: LWPINS By,Ed,Id (xop)
+1: LWPVAL By,Ed,Id (xop)
+EndTable
diff --git a/tools/arch/x86/tools/gen-cpu-feature-names-x86.awk b/tools/arch/x86/tools/gen-cpu-feature-names-x86.awk
new file mode 100644
index 000000000000..cc4c7a3e6c2e
--- /dev/null
+++ b/tools/arch/x86/tools/gen-cpu-feature-names-x86.awk
@@ -0,0 +1,34 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2025, Oracle and/or its affiliates.
+#
+# Usage: awk -f gen-cpu-feature-names-x86.awk cpufeatures.h > cpu-feature-names.c
+#
+
+# Emit the opening of the generated C array and set up the pattern used to
+# pull the index expression out of each #define line.
+BEGIN {
+ print "/* cpu feature name array generated from cpufeatures.h */"
+ print "/* Do not change this code. */"
+ print
+ print "static const char *cpu_feature_names[(NCAPINTS+NBUGINTS)*32] = {"
+
+ # A parenthesized run made only of digits, '*', '+' and spaces
+ value_expr = "\\([0-9*+ ]+\\)"
+}
+
+# Feature flags: the parenthesized expression is used directly as the index,
+# and the macro name ($2) becomes the string.
+/^#define X86_FEATURE_/ {
+ if (match($0, value_expr)) {
+ # Strip the surrounding parentheses from the matched text
+ value = substr($0, RSTART + 1, RLENGTH - 2)
+ print "\t[" value "] = \"" $2 "\","
+ }
+}
+
+# Bug flags: same extraction, but the index is offset by NCAPINTS*32 so the
+# entries land after the feature words.
+/^#define X86_BUG_/ {
+ if (match($0, value_expr)) {
+ # Strip the surrounding parentheses from the matched text
+ value = substr($0, RSTART + 1, RLENGTH - 2)
+ print "\t[NCAPINTS*32+(" value ")] = \"" $2 "\","
+ }
+}
+
+# Close the array initializer.
+END {
+ print "};"
+}
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index 2c19d7fc8a85..7ea1b75e59b7 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -21,6 +21,7 @@ function clear_vars() {
eid = -1 # escape id
gid = -1 # group id
aid = -1 # AVX id
+ xopid = -1 # XOP id
tname = ""
}
@@ -39,9 +40,11 @@ BEGIN {
ggid = 1
geid = 1
gaid = 0
+ gxopid = 0
delete etable
delete gtable
delete atable
+ delete xoptable
opnd_expr = "^[A-Za-z/]"
ext_expr = "^\\("
@@ -61,6 +64,7 @@ BEGIN {
imm_flag["Ob"] = "INAT_MOFFSET"
imm_flag["Ov"] = "INAT_MOFFSET"
imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Lo"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
force64_expr = "\\([df]64\\)"
@@ -87,6 +91,8 @@ BEGIN {
evexonly_expr = "\\(ev\\)"
# (es) is the same as (ev) but also "SCALABLE" i.e. W and pp determine operand size
evex_scalable_expr = "\\(es\\)"
+ # All opcodes in XOP table or with (xop) superscript accept XOP prefix
+ xopok_expr = "\\(xop\\)"
prefix_expr = "\\(Prefix\\)"
prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
@@ -106,6 +112,7 @@ BEGIN {
prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
prefix_num["EVEX"] = "INAT_PFX_EVEX"
prefix_num["REX2"] = "INAT_PFX_REX2"
+ prefix_num["XOP"] = "INAT_PFX_XOP"
clear_vars()
}
@@ -147,6 +154,7 @@ function array_size(arr, i,c) {
if (NF != 1) {
# AVX/escape opcode table
aid = $2
+ xopid = -1
if (gaid <= aid)
gaid = aid + 1
if (tname == "") # AVX only opcode table
@@ -156,6 +164,20 @@ function array_size(arr, i,c) {
tname = "inat_primary_table"
}
+# Handle an "XOPcode: <id>" table header: record the XOP map id, clear the
+# AVX id, keep gxopid one past the largest id seen so far, and give the
+# table a name if it does not have one yet.
+/^XOPcode:/ {
+ if (NF != 1) {
+ # XOP opcode table
+ xopid = $2
+ aid = -1
+ if (gxopid <= xopid)
+ gxopid = xopid + 1
+ if (tname == "") # XOP only opcode table
+ tname = sprintf("inat_xop_table_%d", $2)
+ }
+ if (xopid == -1 && eid == -1) # primary opcode table
+ tname = "inat_primary_table"
+}
+
/^GrpTable:/ {
print "/* " $0 " */"
if (!($2 in group))
@@ -206,6 +228,8 @@ function print_table(tbl,name,fmt,n)
etable[eid,0] = tname
if (aid >= 0)
atable[aid,0] = tname
+ else if (xopid >= 0)
+ xoptable[xopid] = tname
}
if (array_size(lptable1) != 0) {
print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
@@ -347,6 +371,8 @@ function convert_operands(count,opnd, i,j,imm,mod)
flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
flags = add_flags(flags, "INAT_VEXOK")
+ else if (match(ext, xopok_expr) || xopid >= 0)
+ flags = add_flags(flags, "INAT_XOPOK")
# check prefixes
if (match(ext, prefix_expr)) {
@@ -413,6 +439,14 @@ END {
print " ["i"]["j"] = "atable[i,j]","
print "};\n"
+ print "/* XOP opcode map array */"
+ print "const insn_attr_t * const inat_xop_tables[X86_XOP_M_MAX - X86_XOP_M_MIN + 1]" \
+ " = {"
+ for (i = 0; i < gxopid; i++)
+ if (xoptable[i])
+ print " ["i"] = "xoptable[i]","
+ print "};"
+
print "#else /* !__BOOT_COMPRESSED */\n"
print "/* Escape opcode map array */"
@@ -430,6 +464,10 @@ END {
"[INAT_LSTPFX_MAX + 1];"
print ""
+ print "/* XOP opcode map array */"
+ print "static const insn_attr_t *inat_xop_tables[X86_XOP_M_MAX - X86_XOP_M_MIN + 1];"
+ print ""
+
print "static void inat_init_tables(void)"
print "{"
@@ -455,6 +493,12 @@ END {
if (atable[i,j])
print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";"
+ print ""
+ print "\t/* Print XOP opcode map array */"
+ for (i = 0; i < gxopid; i++)
+ if (xoptable[i])
+ print "\tinat_xop_tables["i"] = "xoptable[i]";"
+
print "}"
print "#endif"
}
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index 57c669d2aa90..55d59ed507d5 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -193,7 +193,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
if (stat.st_size < BOOTCONFIG_FOOTER_SIZE)
return 0;
- if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0)
+ if (lseek(fd, -(off_t)BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0)
return pr_errno("Failed to lseek for magic", -errno);
if (read(fd, magic, BOOTCONFIG_MAGIC_LEN) < 0)
@@ -203,7 +203,7 @@ static int load_xbc_from_initrd(int fd, char **buf)
if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0)
return 0;
- if (lseek(fd, -BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0)
+ if (lseek(fd, -(off_t)BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0)
return pr_errno("Failed to lseek for size", -errno);
if (read(fd, &size, sizeof(uint32_t)) < 0)
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 062bbd6cd048..fd2585af1252 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -32,7 +32,7 @@ FEATURE_TESTS = libbfd disassembler-four-args disassembler-init-styled
FEATURE_DISPLAY = libbfd
check_feat := 1
-NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean resolve_btfids_clean
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
check_feat := 0
@@ -70,7 +70,7 @@ $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
-all: $(PROGS) bpftool runqslower
+all: $(PROGS) bpftool
$(OUTPUT)bpf_jit_disasm: CFLAGS += -DPACKAGE='bpf_jit_disasm'
$(OUTPUT)bpf_jit_disasm: $(OUTPUT)bpf_jit_disasm.o
@@ -86,7 +86,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
-clean: bpftool_clean runqslower_clean resolve_btfids_clean
+clean: bpftool_clean resolve_btfids_clean
$(call QUIET_CLEAN, bpf-progs)
$(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
$(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
@@ -112,12 +112,6 @@ bpftool_install:
bpftool_clean:
$(call descend,bpftool,clean)
-runqslower:
- $(call descend,runqslower)
-
-runqslower_clean:
- $(call descend,runqslower,clean)
-
resolve_btfids:
$(call descend,resolve_btfids)
@@ -125,5 +119,4 @@ resolve_btfids_clean:
$(call descend,resolve_btfids,clean)
.PHONY: all install clean bpftool bpftool_install bpftool_clean \
- runqslower runqslower_clean \
resolve_btfids resolve_btfids_clean
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index ca860fd97d8d..d0a36f442db7 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -16,7 +16,7 @@ SYNOPSIS
**bpftool** [*OPTIONS*] **gen** *COMMAND*
-*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } }
+*OPTIONS* := { |COMMON_OPTIONS| | { **-L** | **--use-loader** } | [ { **-S** | **--sign** } **-k** <private_key.pem> **-i** <certificate.x509> ] }
*COMMAND* := { **object** | **skeleton** | **help** }
@@ -186,6 +186,17 @@ OPTIONS
skeleton). A light skeleton contains a loader eBPF program. It does not use
the majority of the libbpf infrastructure, and does not need libelf.
+-S, --sign
+ For skeletons, generate a signed skeleton. This option must be used with
+ **-k** and **-i**. Using this flag implicitly enables **--use-loader**.
+
+-k <private_key.pem>
+ Path to the private key file in PEM format, required for signing.
+
+-i <certificate.x509>
+ Path to the X.509 certificate file in PEM or DER format, required for
+ signing.
+
EXAMPLES
========
**$ cat example1.bpf.c**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 252e4c538edb..1af3305ea2b2 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -55,7 +55,8 @@ MAP COMMANDS
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
| | **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
-| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena** }
+| | **task_storage** | **bloom_filter** | **user_ringbuf** | **cgrp_storage** | **arena**
+| | **insn_array** }
DESCRIPTION
===========
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index f69fd92df8d8..35aeeaf5f711 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -18,7 +18,7 @@ SYNOPSIS
*OPTIONS* := { |COMMON_OPTIONS| |
{ **-f** | **--bpffs** } | { **-m** | **--mapcompat** } | { **-n** | **--nomount** } |
-{ **-L** | **--use-loader** } }
+{ **-L** | **--use-loader** } | [ { **-S** | **--sign** } **-k** <private_key.pem> **-i** <certificate.x509> ] }
*COMMANDS* :=
{ **show** | **list** | **dump xlated** | **dump jited** | **pin** | **load** |
@@ -182,7 +182,7 @@ bpftool prog tracelog
bpftool prog tracelog { stdout | stderr } *PROG*
Dump the BPF stream of the program. BPF programs can write to these streams
- at runtime with the **bpf_stream_vprintk**\ () kfunc. The kernel may write
+ at runtime with the **bpf_stream_vprintk_impl**\ () kfunc. The kernel may write
error messages to the standard error stream. This facility should be used
only for debugging purposes.
@@ -248,6 +248,18 @@ OPTIONS
creating the maps, and loading the programs (see **bpftool prog tracelog**
as a way to dump those messages).
+-S, --sign
+ Enable signing of the BPF program before loading. This option must be
+ used with **-k** and **-i**. Using this flag implicitly enables
+ **--use-loader**.
+
+-k <private_key.pem>
+ Path to the private key file in PEM format, required when signing.
+
+-i <certificate.x509>
+ Path to the X.509 certificate file in PEM or DER format, required when
+ signing.
+
EXAMPLES
========
**# bpftool prog show**
diff --git a/tools/bpf/bpftool/Documentation/bpftool-token.rst b/tools/bpf/bpftool/Documentation/bpftool-token.rst
new file mode 100644
index 000000000000..d082c499cfe3
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-token.rst
@@ -0,0 +1,64 @@
+.. SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+================
+bpftool-token
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF tokens
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+.. include:: substitutions.rst
+
+SYNOPSIS
+========
+
+**bpftool** [*OPTIONS*] **token** *COMMAND*
+
+*OPTIONS* := { |COMMON_OPTIONS| }
+
+*COMMANDS* := { **show** | **list** | **help** }
+
+TOKEN COMMANDS
+===============
+
+| **bpftool** **token** { **show** | **list** }
+| **bpftool** **token help**
+|
+
+DESCRIPTION
+===========
+bpftool token { show | list }
+ List BPF token information for each *bpffs* mount point containing token
+ information on the system. Information includes mount point path, allowed
+ **bpf**\ () system call commands, maps, programs, and attach types for the
+ token.
+
+bpftool token help
+ Print short help message.
+
+OPTIONS
+========
+.. include:: common_options.rst
+
+EXAMPLES
+========
+|
+| **# mkdir -p /sys/fs/bpf/token**
+| **# mount -t bpf bpffs /sys/fs/bpf/token** \
+| **-o delegate_cmds=prog_load:map_create** \
+| **-o delegate_progs=kprobe** \
+| **-o delegate_attachs=xdp**
+| **# bpftool token list**
+
+::
+
+ token_info /sys/fs/bpf/token
+ allowed_cmds:
+ map_create prog_load
+ allowed_maps:
+ allowed_progs:
+ kprobe
+ allowed_attachs:
+ xdp
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 9e9a5f006cd2..586d1b2595d1 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -130,8 +130,8 @@ include $(FEATURES_DUMP)
endif
endif
-LIBS = $(LIBBPF) -lelf -lz
-LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz
+LIBS = $(LIBBPF) -lelf -lz -lcrypto
+LIBS_BOOTSTRAP = $(LIBBPF_BOOTSTRAP) -lelf -lz -lcrypto
ifeq ($(feature-libelf-zstd),1)
LIBS += -lzstd
@@ -194,7 +194,7 @@ endif
BPFTOOL_BOOTSTRAP := $(BOOTSTRAP_OUTPUT)bpftool
-BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o)
+BOOTSTRAP_OBJS = $(addprefix $(BOOTSTRAP_OUTPUT),main.o common.o json_writer.o gen.o btf.o sign.o)
$(BOOTSTRAP_OBJS): $(LIBBPF_BOOTSTRAP)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index a759ba24471d..53bcfeb1a76e 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -262,7 +262,7 @@ _bpftool()
# Deal with options
if [[ ${words[cword]} == -* ]]; then
local c='--version --json --pretty --bpffs --mapcompat --debug \
- --use-loader --base-btf'
+ --use-loader --base-btf --sign -i -k'
COMPREPLY=( $( compgen -W "$c" -- "$cur" ) )
return 0
fi
@@ -283,7 +283,7 @@ _bpftool()
_sysfs_get_netdevs
return 0
;;
- file|pinned|-B|--base-btf)
+ file|pinned|-B|--base-btf|-i|-k)
_filedir
return 0
;;
@@ -296,13 +296,21 @@ _bpftool()
# Remove all options so completions don't have to deal with them.
local i pprev
for (( i=1; i < ${#words[@]}; )); do
- if [[ ${words[i]::1} == - ]] &&
- [[ ${words[i]} != "-B" ]] && [[ ${words[i]} != "--base-btf" ]]; then
- words=( "${words[@]:0:i}" "${words[@]:i+1}" )
- [[ $i -le $cword ]] && cword=$(( cword - 1 ))
- else
- i=$(( ++i ))
- fi
+ case ${words[i]} in
+ # Remove option and its argument
+ -B|--base-btf|-i|-k)
+ words=( "${words[@]:0:i}" "${words[@]:i+2}" )
+ [[ $i -le $(($cword + 1)) ]] && cword=$(( cword - 2 ))
+ ;;
+ # No argument, remove option only
+ -*)
+ words=( "${words[@]:0:i}" "${words[@]:i+1}" )
+ [[ $i -le $cword ]] && cword=$(( cword - 1 ))
+ ;;
+ *)
+ i=$(( ++i ))
+ ;;
+ esac
done
cur=${words[cword]}
prev=${words[cword - 1]}
@@ -1215,6 +1223,17 @@ _bpftool()
;;
esac
;;
+ token)
+ case $command in
+ show|list)
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
esac
} &&
complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 4e896d8a2416..def297e879f4 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -38,7 +38,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
__u32 info_len = sizeof(info);
const char *prog_name = NULL;
struct btf *prog_btf = NULL;
- struct bpf_func_info finfo;
+ struct bpf_func_info finfo = {};
__u32 finfo_rec_size;
char prog_str[1024];
int err;
@@ -590,7 +590,7 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
case BTF_KIND_DATASEC:
return btf_dumper_datasec(d, type_id, data);
default:
- jsonw_printf(d->jw, "(unsupported-kind");
+ jsonw_printf(d->jw, "(unsupported-kind)");
return -EINVAL;
}
}
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 944ebe21a216..ec356deb27c9 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -2,6 +2,10 @@
// Copyright (C) 2017 Facebook
// Author: Roman Gushchin <guro@fb.com>
+#undef GCC_VERSION
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#define _XOPEN_SOURCE 500
#include <errno.h>
#include <fcntl.h>
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b07317d2842f..e8daf963ecef 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -21,6 +21,7 @@
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/vfs.h>
+#include <sys/utsname.h>
#include <linux/filter.h>
#include <linux/limits.h>
@@ -31,6 +32,7 @@
#include <bpf/hashmap.h>
#include <bpf/libbpf.h> /* libbpf_num_possible_cpus */
#include <bpf/btf.h>
+#include <zlib.h>
#include "main.h"
@@ -1208,3 +1210,94 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
return 0;
}
+
+/*
+ * Read lines from @file until one of the form "CONFIG_<NAME>=<value>" is
+ * found. On success @buf holds the NUL-terminated option name, *@value
+ * points at the value stored inside @buf, and true is returned. Returns
+ * false once gzgets() stops returning lines.
+ */
+static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
+					   char **value)
+{
+	char *sep;
+
+	while (gzgets(file, buf, n)) {
+		if (strncmp(buf, "CONFIG_", 7))
+			continue;
+
+		/*
+		 * Trim the ending '\n' only if it is present: a line that
+		 * filled the buffer, or a final line without a newline, must
+		 * not lose its last character.
+		 */
+		buf[strcspn(buf, "\n")] = '\0';
+
+		sep = strchr(buf, '=');
+		if (!sep)
+			continue;
+
+		/* Split on '=' and ensure that a value is present. */
+		*sep = '\0';
+		if (!sep[1])
+			continue;
+
+		*value = sep + 1;
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Look up kernel build options in the running kernel's config file.
+ *
+ * @requested_options: array of @num_options options to search for
+ * @num_options: number of entries in @requested_options and @out_values
+ * @out_values: one slot per option; a slot that is already non-NULL is left
+ *              untouched, so callers should pass a zeroed array. Each slot
+ *              filled here is a strdup() copy of the option's value and
+ *              must be released with free().
+ * @define_prefix: when non-NULL, options whose macro_dump flag is false are
+ *                 skipped
+ *
+ * The config is read from /boot/config-<release> or, failing that, from
+ * /proc/config.gz.
+ *
+ * Returns 0 once the file has been scanned, or -1 on invalid arguments, when
+ * no config file can be opened or read, or when the file does not carry the
+ * expected "Automatically generated" banner.
+ */
+int read_kernel_config(const struct kernel_config_option *requested_options,
+ size_t num_options, char **out_values,
+ const char *define_prefix)
+{
+ struct utsname utsn;
+ char path[PATH_MAX];
+ gzFile file = NULL;
+ char buf[4096];
+ char *value;
+ size_t i;
+ int ret = 0;
+
+ if (!requested_options || !out_values || num_options == 0)
+ return -1;
+
+ if (!uname(&utsn)) {
+ snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
+
+ /* gzopen also accepts uncompressed files. */
+ file = gzopen(path, "r");
+ }
+
+ if (!file) {
+ /* Some distributions build with CONFIG_IKCONFIG=y and put the
+ * config file at /proc/config.gz.
+ */
+ file = gzopen("/proc/config.gz", "r");
+ }
+
+ if (!file) {
+ p_info("skipping kernel config, can't open file: %s",
+ strerror(errno));
+ return -1;
+ }
+
+ /* Sanity check: read two lines; the second one must be the banner below */
+ if (!gzgets(file, buf, sizeof(buf)) || !gzgets(file, buf, sizeof(buf))) {
+ p_info("skipping kernel config, can't read from file: %s",
+ strerror(errno));
+ ret = -1;
+ goto end_parse;
+ }
+
+ if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
+ p_info("skipping kernel config, can't find correct file");
+ ret = -1;
+ goto end_parse;
+ }
+
+ /* buf holds the option name here, value points at its value */
+ while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
+ for (i = 0; i < num_options; i++) {
+ if ((define_prefix && !requested_options[i].macro_dump) ||
+ out_values[i] || strcmp(buf, requested_options[i].name))
+ continue;
+
+ out_values[i] = strdup(value);
+ }
+ }
+
+end_parse:
+ gzclose(file);
+ return ret;
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 24fecdf8e430..0f6070a0c8e7 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -10,7 +10,6 @@
#ifdef USE_LIBCAP
#include <sys/capability.h>
#endif
-#include <sys/utsname.h>
#include <sys/vfs.h>
#include <linux/filter.h>
@@ -18,7 +17,6 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include <zlib.h>
#include "main.h"
@@ -327,40 +325,9 @@ static void probe_jit_limit(void)
}
}
-static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
- char **value)
-{
- char *sep;
-
- while (gzgets(file, buf, n)) {
- if (strncmp(buf, "CONFIG_", 7))
- continue;
-
- sep = strchr(buf, '=');
- if (!sep)
- continue;
-
- /* Trim ending '\n' */
- buf[strlen(buf) - 1] = '\0';
-
- /* Split on '=' and ensure that a value is present. */
- *sep = '\0';
- if (!sep[1])
- continue;
-
- *value = sep + 1;
- return true;
- }
-
- return false;
-}
-
static void probe_kernel_image_config(const char *define_prefix)
{
- static const struct {
- const char * const name;
- bool macro_dump;
- } options[] = {
+ struct kernel_config_option options[] = {
/* Enable BPF */
{ "CONFIG_BPF", },
/* Enable bpf() syscall */
@@ -435,52 +402,11 @@ static void probe_kernel_image_config(const char *define_prefix)
{ "CONFIG_HZ", true, }
};
char *values[ARRAY_SIZE(options)] = { };
- struct utsname utsn;
- char path[PATH_MAX];
- gzFile file = NULL;
- char buf[4096];
- char *value;
size_t i;
- if (!uname(&utsn)) {
- snprintf(path, sizeof(path), "/boot/config-%s", utsn.release);
-
- /* gzopen also accepts uncompressed files. */
- file = gzopen(path, "r");
- }
-
- if (!file) {
- /* Some distributions build with CONFIG_IKCONFIG=y and put the
- * config file at /proc/config.gz.
- */
- file = gzopen("/proc/config.gz", "r");
- }
- if (!file) {
- p_info("skipping kernel config, can't open file: %s",
- strerror(errno));
- goto end_parse;
- }
- /* Sanity checks */
- if (!gzgets(file, buf, sizeof(buf)) ||
- !gzgets(file, buf, sizeof(buf))) {
- p_info("skipping kernel config, can't read from file: %s",
- strerror(errno));
- goto end_parse;
- }
- if (strcmp(buf, "# Automatically generated file; DO NOT EDIT.\n")) {
- p_info("skipping kernel config, can't find correct file");
- goto end_parse;
- }
-
- while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
- for (i = 0; i < ARRAY_SIZE(options); i++) {
- if ((define_prefix && !options[i].macro_dump) ||
- values[i] || strcmp(buf, options[i].name))
- continue;
-
- values[i] = strdup(value);
- }
- }
+ if (read_kernel_config(options, ARRAY_SIZE(options), values,
+ define_prefix))
+ return;
for (i = 0; i < ARRAY_SIZE(options); i++) {
if (define_prefix && !options[i].macro_dump)
@@ -488,10 +414,6 @@ static void probe_kernel_image_config(const char *define_prefix)
print_kernel_option(options[i].name, values[i], define_prefix);
free(values[i]);
}
-
-end_parse:
- if (file)
- gzclose(file);
}
static bool probe_bpf_syscall(const char *define_prefix)
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 67a60114368f..993c7d9484a4 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -688,10 +688,17 @@ static void codegen_destroy(struct bpf_object *obj, const char *obj_name)
static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *header_guard)
{
DECLARE_LIBBPF_OPTS(gen_loader_opts, opts);
+ struct bpf_load_and_run_opts sopts = {};
+ char sig_buf[MAX_SIG_SIZE];
+ __u8 prog_sha[SHA256_DIGEST_LENGTH];
struct bpf_map *map;
+
char ident[256];
int err = 0;
+ if (sign_progs)
+ opts.gen_hash = true;
+
err = bpf_object__gen_loader(obj, &opts);
if (err)
return err;
@@ -701,6 +708,7 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
p_err("failed to load object file");
goto out;
}
+
/* If there was no error during load then gen_loader_opts
* are populated with the loader program.
*/
@@ -780,8 +788,52 @@ static int gen_trace(struct bpf_object *obj, const char *obj_name, const char *h
print_hex(opts.insns, opts.insns_sz);
codegen("\
\n\
- \"; \n\
- \n\
+ \";\n");
+
+ if (sign_progs) {
+ sopts.insns = opts.insns;
+ sopts.insns_sz = opts.insns_sz;
+ sopts.excl_prog_hash = prog_sha;
+ sopts.excl_prog_hash_sz = sizeof(prog_sha);
+ sopts.signature = sig_buf;
+ sopts.signature_sz = MAX_SIG_SIZE;
+
+ err = bpftool_prog_sign(&sopts);
+ if (err < 0) {
+ p_err("failed to sign program");
+ goto out;
+ }
+
+ codegen("\
+ \n\
+ static const char opts_sig[] __attribute__((__aligned__(8))) = \"\\\n\
+ ");
+ print_hex((const void *)sig_buf, sopts.signature_sz);
+ codegen("\
+ \n\
+ \";\n");
+
+ codegen("\
+ \n\
+ static const char opts_excl_hash[] __attribute__((__aligned__(8))) = \"\\\n\
+ ");
+ print_hex((const void *)prog_sha, sizeof(prog_sha));
+ codegen("\
+ \n\
+ \";\n");
+
+ codegen("\
+ \n\
+ opts.signature = (void *)opts_sig; \n\
+ opts.signature_sz = sizeof(opts_sig) - 1; \n\
+ opts.excl_prog_hash = (void *)opts_excl_hash; \n\
+ opts.excl_prog_hash_sz = sizeof(opts_excl_hash) - 1; \n\
+ opts.keyring_id = skel->keyring_id; \n\
+ ");
+ }
+
+ codegen("\
+ \n\
opts.ctx = (struct bpf_loader_ctx *)skel; \n\
opts.data_sz = sizeof(opts_data) - 1; \n\
opts.data = (void *)opts_data; \n\
@@ -1240,7 +1292,7 @@ static int do_skeleton(int argc, char **argv)
err = -errno;
libbpf_strerror(err, err_buf, sizeof(err_buf));
p_err("failed to open BPF object file: %s", err_buf);
- goto out;
+ goto out_obj;
}
bpf_object__for_each_map(map, obj) {
@@ -1355,6 +1407,13 @@ static int do_skeleton(int argc, char **argv)
printf("\t} links;\n");
}
+ if (sign_progs) {
+ codegen("\
+ \n\
+ __s32 keyring_id; \n\
+ ");
+ }
+
if (btf) {
err = codegen_datasecs(obj, obj_name);
if (err)
@@ -1552,6 +1611,7 @@ static int do_skeleton(int argc, char **argv)
err = 0;
out:
bpf_object__close(obj);
+out_obj:
if (obj_data)
munmap(obj_data, mmap_sz);
close(fd);
@@ -1930,7 +1990,7 @@ static int do_help(int argc, char **argv)
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_OPTIONS " |\n"
- " {-L|--use-loader} }\n"
+ " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ]}\n"
"",
bin_name, "gen");
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index a773e05d5ade..bdcd717b0348 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -282,11 +282,52 @@ get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count)
return data;
}
+/* True iff read_kernel_config() yields a value for CONFIG_X86_KERNEL_IBT. */
+static bool is_x86_ibt_enabled(void)
+{
+#if defined(__x86_64__)
+	struct kernel_config_option options[] = {
+		{ "CONFIG_X86_KERNEL_IBT", },
+	};
+	char *values[ARRAY_SIZE(options)] = { };
+	bool found;
+
+	if (read_kernel_config(options, ARRAY_SIZE(options), values, NULL))
+		return false;
+	found = values[0] != NULL;
+	free(values[0]);
+	return found;
+#else
+	return false;	/* the option is only probed on x86_64 builds */
+#endif
+}
+
+/*
+ * Decide whether a symbol address corresponds to a kprobe hook target.
+ *
+ * An exact address match always counts.  In addition, on x86_64 with
+ * CET/IBT enabled, function entry points start with a 4-byte 'endbr'
+ * instruction and the kprobe is placed right after it (symbol address + 4),
+ * so a target lying exactly 4 bytes past the symbol is accepted as well.
+ */
+static bool
+symbol_matches_target(__u64 sym_addr, __u64 target_addr, bool is_ibt_enabled)
+{
+	const __u64 endbr_len = 4;
+
+	if (sym_addr == target_addr)
+		return true;
+	if (!is_ibt_enabled)
+		return false;
+	return sym_addr == target_addr - endbr_len;
+}
+
static void
show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
{
struct addr_cookie *data;
__u32 i, j = 0;
+ bool is_ibt_enabled;
jsonw_bool_field(json_wtr, "retprobe",
info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN);
@@ -306,11 +347,13 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr)
if (!dd.sym_count)
goto error;
+ is_ibt_enabled = is_x86_ibt_enabled();
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != data[j].addr)
+ if (!symbol_matches_target(dd.sym_mapping[i].address,
+ data[j].addr, is_ibt_enabled))
continue;
jsonw_start_object(json_wtr);
- jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address);
+ jsonw_uint_field(json_wtr, "addr", (unsigned long)data[j].addr);
jsonw_string_field(json_wtr, "func", dd.sym_mapping[i].name);
/* Print null if it is vmlinux */
if (dd.sym_mapping[i].module[0] == '\0') {
@@ -719,6 +762,7 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
{
struct addr_cookie *data;
__u32 i, j = 0;
+ bool is_ibt_enabled;
if (!info->kprobe_multi.count)
return;
@@ -742,12 +786,14 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info)
if (!dd.sym_count)
goto error;
+ is_ibt_enabled = is_x86_ibt_enabled();
printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]");
for (i = 0; i < dd.sym_count; i++) {
- if (dd.sym_mapping[i].address != data[j].addr)
+ if (!symbol_matches_target(dd.sym_mapping[i].address,
+ data[j].addr, is_ibt_enabled))
continue;
printf("\n\t%016lx %-16llx %s",
- dd.sym_mapping[i].address, data[j].cookie, dd.sym_mapping[i].name);
+ (unsigned long)data[j].addr, data[j].cookie, dd.sym_mapping[i].name);
if (dd.sym_mapping[i].module[0] != '\0')
printf(" [%s] ", dd.sym_mapping[i].module);
else
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 2b7f2bd3a7db..a829a6a49037 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -33,6 +33,9 @@ bool relaxed_maps;
bool use_loader;
struct btf *base_btf;
struct hashmap *refs_table;
+bool sign_progs;
+const char *private_key_path;
+const char *cert_path;
static void __noreturn clean_and_exit(int i)
{
@@ -61,7 +64,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
+ " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter | token }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-V|--version} }\n"
"",
@@ -87,6 +90,7 @@ static const struct cmd commands[] = {
{ "gen", do_gen },
{ "struct_ops", do_struct_ops },
{ "iter", do_iter },
+ { "token", do_token },
{ "version", do_version },
{ 0 }
};
@@ -447,6 +451,7 @@ int main(int argc, char **argv)
{ "nomount", no_argument, NULL, 'n' },
{ "debug", no_argument, NULL, 'd' },
{ "use-loader", no_argument, NULL, 'L' },
+ { "sign", no_argument, NULL, 'S' },
{ "base-btf", required_argument, NULL, 'B' },
{ 0 }
};
@@ -473,7 +478,7 @@ int main(int argc, char **argv)
bin_name = "bpftool";
opterr = 0;
- while ((opt = getopt_long(argc, argv, "VhpjfLmndB:l",
+ while ((opt = getopt_long(argc, argv, "VhpjfLmndSi:k:B:l",
options, NULL)) >= 0) {
switch (opt) {
case 'V':
@@ -519,6 +524,16 @@ int main(int argc, char **argv)
case 'L':
use_loader = true;
break;
+ case 'S':
+ sign_progs = true;
+ use_loader = true;
+ break;
+ case 'k':
+ private_key_path = optarg;
+ break;
+ case 'i':
+ cert_path = optarg;
+ break;
default:
p_err("unrecognized option '%s'", argv[optind - 1]);
if (json_output)
@@ -533,6 +548,16 @@ int main(int argc, char **argv)
if (argc < 0)
usage();
+ if (sign_progs && (private_key_path == NULL || cert_path == NULL)) {
+ p_err("-i <identity_x509_cert> and -k <private_key> must be supplied with -S for signing");
+ return -EINVAL;
+ }
+
+ if (!sign_progs && (private_key_path != NULL || cert_path != NULL)) {
+ p_err("--sign (or -S) must be explicitly passed with -i <identity_x509_cert> and -k <private_key> to sign the programs");
+ return -EINVAL;
+ }
+
if (version_requested)
ret = do_version(argc, argv);
else
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 6db704fda5c0..1130299cede0 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -6,9 +6,14 @@
/* BFD and kernel.h both define GCC_VERSION, differently */
#undef GCC_VERSION
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
#include <stdbool.h>
#include <stdio.h>
+#include <errno.h>
#include <stdlib.h>
+#include <bpf/skel_internal.h>
#include <linux/bpf.h>
#include <linux/compiler.h>
#include <linux/kernel.h>
@@ -52,6 +57,7 @@ static inline void *u64_to_ptr(__u64 ptr)
})
#define ERR_MAX_LEN 1024
+#define MAX_SIG_SIZE 4096
#define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx"
@@ -85,6 +91,9 @@ extern bool relaxed_maps;
extern bool use_loader;
extern struct btf *base_btf;
extern struct hashmap *refs_table;
+extern bool sign_progs;
+extern const char *private_key_path;
+extern const char *cert_path;
void __printf(1, 2) p_err(const char *fmt, ...);
void __printf(1, 2) p_info(const char *fmt, ...);
@@ -166,6 +175,7 @@ int do_tracelog(int argc, char **arg) __weak;
int do_feature(int argc, char **argv) __weak;
int do_struct_ops(int argc, char **argv) __weak;
int do_iter(int argc, char **argv) __weak;
+int do_token(int argc, char **argv) __weak;
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
@@ -274,4 +284,15 @@ int pathname_concat(char *buf, int buf_sz, const char *path,
/* print netfilter bpf_link info */
void netfilter_dump_plain(const struct bpf_link_info *info);
void netfilter_dump_json(const struct bpf_link_info *info, json_writer_t *wtr);
+
+struct kernel_config_option {
+ const char *name;
+ bool macro_dump;
+};
+
+int read_kernel_config(const struct kernel_config_option *requested_options,
+ size_t num_options, char **out_values,
+ const char *define_prefix);
+int bpftool_prog_sign(struct bpf_load_and_run_opts *opts);
+__u32 register_session_key(const char *key_der_path);
#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index c9de44a45778..7ebf7dbcfba4 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1477,7 +1477,8 @@ static int do_help(int argc, char **argv)
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
" cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
" queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
- " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena }\n"
+ " task_storage | bloom_filter | user_ringbuf | cgrp_storage | arena |\n"
+ " insn_array }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-n|--nomount} }\n"
"",
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 9722d841abc0..6daf19809ca4 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -23,6 +23,7 @@
#include <linux/err.h>
#include <linux/perf_event.h>
#include <linux/sizes.h>
+#include <linux/keyctl.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
@@ -714,7 +715,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
if (mode == DUMP_JITED) {
if (info->jited_prog_len == 0 || !info->jited_prog_insns) {
- p_info("no instructions returned");
+ p_err("error retrieving jit dump: no instructions returned or kernel.kptr_restrict set?");
return -1;
}
buf = u64_to_ptr(info->jited_prog_insns);
@@ -1930,6 +1931,8 @@ static int try_loader(struct gen_loader_opts *gen)
{
struct bpf_load_and_run_opts opts = {};
struct bpf_loader_ctx *ctx;
+ char sig_buf[MAX_SIG_SIZE];
+ __u8 prog_sha[SHA256_DIGEST_LENGTH];
int ctx_sz = sizeof(*ctx) + 64 * max(sizeof(struct bpf_map_desc),
sizeof(struct bpf_prog_desc));
int log_buf_sz = (1u << 24) - 1;
@@ -1953,6 +1956,26 @@ static int try_loader(struct gen_loader_opts *gen)
opts.insns = gen->insns;
opts.insns_sz = gen->insns_sz;
fds_before = count_open_fds();
+
+ if (sign_progs) {
+ opts.excl_prog_hash = prog_sha;
+ opts.excl_prog_hash_sz = sizeof(prog_sha);
+ opts.signature = sig_buf;
+ opts.signature_sz = MAX_SIG_SIZE;
+ opts.keyring_id = KEY_SPEC_SESSION_KEYRING;
+
+ err = bpftool_prog_sign(&opts);
+ if (err < 0) {
+ p_err("failed to sign program");
+ goto out;
+ }
+
+ err = register_session_key(cert_path);
+ if (err < 0) {
+ p_err("failed to add session key");
+ goto out;
+ }
+ }
err = bpf_load_and_run(&opts);
fd_delta = count_open_fds() - fds_before;
if (err < 0 || verifier_logs) {
@@ -1961,6 +1984,7 @@ static int try_loader(struct gen_loader_opts *gen)
fprintf(stderr, "loader prog leaked %d FDs\n",
fd_delta);
}
+out:
free(log_buf);
return err;
}
@@ -1988,6 +2012,9 @@ static int do_loader(int argc, char **argv)
goto err_close_obj;
}
+ if (sign_progs)
+ gen.gen_hash = true;
+
err = bpf_object__gen_loader(obj, &gen);
if (err)
goto err_close_obj;
@@ -2262,7 +2289,7 @@ static void profile_print_readings(void)
static char *profile_target_name(int tgt_fd)
{
- struct bpf_func_info func_info;
+ struct bpf_func_info func_info = {};
struct bpf_prog_info info = {};
__u32 info_len = sizeof(info);
const struct btf_type *t;
@@ -2562,7 +2589,7 @@ static int do_help(int argc, char **argv)
" METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
" " HELP_SPEC_OPTIONS " |\n"
" {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} |\n"
- " {-L|--use-loader} }\n"
+ " {-L|--use-loader} | [ {-S|--sign } {-k} <private_key.pem> {-i} <certificate.x509> ] \n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/sign.c b/tools/bpf/bpftool/sign.c
new file mode 100644
index 000000000000..f9b742f4bb10
--- /dev/null
+++ b/tools/bpf/bpftool/sign.c
@@ -0,0 +1,217 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/*
+ * Copyright (C) 2025 Google LLC.
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <getopt.h>
+#include <err.h>
+#include <openssl/opensslv.h>
+#include <openssl/bio.h>
+#include <openssl/evp.h>
+#include <openssl/pem.h>
+#include <openssl/err.h>
+#include <openssl/cms.h>
+#include <linux/keyctl.h>
+#include <errno.h>
+
+#include <bpf/skel_internal.h>
+
+#include "main.h"
+
+#define OPEN_SSL_ERR_BUF_LEN 256
+
+/* Use deprecated in 3.0 ERR_get_error_line_data for openssl < 3 */
+#if !defined(OPENSSL_VERSION_MAJOR) || (OPENSSL_VERSION_MAJOR < 3)
+#define ERR_get_error_all(file, line, func, data, flags) \
+ ERR_get_error_line_data(file, line, data, flags)
+#endif
+
+/*
+ * Drain the OpenSSL error queue via p_err(); @l (caller's line) is unused.
+ */
+static void display_openssl_errors(int l)
+{
+	const char *file, *data;
+	char msg[OPEN_SSL_ERR_BUF_LEN];
+	unsigned long code;
+	int flags, line;
+
+	while ((code = ERR_get_error_all(&file, &line, NULL, &data, &flags))) {
+		ERR_error_string_n(code, msg, sizeof(msg));
+		if (!data || !(flags & ERR_TXT_STRING))
+			p_err("OpenSSL %s: %s:%d", msg, file, line);
+		else
+			p_err("OpenSSL %s: %s:%d: %s", msg, file, line, data);
+	}
+}
+
+/* If @cond holds and OpenSSL has queued errors, report them via p_err(). */
+#define DISPLAY_OSSL_ERR(cond)					\
+	do {							\
+		if ((cond) && ERR_peek_error())			\
+			display_openssl_errors(__LINE__);	\
+	} while (0)
+
+/* Read a PEM private key; NULL (OpenSSL errors printed) if open/parse fails. */
+static EVP_PKEY *read_private_key(const char *pkey_path)
+{
+	BIO *b = BIO_new_file(pkey_path, "rb");	/* NULL if the open failed */
+	EVP_PKEY *private_key;
+
+	private_key = b ? PEM_read_bio_PrivateKey(b, NULL, NULL, NULL) : NULL;
+	BIO_free(b);
+	DISPLAY_OSSL_ERR(!private_key);
+	return private_key;
+}
+
+/*
+ * Load an X.509 certificate from @x509_name, accepting raw DER or PEM.
+ * Returns NULL on failure after printing any queued OpenSSL errors.
+ */
+static X509 *read_x509(const char *x509_name)
+{
+	unsigned char magic[2];
+	X509 *cert = NULL;
+	bool is_der;
+	BIO *in;
+
+	in = BIO_new_file(x509_name, "rb");
+	if (!in)
+		goto out;
+
+	/* Peek at the first two bytes to guess the encoding, then rewind. */
+	if (BIO_read(in, magic, 2) != 2 || BIO_reset(in) != 0)
+		goto out;
+
+	/* These leading bytes are assumed to mean raw DER; otherwise PEM. */
+	is_der = magic[0] == 0x30 && magic[1] >= 0x81 && magic[1] <= 0x84;
+	if (is_der)
+		cert = d2i_X509_bio(in, NULL);
+	else
+		cert = PEM_read_bio_X509(in, NULL, NULL, NULL);
+
+out:
+	BIO_free(in);
+	DISPLAY_OSSL_ERR(!cert);
+	return cert;
+}
+
+/*
+ * Add the X.509 certificate at @key_der_path to the session keyring as an
+ * "asymmetric" key.  Returns the add_key() result, or -1 on any failure;
+ * NOTE(review): -1 passes through the __u32 return type as 0xffffffff.
+ */
+__u32 register_session_key(const char *key_der_path)
+{
+	unsigned char *der = NULL;
+	X509 *cert = NULL;
+	int id = -1, len;
+
+	if (!key_der_path)
+		return id;
+	cert = read_x509(key_der_path);
+	len = cert ? i2d_X509(cert, &der) : -1;
+	if (len >= 0)
+		id = syscall(__NR_add_key, "asymmetric", key_der_path, der,
+			     (size_t)len, KEY_SPEC_SESSION_KEYRING);
+	X509_free(cert);
+	OPENSSL_free(der);
+	DISPLAY_OSSL_ERR(id == -1);
+	return id;
+}
+
+/*
+ * Sign the loader instructions carried in @opts using the private key and
+ * certificate named by private_key_path and cert_path.
+ *
+ * On entry @opts->signature_sz is the capacity of @opts->signature; on
+ * success it is updated to the length of the encoded detached CMS
+ * signature stored there.  The SHA-256 digest of the instructions is
+ * written to @opts->excl_prog_hash, with its size in excl_prog_hash_sz.
+ *
+ * Returns 0 on success or a negative errno value; any queued OpenSSL
+ * errors are printed on failure.
+ */
+int bpftool_prog_sign(struct bpf_load_and_run_opts *opts)
+{
+	BIO *bd_in = NULL, *bd_out = NULL;
+	EVP_PKEY *private_key = NULL;
+	CMS_ContentInfo *cms = NULL;
+	long actual_sig_len = 0;
+	X509 *x509 = NULL;
+	int err;
+
+	err = -ENOMEM;
+	bd_in = BIO_new_mem_buf(opts->insns, opts->insns_sz);
+	if (!bd_in)
+		goto cleanup;
+
+	err = -EINVAL;
+	private_key = read_private_key(private_key_path);
+	if (!private_key)
+		goto cleanup;
+
+	x509 = read_x509(cert_path);
+	if (!x509)
+		goto cleanup;
+
+	cms = CMS_sign(NULL, NULL, NULL, NULL,
+		       CMS_NOCERTS | CMS_PARTIAL | CMS_BINARY | CMS_DETACHED |
+		       CMS_STREAM);
+	if (!cms)
+		goto cleanup;
+
+	if (!CMS_add1_signer(cms, x509, private_key, EVP_sha256(),
+			     CMS_NOCERTS | CMS_BINARY | CMS_NOSMIMECAP |
+			     CMS_USE_KEYID | CMS_NOATTR))
+		goto cleanup;
+
+	err = -EIO;
+	if (CMS_final(cms, bd_in, NULL, CMS_NOCERTS | CMS_BINARY) != 1)
+		goto cleanup;
+
+	/* Never report success while excl_prog_hash was left unset. */
+	if (EVP_Digest(opts->insns, opts->insns_sz, opts->excl_prog_hash,
+		       &opts->excl_prog_hash_sz, EVP_sha256(), NULL) != 1)
+		goto cleanup;
+
+	bd_out = BIO_new(BIO_s_mem());
+	if (!bd_out) {
+		err = -ENOMEM;
+		goto cleanup;
+	}
+
+	if (!i2d_CMS_bio_stream(bd_out, cms, NULL, 0))
+		goto cleanup;
+
+	actual_sig_len = BIO_get_mem_data(bd_out, NULL);
+	if (actual_sig_len <= 0)
+		goto cleanup;
+
+	if ((size_t)actual_sig_len > opts->signature_sz) {
+		err = -ENOSPC;
+		goto cleanup;
+	}
+
+	if (BIO_read(bd_out, opts->signature, actual_sig_len) != actual_sig_len)
+		goto cleanup;
+
+	opts->signature_sz = actual_sig_len;
+	err = 0;
+cleanup:
+	BIO_free(bd_out);
+	CMS_ContentInfo_free(cms);
+	X509_free(x509);
+	EVP_PKEY_free(private_key);
+	BIO_free(bd_in);
+	DISPLAY_OSSL_ERR(err < 0);
+	return err;
+}
diff --git a/tools/bpf/bpftool/token.c b/tools/bpf/bpftool/token.c
new file mode 100644
index 000000000000..c08f34b9d51b
--- /dev/null
+++ b/tools/bpf/bpftool/token.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2025 Didi Technology Co., Tao Chen */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <mntent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+#define MOUNTS_FILE "/proc/mounts"
+
+/* Output label (header) and mount-option name (key) of each delegation set. */
+static struct {
+	const char *header, *key;
+} sets[] = {
+	{"allowed_cmds", "delegate_cmds"},
+	{"allowed_maps", "delegate_maps"},
+	{"allowed_progs", "delegate_progs"},
+	{"allowed_attachs", "delegate_attachs"},
+};
+
+/* Does @mnt_ops contain any delegate_* option name (substring match)? */
+static bool has_delegate_options(const char *mnt_ops)
+{
+	return strstr(mnt_ops, "delegate_cmds") || strstr(mnt_ops, "delegate_maps") ||
+	       strstr(mnt_ops, "delegate_progs") ||
+	       strstr(mnt_ops, "delegate_attachs");
+}
+
+/*
+ * Find "@key=value" in the comma-separated list @opts and return a pointer
+ * to the value, or NULL if absent.  @opts is split in place by strtok_r().
+ */
+static char *get_delegate_value(char *opts, const char *key)
+{
+	char *tok, *save;
+	size_t klen;
+
+	if (!opts)
+		return NULL;
+	klen = strlen(key);
+	for (tok = strtok_r(opts, ",", &save); tok;
+	     tok = strtok_r(NULL, ",", &save))
+		if (!strncmp(tok, key, klen) && tok[klen] == '=')
+			return tok + klen + 1;
+	return NULL;
+}
+
+/* Print the ':'-separated items of @input, @items_per_line per output row. */
+static void print_items_per_line(char *input, int items_per_line)
+{
+	char *item, *save;
+	int n;
+
+	if (!input)
+		return;
+
+	item = strtok_r(input, ":", &save);
+	for (n = 0; item; n++) {
+		if (n % items_per_line == 0)
+			printf("\n\t ");
+		printf("%-20s", item);
+		item = strtok_r(NULL, ":", &save);
+	}
+}
+
+#define ITEMS_PER_LINE 4
+/* Plain-text dump of one mount: its directory, then each sets[] entry. */
+static void show_token_info_plain(struct mntent *mntent)
+{
+	size_t i;
+
+	printf("token_info %s", mntent->mnt_dir);
+	for (i = 0; i < ARRAY_SIZE(sets); i++) {
+		/* get_delegate_value() splits in place, so use a fresh copy */
+		char *copy = strdup(mntent->mnt_opts);
+
+		printf("\n\t%s:", sets[i].header);
+		print_items_per_line(get_delegate_value(copy, sets[i].key),
+				     ITEMS_PER_LINE);
+		free(copy);
+	}
+
+	printf("\n");
+}
+
+/*
+ * Emit the ':'-separated items of @input as a JSON array of strings on
+ * json_wtr; a NULL @input yields an empty array.  @input is split in place.
+ */
+static void split_json_array_str(char *input)
+{
+	char *item = NULL, *save;
+
+	jsonw_start_array(json_wtr);
+	if (input)
+		item = strtok_r(input, ":", &save);
+	while (item) {
+		jsonw_string(json_wtr, item);
+		item = strtok_r(NULL, ":", &save);
+	}
+	jsonw_end_array(json_wtr);
+}
+
+/*
+ * JSON dump of one mount: an object holding the mount directory as
+ * "token_info" plus one array per sets[] entry, named by its header.
+ */
+static void show_token_info_json(struct mntent *mntent)
+{
+	size_t i;
+
+	jsonw_start_object(json_wtr);
+	jsonw_string_field(json_wtr, "token_info", mntent->mnt_dir);
+	for (i = 0; i < ARRAY_SIZE(sets); i++) {
+		char *copy = strdup(mntent->mnt_opts);
+
+		jsonw_name(json_wtr, sets[i].header);
+		split_json_array_str(get_delegate_value(copy, sets[i].key));
+		free(copy);
+	}
+	jsonw_end_object(json_wtr);
+}
+
+/* Print @mntent in the format selected by json_output; always returns 0. */
+static int __show_token_info(struct mntent *mntent)
+{
+	if (!json_output)
+		show_token_info_plain(mntent);
+	else
+		show_token_info_json(mntent);
+	return 0;
+}
+
+/*
+ * Walk MOUNTS_FILE and print every mount whose type starts with "bpf" and
+ * whose options include a delegate_* entry.  In JSON mode the entries are
+ * wrapped in one array.  Returns 0, or -1 if MOUNTS_FILE can't be opened.
+ */
+static int show_token_info(void)
+{
+	struct mntent *mnt;
+	FILE *mounts;
+
+	mounts = setmntent(MOUNTS_FILE, "r");
+	if (!mounts) {
+		p_err("Failed to open: %s", MOUNTS_FILE);
+		return -1;
+	}
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+	for (mnt = getmntent(mounts); mnt; mnt = getmntent(mounts)) {
+		if (!strncmp(mnt->mnt_type, "bpf", 3) &&
+		    has_delegate_options(mnt->mnt_opts))
+			__show_token_info(mnt);
+	}
+	if (json_output)
+		jsonw_end_array(json_wtr);
+	endmntent(mounts);
+	return 0;
+}
+
+/* "show"/"list" subcommand: takes no arguments, then dumps token info. */
+static int do_show(int argc, char **argv)
+{
+	if (argc)
+		return BAD_ARG();
+	return show_token_info();
+}
+
+/* Print usage to stderr, or a JSON null in JSON mode; always returns 0. */
+static int do_help(int argc, char **argv)
+{
+	if (!json_output)
+		fprintf(stderr,
+			"Usage: %1$s %2$s { show | list }\n"
+			" %1$s %2$s help\n"
+			" " HELP_SPEC_OPTIONS " }\n"
+			"\n"
+			"",
+			bin_name, argv[-2]);
+	else
+		jsonw_null(json_wtr);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {	/* handed to cmd_select() by do_token() */
+	{ "show", do_show },
+	{ "list", do_show },		/* same handler as "show" */
+	{ "help", do_help },
+	{ 0 }
+};
+
+int do_token(int argc, char **argv)	/* handler of the "token" object */
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/tracelog.c b/tools/bpf/bpftool/tracelog.c
index 31d806e3bdaa..573a8d99f009 100644
--- a/tools/bpf/bpftool/tracelog.c
+++ b/tools/bpf/bpftool/tracelog.c
@@ -57,10 +57,8 @@ find_tracefs_mnt_single(unsigned long magic, char *mnt, const char *mntpt)
static bool get_tracefs_pipe(char *mnt)
{
static const char * const known_mnts[] = {
- "/sys/kernel/debug/tracing",
"/sys/kernel/tracing",
- "/tracing",
- "/trace",
+ "/sys/kernel/debug/tracing",
};
const char *pipe_name = "/trace_pipe";
const char *fstype = "tracefs";
@@ -95,12 +93,7 @@ static bool get_tracefs_pipe(char *mnt)
return false;
p_info("could not find tracefs, attempting to mount it now");
- /* Most of the time, tracefs is automatically mounted by debugfs at
- * /sys/kernel/debug/tracing when we try to access it. If we could not
- * find it, it is likely that debugfs is not mounted. Let's give one
- * attempt at mounting just tracefs at /sys/kernel/tracing.
- */
- strcpy(mnt, known_mnts[1]);
+ strcpy(mnt, known_mnts[0]);
if (mount_tracefs(mnt))
return false;
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
deleted file mode 100644
index 78a436c4072e..000000000000
--- a/tools/bpf/runqslower/Makefile
+++ /dev/null
@@ -1,91 +0,0 @@
-# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-include ../../scripts/Makefile.include
-
-OUTPUT ?= $(abspath .output)/
-
-BPFTOOL_OUTPUT := $(OUTPUT)bpftool/
-DEFAULT_BPFTOOL := $(BPFTOOL_OUTPUT)bootstrap/bpftool
-BPFTOOL ?= $(DEFAULT_BPFTOOL)
-BPF_TARGET_ENDIAN ?= --target=bpf
-LIBBPF_SRC := $(abspath ../../lib/bpf)
-BPFOBJ_OUTPUT := $(OUTPUT)libbpf/
-BPFOBJ := $(BPFOBJ_OUTPUT)libbpf.a
-BPF_DESTDIR := $(BPFOBJ_OUTPUT)
-BPF_INCLUDE := $(BPF_DESTDIR)/include
-INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../include/uapi)
-CFLAGS := -g -Wall $(CLANG_CROSS_FLAGS)
-CFLAGS += $(EXTRA_CFLAGS)
-LDFLAGS += $(EXTRA_LDFLAGS)
-LDLIBS += -lelf -lz
-
-# Try to detect best kernel BTF source
-KERNEL_REL := $(shell uname -r)
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
- $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
- ../../../vmlinux /sys/kernel/btf/vmlinux \
- /boot/vmlinux-$(KERNEL_REL)
-VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
- $(wildcard $(VMLINUX_BTF_PATHS))))
-
-ifneq ($(V),1)
-MAKEFLAGS += --no-print-directory
-submake_extras := feature_display=0
-endif
-
-.DELETE_ON_ERROR:
-
-.PHONY: all clean runqslower libbpf_hdrs
-all: runqslower
-
-runqslower: $(OUTPUT)/runqslower
-
-clean:
- $(call QUIET_CLEAN, runqslower)
- $(Q)$(RM) -r $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT)
- $(Q)$(RM) $(OUTPUT)*.o $(OUTPUT)*.d
- $(Q)$(RM) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
- $(Q)$(RM) $(OUTPUT)runqslower
- $(Q)$(RM) -r .output
-
-libbpf_hdrs: $(BPFOBJ)
-
-$(OUTPUT)/runqslower: $(OUTPUT)/runqslower.o $(BPFOBJ)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
-
-$(OUTPUT)/runqslower.o: runqslower.h $(OUTPUT)/runqslower.skel.h \
- $(OUTPUT)/runqslower.bpf.o | libbpf_hdrs
-
-$(OUTPUT)/runqslower.bpf.o: $(OUTPUT)/vmlinux.h runqslower.h | libbpf_hdrs
-
-$(OUTPUT)/%.skel.h: $(OUTPUT)/%.bpf.o | $(BPFTOOL)
- $(QUIET_GEN)$(BPFTOOL) gen skeleton $< > $@
-
-$(OUTPUT)/%.bpf.o: %.bpf.c $(BPFOBJ) | $(OUTPUT)
- $(QUIET_GEN)$(CLANG) -g -O2 $(BPF_TARGET_ENDIAN) $(INCLUDES) \
- -c $(filter %.c,$^) -o $@ && \
- $(LLVM_STRIP) -g $@
-
-$(OUTPUT)/%.o: %.c | $(OUTPUT)
- $(QUIET_CC)$(CC) $(CFLAGS) $(INCLUDES) -c $(filter %.c,$^) -o $@
-
-$(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
- $(QUIET_MKDIR)mkdir -p $@
-
-$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
-ifeq ($(VMLINUX_H),)
- $(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
- echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \
- "specify its location." >&2; \
- exit 1;\
- fi
- $(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
-else
- $(Q)cp "$(VMLINUX_H)" $@
-endif
-
-$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) \
- DESTDIR=$(BPFOBJ_OUTPUT) prefix= $(abspath $@) install_headers
-
-$(DEFAULT_BPFTOOL): | $(BPFTOOL_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C ../bpftool OUTPUT=$(BPFTOOL_OUTPUT) bootstrap
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
deleted file mode 100644
index fced54a3adf6..000000000000
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ /dev/null
@@ -1,106 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2019 Facebook
-#include "vmlinux.h"
-#include <bpf/bpf_helpers.h>
-#include "runqslower.h"
-
-#define TASK_RUNNING 0
-#define BPF_F_CURRENT_CPU 0xffffffffULL
-
-const volatile __u64 min_us = 0;
-const volatile pid_t targ_pid = 0;
-
-struct {
- __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
- __uint(map_flags, BPF_F_NO_PREALLOC);
- __type(key, int);
- __type(value, u64);
-} start SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(u32));
-} events SEC(".maps");
-
-/* record enqueue timestamp */
-__always_inline
-static int trace_enqueue(struct task_struct *t)
-{
- u32 pid = t->pid;
- u64 *ptr;
-
- if (!pid || (targ_pid && targ_pid != pid))
- return 0;
-
- ptr = bpf_task_storage_get(&start, t, 0,
- BPF_LOCAL_STORAGE_GET_F_CREATE);
- if (!ptr)
- return 0;
-
- *ptr = bpf_ktime_get_ns();
- return 0;
-}
-
-SEC("tp_btf/sched_wakeup")
-int handle__sched_wakeup(u64 *ctx)
-{
- /* TP_PROTO(struct task_struct *p) */
- struct task_struct *p = (void *)ctx[0];
-
- return trace_enqueue(p);
-}
-
-SEC("tp_btf/sched_wakeup_new")
-int handle__sched_wakeup_new(u64 *ctx)
-{
- /* TP_PROTO(struct task_struct *p) */
- struct task_struct *p = (void *)ctx[0];
-
- return trace_enqueue(p);
-}
-
-SEC("tp_btf/sched_switch")
-int handle__sched_switch(u64 *ctx)
-{
- /* TP_PROTO(bool preempt, struct task_struct *prev,
- * struct task_struct *next)
- */
- struct task_struct *prev = (struct task_struct *)ctx[1];
- struct task_struct *next = (struct task_struct *)ctx[2];
- struct runq_event event = {};
- u64 *tsp, delta_us;
- u32 pid;
-
- /* ivcsw: treat like an enqueue event and store timestamp */
- if (prev->__state == TASK_RUNNING)
- trace_enqueue(prev);
-
- pid = next->pid;
-
- /* For pid mismatch, save a bpf_task_storage_get */
- if (!pid || (targ_pid && targ_pid != pid))
- return 0;
-
- /* fetch timestamp and calculate delta */
- tsp = bpf_task_storage_get(&start, next, 0, 0);
- if (!tsp)
- return 0; /* missed enqueue */
-
- delta_us = (bpf_ktime_get_ns() - *tsp) / 1000;
- if (min_us && delta_us <= min_us)
- return 0;
-
- event.pid = pid;
- event.delta_us = delta_us;
- bpf_get_current_comm(&event.task, sizeof(event.task));
-
- /* output */
- bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
- &event, sizeof(event));
-
- bpf_task_storage_delete(&start, next);
- return 0;
-}
-
-char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/bpf/runqslower/runqslower.c b/tools/bpf/runqslower/runqslower.c
deleted file mode 100644
index 83c5993a139a..000000000000
--- a/tools/bpf/runqslower/runqslower.c
+++ /dev/null
@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-// Copyright (c) 2019 Facebook
-#include <argp.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include <bpf/libbpf.h>
-#include <bpf/bpf.h>
-#include "runqslower.h"
-#include "runqslower.skel.h"
-
-struct env {
- pid_t pid;
- __u64 min_us;
- bool verbose;
-} env = {
- .min_us = 10000,
-};
-
-const char *argp_program_version = "runqslower 0.1";
-const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
-const char argp_program_doc[] =
-"runqslower Trace long process scheduling delays.\n"
-" For Linux, uses eBPF, BPF CO-RE, libbpf, BTF.\n"
-"\n"
-"This script traces high scheduling delays between tasks being\n"
-"ready to run and them running on CPU after that.\n"
-"\n"
-"USAGE: runqslower [-p PID] [min_us]\n"
-"\n"
-"EXAMPLES:\n"
-" runqslower # trace run queue latency higher than 10000 us (default)\n"
-" runqslower 1000 # trace run queue latency higher than 1000 us\n"
-" runqslower -p 123 # trace pid 123 only\n";
-
-static const struct argp_option opts[] = {
- { "pid", 'p', "PID", 0, "Process PID to trace"},
- { "verbose", 'v', NULL, 0, "Verbose debug output" },
- {},
-};
-
-static error_t parse_arg(int key, char *arg, struct argp_state *state)
-{
- static int pos_args;
- int pid;
- long long min_us;
-
- switch (key) {
- case 'v':
- env.verbose = true;
- break;
- case 'p':
- errno = 0;
- pid = strtol(arg, NULL, 10);
- if (errno || pid <= 0) {
- fprintf(stderr, "Invalid PID: %s\n", arg);
- argp_usage(state);
- }
- env.pid = pid;
- break;
- case ARGP_KEY_ARG:
- if (pos_args++) {
- fprintf(stderr,
- "Unrecognized positional argument: %s\n", arg);
- argp_usage(state);
- }
- errno = 0;
- min_us = strtoll(arg, NULL, 10);
- if (errno || min_us <= 0) {
- fprintf(stderr, "Invalid delay (in us): %s\n", arg);
- argp_usage(state);
- }
- env.min_us = min_us;
- break;
- default:
- return ARGP_ERR_UNKNOWN;
- }
- return 0;
-}
-
-int libbpf_print_fn(enum libbpf_print_level level,
- const char *format, va_list args)
-{
- if (level == LIBBPF_DEBUG && !env.verbose)
- return 0;
- return vfprintf(stderr, format, args);
-}
-
-void handle_event(void *ctx, int cpu, void *data, __u32 data_sz)
-{
- const struct runq_event *e = data;
- struct tm *tm;
- char ts[32];
- time_t t;
-
- time(&t);
- tm = localtime(&t);
- strftime(ts, sizeof(ts), "%H:%M:%S", tm);
- printf("%-8s %-16s %-6d %14llu\n", ts, e->task, e->pid, e->delta_us);
-}
-
-void handle_lost_events(void *ctx, int cpu, __u64 lost_cnt)
-{
- printf("Lost %llu events on CPU #%d!\n", lost_cnt, cpu);
-}
-
-int main(int argc, char **argv)
-{
- static const struct argp argp = {
- .options = opts,
- .parser = parse_arg,
- .doc = argp_program_doc,
- };
- struct perf_buffer *pb = NULL;
- struct runqslower_bpf *obj;
- int err;
-
- err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
- if (err)
- return err;
-
- libbpf_set_print(libbpf_print_fn);
-
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
- obj = runqslower_bpf__open();
- if (!obj) {
- fprintf(stderr, "failed to open and/or load BPF object\n");
- return 1;
- }
-
- /* initialize global data (filtering options) */
- obj->rodata->targ_pid = env.pid;
- obj->rodata->min_us = env.min_us;
-
- err = runqslower_bpf__load(obj);
- if (err) {
- fprintf(stderr, "failed to load BPF object: %d\n", err);
- goto cleanup;
- }
-
- err = runqslower_bpf__attach(obj);
- if (err) {
- fprintf(stderr, "failed to attach BPF programs\n");
- goto cleanup;
- }
-
- printf("Tracing run queue latency higher than %llu us\n", env.min_us);
- printf("%-8s %-16s %-6s %14s\n", "TIME", "COMM", "PID", "LAT(us)");
-
- pb = perf_buffer__new(bpf_map__fd(obj->maps.events), 64,
- handle_event, handle_lost_events, NULL, NULL);
- err = libbpf_get_error(pb);
- if (err) {
- pb = NULL;
- fprintf(stderr, "failed to open perf buffer: %d\n", err);
- goto cleanup;
- }
-
- while ((err = perf_buffer__poll(pb, 100)) >= 0)
- ;
- printf("Error polling perf buffer: %d\n", err);
-
-cleanup:
- perf_buffer__free(pb);
- runqslower_bpf__destroy(obj);
-
- return err != 0;
-}
diff --git a/tools/bpf/runqslower/runqslower.h b/tools/bpf/runqslower/runqslower.h
deleted file mode 100644
index 4f70f07200c2..000000000000
--- a/tools/bpf/runqslower/runqslower.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __RUNQSLOWER_H
-#define __RUNQSLOWER_H
-
-#define TASK_COMM_LEN 16
-
-struct runq_event {
- char task[TASK_COMM_LEN];
- __u64 delta_us;
- pid_t pid;
-};
-
-#endif /* __RUNQSLOWER_H */
diff --git a/tools/build/Build b/tools/build/Build
new file mode 100644
index 000000000000..1c7e598e9f59
--- /dev/null
+++ b/tools/build/Build
@@ -0,0 +1,2 @@
+hostprogs := fixdep
+fixdep-y := fixdep.o
diff --git a/tools/build/Makefile b/tools/build/Makefile
index 63ef21878761..3a5a3808ab2a 100644
--- a/tools/build/Makefile
+++ b/tools/build/Makefile
@@ -37,5 +37,22 @@ ifneq ($(wildcard $(TMP_O)),)
$(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null
endif
-$(OUTPUT)fixdep: $(srctree)/tools/build/fixdep.c
- $(QUIET_CC)$(HOSTCC) $(KBUILD_HOSTCFLAGS) $(KBUILD_HOSTLDFLAGS) -o $@ $<
+FIXDEP := $(OUTPUT)fixdep
+FIXDEP_IN := $(OUTPUT)fixdep-in.o
+
+# To track fixdep's dependencies properly, fixdep needs to run on itself.
+# Build it twice the first time.
+$(FIXDEP_IN): FORCE
+ $(Q)if [ ! -f $(FIXDEP) ]; then \
+ $(MAKE) $(build)=fixdep HOSTCFLAGS="$(KBUILD_HOSTCFLAGS)"; \
+ rm -f $(FIXDEP).o; \
+ fi
+ $(Q)$(MAKE) $(build)=fixdep HOSTCFLAGS="$(KBUILD_HOSTCFLAGS)"
+
+
+$(FIXDEP): $(FIXDEP_IN)
+ $(QUIET_LINK)$(HOSTCC) $(FIXDEP_IN) $(KBUILD_HOSTLDFLAGS) -o $@
+
+FORCE:
+
+.PHONY: FORCE
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 649c5ab8e8f2..a7f030fc5e83 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -68,7 +68,6 @@ FEATURE_TESTS_BASIC := \
libdw \
eventfd \
fortify-source \
- get_current_dir_name \
gettid \
glibc \
libbfd \
@@ -80,11 +79,9 @@ FEATURE_TESTS_BASIC := \
libelf-zstd \
libnuma \
numa_num_possible_cpus \
- libperl \
libpython \
libslang \
libtraceevent \
- libtracefs \
libcpupower \
pthread-attr-setaffinity-np \
pthread-barrier \
@@ -93,7 +90,6 @@ FEATURE_TESTS_BASIC := \
timerfd \
zlib \
lzma \
- get_cpuid \
bpf \
scandirat \
sched_getcpu \
@@ -121,11 +117,11 @@ FEATURE_TESTS_EXTRA := \
libbfd-liberty \
libbfd-liberty-z \
libopencsd \
+ libperl \
cxx \
llvm \
clang \
libbpf \
- libbpf-strings \
libpfm4 \
libdebuginfod \
clang-bpf-co-re \
@@ -144,13 +140,11 @@ FEATURE_DISPLAY ?= \
libelf \
libnuma \
numa_num_possible_cpus \
- libperl \
libpython \
libcapstone \
llvm-perf \
zlib \
lzma \
- get_cpuid \
bpf \
libaio \
libzstd
@@ -319,5 +313,7 @@ endef
ifeq ($(FEATURE_DISPLAY_DEFERRED),)
$(call feature_display_entries)
- $(info )
+ ifeq ($(feature_display),1)
+ $(info )
+ endif
endif
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index b41a42818d8a..87a5a908d6fa 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -8,7 +8,6 @@ FILES= \
test-libdw.bin \
test-eventfd.bin \
test-fortify-source.bin \
- test-get_current_dir_name.bin \
test-glibc.bin \
test-gtk2.bin \
test-gtk2-infobar.bin \
@@ -34,7 +33,6 @@ FILES= \
test-libperl.bin \
test-libpython.bin \
test-libslang.bin \
- test-libslang-include-subdir.bin \
test-libtraceevent.bin \
test-libcpupower.bin \
test-libtracefs.bin \
@@ -58,8 +56,6 @@ FILES= \
test-lzma.bin \
test-bpf.bin \
test-libbpf.bin \
- test-libbpf-strings.bin \
- test-get_cpuid.bin \
test-sdt.bin \
test-cxx.bin \
test-gettid.bin \
@@ -94,7 +90,7 @@ else
# paths are used instead.
ifdef CROSS_COMPILE
ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),)
- CROSS_ARCH = $(shell $(CC) -dumpmachine)
+ CROSS_ARCH = $(notdir $(CROSS_COMPILE:%-=%))
PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/
PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/
PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/
@@ -110,7 +106,7 @@ all: $(FILES)
__BUILD = $(CC) $(CFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFLAGS)
BUILD = $(__BUILD) > $(@:.bin=.make.output) 2>&1
BUILD_BFD = $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
- BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -lzstd
+ BUILD_ALL = $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -lslang $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -ldl -lz -llzma -lzstd
__BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(@F)) $(LDFLAGS)
BUILDXX = $(__BUILDXX) > $(@:.bin=.make.output) 2>&1
@@ -118,7 +114,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
###############################
$(OUTPUT)test-all.bin:
- $(BUILD_ALL) || $(BUILD_ALL) -lopcodes -liberty
+ $(BUILD_ALL)
$(OUTPUT)test-hello.bin:
$(BUILD)
@@ -147,9 +143,6 @@ $(OUTPUT)test-libelf.bin:
$(OUTPUT)test-eventfd.bin:
$(BUILD)
-$(OUTPUT)test-get_current_dir_name.bin:
- $(BUILD)
-
$(OUTPUT)test-glibc.bin:
$(BUILD)
@@ -234,9 +227,6 @@ $(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
$(OUTPUT)test-libslang.bin:
$(BUILD) -lslang
-$(OUTPUT)test-libslang-include-subdir.bin:
- $(BUILD) -lslang
-
$(OUTPUT)test-libtraceevent.bin:
$(BUILD) -ltraceevent
@@ -316,10 +306,10 @@ $(OUTPUT)test-libcapstone.bin:
$(BUILD) # -lcapstone provided by $(FEATURE_CHECK_LDFLAGS-libcapstone)
$(OUTPUT)test-compile-32.bin:
- $(CC) -m32 -o $@ test-compile.c
+ $(CC) -m32 -Wall -Werror -o $@ test-compile.c
$(OUTPUT)test-compile-x32.bin:
- $(CC) -mx32 -o $@ test-compile.c
+ $(CC) -mx32 -Wall -Werror -o $@ test-compile.c
$(OUTPUT)test-zlib.bin:
$(BUILD) -lz
@@ -327,18 +317,12 @@ $(OUTPUT)test-zlib.bin:
$(OUTPUT)test-lzma.bin:
$(BUILD) -llzma
-$(OUTPUT)test-get_cpuid.bin:
- $(BUILD)
-
$(OUTPUT)test-bpf.bin:
$(BUILD)
$(OUTPUT)test-libbpf.bin:
$(BUILD) -lbpf
-$(OUTPUT)test-libbpf-strings.bin:
- $(BUILD)
-
$(OUTPUT)test-sdt.bin:
$(BUILD)
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 4419fb4710bd..eb346160d0ba 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -7,17 +7,13 @@
*/
/*
- * Quirk: Python and Perl headers cannot be in arbitrary places, so keep
- * these 3 testcases at the top:
+ * Quirk: Python headers cannot be in arbitrary places, so keep this testcase at
+ * the top:
*/
#define main main_test_libpython
# include "test-libpython.c"
#undef main
-#define main main_test_libperl
-# include "test-libperl.c"
-#undef main
-
#define main main_test_hello
# include "test-hello.c"
#undef main
@@ -26,10 +22,6 @@
# include "test-libelf.c"
#undef main
-#define main main_test_get_current_dir_name
-# include "test-get_current_dir_name.c"
-#undef main
-
#define main main_test_gettid
# include "test-gettid.c"
#undef main
@@ -122,10 +114,6 @@
# include "test-lzma.c"
#undef main
-#define main main_test_get_cpuid
-# include "test-get_cpuid.c"
-#undef main
-
#define main main_test_bpf
# include "test-bpf.c"
#undef main
@@ -154,17 +142,11 @@
# include "test-libtraceevent.c"
#undef main
-#define main main_test_libtracefs
-# include "test-libtracefs.c"
-#undef main
-
int main(int argc, char *argv[])
{
main_test_libpython();
- main_test_libperl();
main_test_hello();
main_test_libelf();
- main_test_get_current_dir_name();
main_test_gettid();
main_test_glibc();
main_test_libdw();
@@ -182,7 +164,6 @@ int main(int argc, char *argv[])
main_test_pthread_attr_setaffinity_np();
main_test_pthread_barrier();
main_test_lzma();
- main_test_get_cpuid();
main_test_bpf();
main_test_scandirat();
main_test_sched_getcpu();
@@ -192,7 +173,6 @@ int main(int argc, char *argv[])
main_test_reallocarray();
main_test_libzstd();
main_test_libtraceevent();
- main_test_libtracefs();
return 0;
}
diff --git a/tools/build/feature/test-get_cpuid.c b/tools/build/feature/test-get_cpuid.c
deleted file mode 100644
index bb4f065f28a6..000000000000
--- a/tools/build/feature/test-get_cpuid.c
+++ /dev/null
@@ -1,8 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <cpuid.h>
-
-int main(void)
-{
- unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
- return __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
-}
diff --git a/tools/build/feature/test-get_current_dir_name.c b/tools/build/feature/test-get_current_dir_name.c
deleted file mode 100644
index c3c201691b4f..000000000000
--- a/tools/build/feature/test-get_current_dir_name.c
+++ /dev/null
@@ -1,11 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-#include <unistd.h>
-#include <stdlib.h>
-
-int main(void)
-{
- free(get_current_dir_name());
- return 0;
-}
-#undef _GNU_SOURCE
diff --git a/tools/build/feature/test-libbpf-strings.c b/tools/build/feature/test-libbpf-strings.c
deleted file mode 100644
index 83e6c45f5c85..000000000000
--- a/tools/build/feature/test-libbpf-strings.c
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <bpf/btf.h>
-
-int main(void)
-{
- struct btf_dump_type_data_opts opts;
-
- opts.emit_strings = 0;
- return opts.emit_strings;
-}
diff --git a/tools/build/feature/test-libslang-include-subdir.c b/tools/build/feature/test-libslang-include-subdir.c
deleted file mode 100644
index 3ea47ec7590e..000000000000
--- a/tools/build/feature/test-libslang-include-subdir.c
+++ /dev/null
@@ -1,7 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <slang/slang.h>
-
-int main(void)
-{
- return SLsmg_init_smg();
-}
diff --git a/tools/dma/.gitignore b/tools/dma/.gitignore
new file mode 100644
index 000000000000..94b68cf4147b
--- /dev/null
+++ b/tools/dma/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+dma_map_benchmark
+include/linux/map_benchmark.h
diff --git a/tools/dma/Makefile b/tools/dma/Makefile
new file mode 100644
index 000000000000..e4abf37bf020
--- /dev/null
+++ b/tools/dma/Makefile
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../scripts/Makefile.include
+
+bindir ?= /usr/bin
+
+# This will work when dma is built in tools env. where srctree
+# isn't set and when invoked from selftests build, where srctree
+# is set to ".". building_out_of_srctree is undefined for in srctree
+# builds
+ifndef building_out_of_srctree
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := dma_map_benchmark
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+#
+# We need the following to be outside of kernel tree
+#
+$(OUTPUT)include/linux/map_benchmark.h: ../../include/uapi/linux/map_benchmark.h
+ mkdir -p $(OUTPUT)include/linux 2>&1 || true
+ ln -sf $(CURDIR)/../../include/uapi/linux/map_benchmark.h $@
+
+prepare: $(OUTPUT)include/linux/map_benchmark.h
+
+FORCE:
+
+DMA_MAP_BENCHMARK = dma_map_benchmark
+$(DMA_MAP_BENCHMARK): prepare FORCE
+ $(CC) $(CFLAGS) $(DMA_MAP_BENCHMARK).c -o $(DMA_MAP_BENCHMARK)
+
+clean:
+ rm -f $(ALL_PROGRAMS)
+ rm -rf $(OUTPUT)include
+ find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
+
+install: $(ALL_PROGRAMS)
+ install -d -m 755 $(DESTDIR)$(bindir); \
+ for program in $(ALL_PROGRAMS); do \
+ install $$program $(DESTDIR)$(bindir); \
+ done
+
+.PHONY: all install clean prepare FORCE
diff --git a/tools/testing/selftests/dma/config b/tools/dma/config
index 6102ee3c43cd..6102ee3c43cd 100644
--- a/tools/testing/selftests/dma/config
+++ b/tools/dma/config
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/dma/dma_map_benchmark.c
index b12f1f9babf8..dd0ed528e6df 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/dma/dma_map_benchmark.c
@@ -10,7 +10,6 @@
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
-#include <linux/types.h>
#include <linux/map_benchmark.h>
#define NSEC_PER_MSEC 1000000L
@@ -118,7 +117,7 @@ int main(int argc, char **argv)
}
printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
- threads, seconds, node, dir[directions], granule);
+ threads, seconds, node, directions[dir], granule);
printf("average map latency(us):%.1f standard deviation:%.1f\n",
map.avg_map_100ns/10.0, map.map_stddev/10.0);
printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
diff --git a/tools/docs/check-variable-fonts.py b/tools/docs/check-variable-fonts.py
new file mode 100755
index 000000000000..958d5a745724
--- /dev/null
+++ b/tools/docs/check-variable-fonts.py
@@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) Akira Yokosawa, 2024
+#
+# Ported to Python by (c) Mauro Carvalho Chehab, 2025
+#
+# pylint: disable=C0103
+
+"""
+Detect problematic Noto CJK variable fonts.
+
+For more details, see .../tools/lib/python/kdoc/latex_fonts.py.
+"""
+
+import argparse
+import sys
+import os.path
+
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python'))
+
+from kdoc.latex_fonts import LatexFontChecker
+
+checker = LatexFontChecker()
+
+parser=argparse.ArgumentParser(description=checker.description(),
+ formatter_class=argparse.RawTextHelpFormatter)
+parser.add_argument("--deny-vf",
+ help="XDG_CONFIG_HOME dir containing fontconfig/fonts.conf file")
+
+args=parser.parse_args()
+
+msg = LatexFontChecker(args.deny_vf).check()
+if msg:
+ print(msg)
+
+sys.exit(1)
diff --git a/tools/docs/checktransupdate.py b/tools/docs/checktransupdate.py
new file mode 100755
index 000000000000..e894652369a5
--- /dev/null
+++ b/tools/docs/checktransupdate.py
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This script helps track the translation status of the documentation
+in different locales, e.g., zh_CN. More specifically, it uses `git log`
+commit to find the latest english commit from the translation commit
+(order by author date) and the latest english commits from HEAD. If
+differences occur, report the file and commits that need to be updated.
+
+The usage is as follows:
+- tools/docs/checktransupdate.py -l zh_CN
+This will print all the files that need to be updated or translated in the zh_CN locale.
+- tools/docs/checktransupdate.py Documentation/translations/zh_CN/dev-tools/testing-overview.rst
+This will only print the status of the specified file.
+
+The output is something like:
+Documentation/dev-tools/kfence.rst
+No translation in the locale of zh_CN
+
+Documentation/translations/zh_CN/dev-tools/testing-overview.rst
+commit 42fb9cfd5b18 ("Documentation: dev-tools: Add link to RV docs")
+1 commits needs resolving in total
+"""
+
+import os
+import re
+import time
+import logging
+from argparse import ArgumentParser, ArgumentTypeError, BooleanOptionalAction
+from datetime import datetime
+
+
+def get_origin_path(file_path):
+ """Get the origin path from the translation path"""
+ paths = file_path.split("/")
+ tidx = paths.index("translations")
+ opaths = paths[:tidx]
+ opaths += paths[tidx + 2 :]
+ return "/".join(opaths)
+
+
+def get_latest_commit_from(file_path, commit):
+ """Get the latest commit from the specified commit for the specified file"""
+ command = f"git log --pretty=format:%H%n%aD%n%cD%n%n%B {commit} -1 -- {file_path}"
+ logging.debug(command)
+ pipe = os.popen(command)
+ result = pipe.read()
+ result = result.split("\n")
+ if len(result) <= 1:
+ return None
+
+ logging.debug("Result: %s", result[0])
+
+ return {
+ "hash": result[0],
+ "author_date": datetime.strptime(result[1], "%a, %d %b %Y %H:%M:%S %z"),
+ "commit_date": datetime.strptime(result[2], "%a, %d %b %Y %H:%M:%S %z"),
+ "message": result[4:],
+ }
+
+
+def get_origin_from_trans(origin_path, t_from_head):
+ """Get the latest origin commit from the translation commit"""
+ o_from_t = get_latest_commit_from(origin_path, t_from_head["hash"])
+ while o_from_t is not None and o_from_t["author_date"] > t_from_head["author_date"]:
+ o_from_t = get_latest_commit_from(origin_path, o_from_t["hash"] + "^")
+ if o_from_t is not None:
+ logging.debug("tracked origin commit id: %s", o_from_t["hash"])
+ return o_from_t
+
+
+def get_origin_from_trans_smartly(origin_path, t_from_head):
+ """Get the latest origin commit from the formatted translation commit:
+ (1) update to commit HASH (TITLE)
+ (2) Update the translation through commit HASH (TITLE)
+ """
+    # capture group for a 12-character (abbreviated) commit hash
+ HASH = r'([0-9a-f]{12})'
+ # pattern 1: contains "update to commit HASH"
+ pat_update_to = re.compile(rf'update to commit {HASH}')
+ # pattern 2: contains "Update the translation through commit HASH"
+ pat_update_translation = re.compile(rf'Update the translation through commit {HASH}')
+
+ origin_commit_hash = None
+ for line in t_from_head["message"]:
+ # check if the line matches the first pattern
+ match = pat_update_to.search(line)
+ if match:
+ origin_commit_hash = match.group(1)
+ break
+ # check if the line matches the second pattern
+ match = pat_update_translation.search(line)
+ if match:
+ origin_commit_hash = match.group(1)
+ break
+ if origin_commit_hash is None:
+ return None
+ o_from_t = get_latest_commit_from(origin_path, origin_commit_hash)
+ if o_from_t is not None:
+ logging.debug("tracked origin commit id: %s", o_from_t["hash"])
+ return o_from_t
+
+
+def get_commits_count_between(opath, commit1, commit2):
+ """Get the commits count between two commits for the specified file"""
+ command = f"git log --pretty=format:%H {commit1}...{commit2} -- {opath}"
+ logging.debug(command)
+ pipe = os.popen(command)
+ result = pipe.read().split("\n")
+ # filter out empty lines
+ result = list(filter(lambda x: x != "", result))
+ return result
+
+
+def pretty_output(commit):
+ """Pretty print the commit message"""
+ command = f"git log --pretty='format:%h (\"%s\")' -1 {commit}"
+ logging.debug(command)
+ pipe = os.popen(command)
+ return pipe.read()
+
+
+def valid_commit(commit):
+ """Check if the commit is valid or not"""
+ msg = pretty_output(commit)
+ return "Merge tag" not in msg
+
+def check_per_file(file_path):
+    """Check the translation status for the specified file and log the commits it is missing"""
+    opath = get_origin_path(file_path)
+
+    if not os.path.isfile(opath):
+        logging.error("Cannot find the origin path for %s", file_path)
+        return
+
+    o_from_head = get_latest_commit_from(opath, "HEAD")
+    t_from_head = get_latest_commit_from(file_path, "HEAD")
+
+    if o_from_head is None or t_from_head is None:
+        logging.error("Cannot find the latest commit for %s", file_path)
+        return
+
+    o_from_t = get_origin_from_trans_smartly(opath, t_from_head)
+    # notice, o_from_t from get_*_smartly() is always more accurate than from get_*()
+    if o_from_t is None:
+        o_from_t = get_origin_from_trans(opath, t_from_head)
+
+    if o_from_t is None:
+        logging.error("Error: Cannot find the latest origin commit for %s", file_path)
+        return
+
+    if o_from_head["hash"] == o_from_t["hash"]:
+        logging.debug("No update needed for %s", file_path)
+    else:
+        logging.info(file_path)
+        commits = get_commits_count_between(
+            opath, o_from_t["hash"], o_from_head["hash"]
+        )
+        count = 0
+        for commit in commits:
+            if valid_commit(commit):
+                logging.info("commit %s", pretty_output(commit))
+                count += 1
+        logging.info("%d commits needs resolving in total\n", count)
+
+
+def valid_locales(locale):
+    """argparse type check: return locale if Documentation/translations/<locale> exists, else raise"""
+    script_path = os.path.dirname(os.path.abspath(__file__))
+    linux_path = os.path.join(script_path, "../..")
+    if not os.path.isdir(f"{linux_path}/Documentation/translations/{locale}"):
+        raise ArgumentTypeError(f"Invalid locale: {locale}")
+    return locale
+
+
+def list_files_with_excluding_folders(folder, exclude_folders, include_suffix):
+ """List all files with the specified suffix in the folder and its subfolders"""
+ files = []
+ stack = [folder]
+
+ while stack:
+ pwd = stack.pop()
+ # filter out the exclude folders
+ if os.path.basename(pwd) in exclude_folders:
+ continue
+ # list all files and folders
+ for item in os.listdir(pwd):
+ ab_item = os.path.join(pwd, item)
+ if os.path.isdir(ab_item):
+ stack.append(ab_item)
+ else:
+ if ab_item.endswith(include_suffix):
+ files.append(ab_item)
+
+ return files
+
+
+class DmesgFormatter(logging.Formatter):
+ """Custom dmesg logging formatter"""
+ def format(self, record):
+ timestamp = time.time()
+ formatted_time = f"[{timestamp:>10.6f}]"
+ log_message = f"{formatted_time} {record.getMessage()}"
+ return log_message
+
+
+def config_logging(log_level, log_file="checktransupdate.log"):
+ """configure logging based on the log level"""
+ # set up the root logger
+ logger = logging.getLogger()
+ logger.setLevel(log_level)
+
+ # Create console handler
+ console_handler = logging.StreamHandler()
+ console_handler.setLevel(log_level)
+
+ # Create file handler
+ file_handler = logging.FileHandler(log_file)
+ file_handler.setLevel(log_level)
+
+ # Create formatter and add it to the handlers
+ formatter = DmesgFormatter()
+ console_handler.setFormatter(formatter)
+ file_handler.setFormatter(formatter)
+
+ # Add the handler to the logger
+ logger.addHandler(console_handler)
+ logger.addHandler(file_handler)
+
+
+def main():
+    """Main function of the script"""
+    script_path = os.path.dirname(os.path.abspath(__file__))
+    linux_path = os.path.join(script_path, "../..")
+
+    parser = ArgumentParser(description="Check the translation update")
+    parser.add_argument(
+        "-l",
+        "--locale",
+        default="zh_CN",
+        type=valid_locales,
+        help="Locale to check when files are not specified",
+    )
+
+    parser.add_argument(
+        "--print-missing-translations",
+        action=BooleanOptionalAction,
+        default=True,
+        help="Print files that do not have translations",
+    )
+
+    parser.add_argument(
+        '--log',
+        default='INFO',
+        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
+        help='Set the logging level')
+
+    parser.add_argument(
+        '--logfile',
+        default='checktransupdate.log',
+        help='Set the logging file (default: checktransupdate.log)')
+
+    parser.add_argument(
+        "files", nargs="*", help="Files to check, if not specified, check all files"
+    )
+    args = parser.parse_args()
+
+    # Configure logging based on the --log and --logfile arguments
+    log_level = getattr(logging, args.log.upper(), logging.INFO)
+    config_logging(log_level, args.logfile)
+
+    # Get files relative to linux path
+    files = args.files
+    if len(files) == 0:
+        offical_files = list_files_with_excluding_folders(
+            os.path.join(linux_path, "Documentation"), ["translations", "output"], "rst"
+        )
+
+        for file in offical_files:
+            # split the path into parts
+            path_parts = file.split(os.sep)
+            # find the index of the "Documentation" directory
+            kindex = path_parts.index("Documentation")
+            # insert the translations and locale after the Documentation directory
+            new_path_parts = path_parts[:kindex + 1] + ["translations", args.locale] \
+                           + path_parts[kindex + 1 :]
+            # join the path parts back together
+            new_file = os.sep.join(new_path_parts)
+            if os.path.isfile(new_file):
+                files.append(new_file)
+            else:
+                if args.print_missing_translations:
+                    logging.info(os.path.relpath(os.path.abspath(file), linux_path))
+                    logging.info("No translation in the locale of %s\n", args.locale)
+
+    files = list(map(lambda x: os.path.relpath(os.path.abspath(x), linux_path), files))
+
+    # cd to linux root directory
+    os.chdir(linux_path)
+
+    for file in files:
+        check_per_file(file)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/documentation-file-ref-check b/tools/docs/documentation-file-ref-check
new file mode 100755
index 000000000000..0cad42f6943b
--- /dev/null
+++ b/tools/docs/documentation-file-ref-check
@@ -0,0 +1,245 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+#
+# Treewide grep for references to files under Documentation, and report
+# non-existing files in stderr.
+
+use warnings;
+use strict;
+use Getopt::Long qw(:config no_auto_abbrev);
+
+# NOTE: only add things here when the file was gone, but the text wants
+# to mention a past documentation file, for example, to give credits for
+# the original work.
+my %false_positives = (
+ "Documentation/scsi/scsi_mid_low_api.rst" => "Documentation/Configure.help",
+ "drivers/vhost/vhost.c" => "Documentation/virtual/lguest/lguest.c",
+);
+
+my $scriptname = $0;
+$scriptname =~ s,tools/docs/([^/]+/),$1,;
+
+# Parse arguments
+my $help = 0;
+my $fix = 0;
+my $warn = 0;
+
+if (! -e ".git") {
+ printf "Warning: can't check if file exists, as this is not a git tree\n";
+ exit 0;
+}
+
+GetOptions(
+ 'fix' => \$fix,
+ 'warn' => \$warn,
+ 'h|help|usage' => \$help,
+);
+
+if ($help != 0) {
+ print "$scriptname [--help] [--fix]\n";
+ exit -1;
+}
+
+# Step 1: find broken references
+print "Finding broken references. This may take a while... " if ($fix);
+
+my %broken_ref;
+
+my $doc_fix = 0;
+
+open IN, "git grep ':doc:\`' Documentation/|"
+ or die "Failed to run git grep";
+while (<IN>) {
+ next if (!m,^([^:]+):.*\:doc\:\`([^\`]+)\`,);
+ next if (m,sphinx/,);
+
+ my $file = $1;
+ my $d = $1;
+ my $doc_ref = $2;
+
+ my $f = $doc_ref;
+
+ $d =~ s,(.*/).*,$1,;
+ $f =~ s,.*\<([^\>]+)\>,$1,;
+
+ if ($f =~ m,^/,) {
+ $f = "$f.rst";
+ $f =~ s,^/,Documentation/,;
+ } else {
+ $f = "$d$f.rst";
+ }
+
+ next if (grep -e, glob("$f"));
+
+ if ($fix && !$doc_fix) {
+ print STDERR "\nWARNING: Currently, can't fix broken :doc:`` fields\n";
+ }
+ $doc_fix++;
+
+ print STDERR "$file: :doc:`$doc_ref`\n";
+}
+close IN;
+
+open IN, "git grep 'Documentation/'|"
+ or die "Failed to run git grep";
+while (<IN>) {
+ next if (!m/^([^:]+):(.*)/);
+
+ my $f = $1;
+ my $ln = $2;
+
+ # On linux-next, discard the Next/ directory
+ next if ($f =~ m,^Next/,);
+
+ # Makefiles and scripts contain nasty expressions to parse docs
+ next if ($f =~ m/Makefile/ || $f =~ m/\.(sh|py|pl|~|rej|org|orig)$/);
+
+ # It doesn't make sense to parse hidden files
+ next if ($f =~ m#/\.#);
+
+ # Skip this script
+ next if ($f eq $scriptname);
+
+ # Ignore the dir where documentation will be built
+ next if ($ln =~ m,\b(\S*)Documentation/output,);
+
+ if ($ln =~ m,\b(\S*)(Documentation/[A-Za-z0-9\_\.\,\~/\*\[\]\?+-]*)(.*),) {
+ my $prefix = $1;
+ my $ref = $2;
+ my $base = $2;
+ my $extra = $3;
+
+ # some file references are like:
+ # /usr/src/linux/Documentation/DMA-{API,mapping}.txt
+ # For now, ignore them
+ next if ($extra =~ m/^{/);
+
+ # Remove footnotes at the end like:
+ # Documentation/devicetree/dt-object-internal.txt[1]
+ $ref =~ s/(txt|rst)\[\d+]$/$1/;
+
+ # Remove ending ']' without any '['
+ $ref =~ s/\].*// if (!($ref =~ m/\[/));
+
+		# Remove punctuation marks at the end
+ $ref =~ s/[\,\.]+$//;
+
+ my $fulref = "$prefix$ref";
+
+ $fulref =~ s/^(\<file|ref)://;
+ $fulref =~ s/^[\'\`]+//;
+ $fulref =~ s,^\$\(.*\)/,,;
+ $base =~ s,.*/,,;
+
+ # Remove URL false-positives
+ next if ($fulref =~ m/^http/);
+
+ # Remove sched-pelt false-positive
+ next if ($fulref =~ m,^Documentation/scheduler/sched-pelt$,);
+
+ # Discard some build examples from Documentation/target/tcm_mod_builder.rst
+ next if ($fulref =~ m,mnt/sdb/lio-core-2.6.git/Documentation/target,);
+
+ # Check if exists, evaluating wildcards
+ next if (grep -e, glob("$ref $fulref"));
+
+		# Accept relative Documentation paths for tools/
+ if ($f =~ m/tools/) {
+ my $path = $f;
+ $path =~ s,(.*)/.*,$1,;
+ $path =~ s,testing/selftests/bpf,bpf/bpftool,;
+ next if (grep -e, glob("$path/$ref $path/../$ref $path/$fulref"));
+ }
+
+ # Discard known false-positives
+ if (defined($false_positives{$f})) {
+ next if ($false_positives{$f} eq $fulref);
+ }
+
+ if ($fix) {
+ if (!($ref =~ m/(scripts|Kconfig|Kbuild)/)) {
+ $broken_ref{$ref}++;
+ }
+ } elsif ($warn) {
+ print STDERR "Warning: $f references a file that doesn't exist: $fulref\n";
+ } else {
+ print STDERR "$f: $fulref\n";
+ }
+ }
+}
+close IN;
+
+exit 0 if (!$fix);
+
+# Step 2: Seek for file name alternatives
+print "Auto-fixing broken references. Please double-check the results\n";
+
+foreach my $ref (keys %broken_ref) {
+ my $new =$ref;
+
+ my $basedir = ".";
+ # On translations, only seek inside the translations directory
+ $basedir = $1 if ($ref =~ m,(Documentation/translations/[^/]+),);
+
+ # get just the basename
+ $new =~ s,.*/,,;
+
+ my $f="";
+
+ # usual reason for breakage: DT file moved around
+ if ($ref =~ /devicetree/) {
+ # usual reason for breakage: DT file renamed to .yaml
+ if (!$f) {
+ my $new_ref = $ref;
+ $new_ref =~ s/\.txt$/.yaml/;
+ $f=$new_ref if (-f $new_ref);
+ }
+
+ if (!$f) {
+ my $search = $new;
+ $search =~ s,^.*/,,;
+ $f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
+ if (!$f) {
+ # Manufacturer name may have changed
+ $search =~ s/^.*,//;
+ $f = qx(find Documentation/devicetree/ -iname "*$search*") if ($search);
+ }
+ }
+ }
+
+ # usual reason for breakage: file renamed to .rst
+ if (!$f) {
+ $new =~ s/\.txt$/.rst/;
+ $f=qx(find $basedir -iname $new) if ($new);
+ }
+
+ # usual reason for breakage: use dash or underline
+ if (!$f) {
+ $new =~ s/[-_]/[-_]/g;
+ $f=qx(find $basedir -iname $new) if ($new);
+ }
+
+ # Wild guess: seek for the same name on another place
+ if (!$f) {
+ $f = qx(find $basedir -iname $new) if ($new);
+ }
+
+ my @find = split /\s+/, $f;
+
+ if (!$f) {
+ print STDERR "ERROR: Didn't find a replacement for $ref\n";
+ } elsif (scalar(@find) > 1) {
+ print STDERR "WARNING: Won't auto-replace, as found multiple files close to $ref:\n";
+ foreach my $j (@find) {
+ $j =~ s,^./,,;
+ print STDERR " $j\n";
+ }
+ } else {
+ $f = $find[0];
+ $f =~ s,^./,,;
+ print "INFO: Replacing $ref to $f\n";
+ foreach my $j (qx(git grep -l $ref)) {
+ qx(sed "s\@$ref\@$f\@g" -i $j);
+ }
+ }
+}
diff --git a/tools/docs/features-refresh.sh b/tools/docs/features-refresh.sh
new file mode 100755
index 000000000000..c2288124e94a
--- /dev/null
+++ b/tools/docs/features-refresh.sh
@@ -0,0 +1,98 @@
+#
+# Small script that refreshes the kernel feature support status in place.
+#
+
+for F_FILE in Documentation/features/*/*/arch-support.txt; do
+ F=$(grep "^# Kconfig:" "$F_FILE" | cut -c26-)
+
+ #
+ # Each feature F is identified by a pair (O, K), where 'O' can
+ # be either the empty string (for 'nop') or "not" (the logical
+ # negation operator '!'); other operators are not supported.
+ #
+ O=""
+ K=$F
+ if [[ "$F" == !* ]]; then
+ O="not"
+ K=$(echo $F | sed -e 's/^!//g')
+ fi
+
+ #
+ # F := (O, K) is 'valid' iff there is a Kconfig file (for some
+ # arch) which contains K.
+ #
+ # Notice that this definition entails an 'asymmetry' between
+ # the case 'O = ""' and the case 'O = "not"'. E.g., F may be
+ # _invalid_ if:
+ #
+ # [case 'O = ""']
+ # 1) no arch provides support for F,
+ # 2) K does not exist (e.g., it was renamed/mis-typed);
+ #
+ # [case 'O = "not"']
+ # 3) all archs provide support for F,
+ # 4) as in (2).
+ #
+ # The rationale for adopting this definition (and, thus, for
+ # keeping the asymmetry) is:
+ #
+ # We want to be able to 'detect' (2) (or (4)).
+ #
+ # (1) and (3) may further warn the developers about the fact
+ # that K can be removed.
+ #
+ F_VALID="false"
+ for ARCH_DIR in arch/*/; do
+ K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+ K_GREP=$(grep "$K" $K_FILES)
+ if [ ! -z "$K_GREP" ]; then
+ F_VALID="true"
+ break
+ fi
+ done
+ if [ "$F_VALID" = "false" ]; then
+ printf "WARNING: '%s' is not a valid Kconfig\n" "$F"
+ fi
+
+ T_FILE="$F_FILE.tmp"
+ grep "^#" $F_FILE > $T_FILE
+ echo " -----------------------" >> $T_FILE
+ echo " | arch |status|" >> $T_FILE
+ echo " -----------------------" >> $T_FILE
+ for ARCH_DIR in arch/*/; do
+ ARCH=$(echo $ARCH_DIR | sed -e 's/^arch//g' | sed -e 's/\///g')
+ K_FILES=$(find $ARCH_DIR -name "Kconfig*")
+ K_GREP=$(grep "$K" $K_FILES)
+ #
+ # Arch support status values for (O, K) are updated according
+ # to the following rules.
+ #
+ # - ("", K) is 'supported by a given arch', if there is a
+ # Kconfig file for that arch which contains K;
+ #
+ # - ("not", K) is 'supported by a given arch', if there is
+ # no Kconfig file for that arch which contains K;
+ #
+ # - otherwise: preserve the previous status value (if any),
+ # default to 'not yet supported'.
+ #
+		# Notice that, according to these rules, invalid features may be
+ # updated/modified.
+ #
+ if [ "$O" = "" ] && [ ! -z "$K_GREP" ]; then
+ printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE
+ elif [ "$O" = "not" ] && [ -z "$K_GREP" ]; then
+ printf " |%12s: | ok |\n" "$ARCH" >> $T_FILE
+ else
+ S=$(grep -v "^#" "$F_FILE" | grep " $ARCH:")
+ if [ ! -z "$S" ]; then
+ echo "$S" >> $T_FILE
+ else
+ printf " |%12s: | TODO |\n" "$ARCH" \
+ >> $T_FILE
+ fi
+ fi
+ done
+ echo " -----------------------" >> $T_FILE
+ mv $T_FILE $F_FILE
+done
diff --git a/tools/docs/find-unused-docs.sh b/tools/docs/find-unused-docs.sh
new file mode 100755
index 000000000000..05552dbda5bc
--- /dev/null
+++ b/tools/docs/find-unused-docs.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# (c) 2017, Jonathan Corbet <corbet@lwn.net>
+# sayli karnik <karniksayli1995@gmail.com>
+#
+# This script detects files with kernel-doc comments for exported functions
+# that are not included in documentation.
+#
+# usage: Run 'tools/docs/find-unused-docs.sh directory' from top level of kernel
+# tree.
+#
+# example: $ tools/docs/find-unused-docs.sh drivers/scsi
+#
+# Licensed under the terms of the GNU GPL License
+
+if ! [ -d "Documentation" ]; then
+ echo "Run from top level of kernel tree"
+ exit 1
+fi
+
+if [ "$#" -ne 1 ]; then
+ echo "Usage: tools/docs/find-unused-docs.sh directory"
+ exit 1
+fi
+
+if ! [ -d "$1" ]; then
+ echo "Directory $1 doesn't exist"
+ exit 1
+fi
+
+# This script lives in tools/docs/, so the tree root is two levels up
+cd "$( dirname "${BASH_SOURCE[0]}" )/../.." || exit 1
+
+cd Documentation/ || exit 1
+
+echo "The following files contain kerneldoc comments for exported functions \
+that are not used in the formatted documentation"
+
+# FILES INCLUDED
+
+files_included=($(grep -rHR ".. kernel-doc" --include \*.rst | cut -d " " -f 3))
+
+declare -A FILES_INCLUDED
+
+for each in "${files_included[@]}"; do
+ FILES_INCLUDED[$each]="$each"
+ done
+
+cd ..
+
+# FILES NOT INCLUDED
+
+for file in `find $1 -name '*.c'`; do
+
+ if [[ ${FILES_INCLUDED[$file]+_} ]]; then
+ continue;
+ fi
+ str=$(PYTHONDONTWRITEBYTECODE=1 scripts/kernel-doc -export "$file" 2>/dev/null)
+ if [[ -n "$str" ]]; then
+ echo "$file"
+ fi
+ done
+
diff --git a/tools/docs/gen-redirects.py b/tools/docs/gen-redirects.py
new file mode 100755
index 000000000000..6a6ebf6f42dc
--- /dev/null
+++ b/tools/docs/gen-redirects.py
@@ -0,0 +1,54 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright © 2025, Oracle and/or its affiliates.
+# Author: Vegard Nossum <vegard.nossum@oracle.com>
+
+"""Generate HTML redirects for renamed Documentation/**.rst files using
+the output of tools/docs/gen-renames.py.
+
+Example:
+
+ tools/docs/gen-redirects.py --output Documentation/output/ < Documentation/.renames.txt
+"""
+
+import argparse
+import os
+import sys
+
+parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument('-o', '--output', required=True, help='output directory')
+
+args = parser.parse_args()
+
+for line in sys.stdin:
+    # Each input line is "<old> <new>"; split on the first space only
+    line = line.rstrip('\n')
+    old_name, new_name = line.split(' ', 1)
+
+    old_html_path = os.path.join(args.output, old_name + '.html')
+    new_html_path = os.path.join(args.output, new_name + '.html')
+
+    if not os.path.exists(new_html_path):
+        print(f"warning: target does not exist: {new_html_path} (redirect from {old_html_path})")
+        continue
+
+    old_html_dir = os.path.dirname(old_html_path)
+    # exist_ok avoids a check-then-create race on the directory
+    os.makedirs(old_html_dir, exist_ok=True)
+
+    relpath = os.path.relpath(new_name, os.path.dirname(old_name)) + '.html'
+
+    with open(old_html_path, 'w', encoding='utf-8') as f:
+        print(f"""\
+<!DOCTYPE html>
+
+<html lang="en">
+<head>
+    <title>This page has moved</title>
+    <meta http-equiv="refresh" content="0; url={relpath}">
+</head>
+<body>
+<p>This page has moved to <a href="{relpath}">{new_name}</a>.</p>
+</body>
+</html>""", file=f)
diff --git a/tools/docs/gen-renames.py b/tools/docs/gen-renames.py
new file mode 100755
index 000000000000..8cb3b2157d83
--- /dev/null
+++ b/tools/docs/gen-renames.py
@@ -0,0 +1,130 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright © 2025, Oracle and/or its affiliates.
+# Author: Vegard Nossum <vegard.nossum@oracle.com>
+
+"""Trawl repository history for renames of Documentation/**.rst files.
+
+Example:
+
+ tools/docs/gen-renames.py --rev HEAD > Documentation/.renames.txt
+"""
+
+import argparse
+import itertools
+import os
+import subprocess
+import sys
+
+parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
+parser.add_argument('--rev', default='HEAD', help='generate renames up to this revision')
+
+args = parser.parse_args()
+
+def normalize(path):
+ prefix = 'Documentation/'
+ suffix = '.rst'
+
+ assert path.startswith(prefix)
+ assert path.endswith(suffix)
+
+ return path[len(prefix):-len(suffix)]
+
+class Name(object):
+ def __init__(self, name):
+ self.names = [name]
+
+ def rename(self, new_name):
+ self.names.append(new_name)
+
+names = {
+}
+
+for line in subprocess.check_output([
+ 'git', 'log',
+ '--reverse',
+ '--oneline',
+ '--find-renames',
+ '--diff-filter=RD',
+ '--name-status',
+ '--format=commit %H',
+ # ~v4.8-ish is when Sphinx/.rst was added in the first place
+ f'v4.8..{args.rev}',
+ '--',
+ 'Documentation/'
+], text=True).splitlines():
+ # rename
+ if line.startswith('R'):
+ _, old, new = line[1:].split('\t', 2)
+
+ if old.endswith('.rst') and new.endswith('.rst'):
+ old = normalize(old)
+ new = normalize(new)
+
+ name = names.get(old)
+ if name is None:
+ name = Name(old)
+ else:
+ del names[old]
+
+ name.rename(new)
+ names[new] = name
+
+ continue
+
+ # delete
+ if line.startswith('D'):
+ _, old = line.split('\t', 1)
+
+ if old.endswith('.rst'):
+ old = normalize(old)
+
+ # TODO: we could save added/modified files as well and propose
+ # them as alternatives
+ name = names.get(old)
+ if name is None:
+ pass
+ else:
+ del names[old]
+
+ continue
+
+#
+# Get the set of current files so we can sanity check that we aren't
+# redirecting any of those
+#
+
+current_files = set()
+for line in subprocess.check_output([
+ 'git', 'ls-tree',
+ '-r',
+ '--name-only',
+ args.rev,
+ 'Documentation/',
+], text=True).splitlines():
+ if line.endswith('.rst'):
+ current_files.add(normalize(line))
+
+#
+# Format/group/output result
+#
+
+result = []
+for _, v in names.items():
+ old_names = v.names[:-1]
+ new_name = v.names[-1]
+
+ for old_name in old_names:
+ if old_name == new_name:
+ # A file was renamed to its new name twice; don't redirect that
+ continue
+
+ if old_name in current_files:
+ # A file was recreated with a former name; don't redirect those
+ continue
+
+ result.append((old_name, new_name))
+
+for old_name, new_name in sorted(result):
+ print(f"{old_name} {new_name}")
diff --git a/tools/docs/get_abi.py b/tools/docs/get_abi.py
new file mode 100755
index 000000000000..2f0b99401f26
--- /dev/null
+++ b/tools/docs/get_abi.py
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+# pylint: disable=R0903
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+import argparse
+import logging
+import os
+import sys
+
+# Import Python modules
+
+LIB_DIR = "../lib/python"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from abi.abi_parser import AbiParser # pylint: disable=C0413
+from abi.abi_regex import AbiRegex # pylint: disable=C0413
+from abi.helpers import ABI_DIR, DEBUG_HELP # pylint: disable=C0413
+from abi.system_symbols import SystemSymbols # pylint: disable=C0413
+
+# Command line classes
+
+
+REST_DESC = """
+Produce output in ReST format.
+
+The output is done on two sections:
+
+- Symbols: show all parsed symbols in alphabetic order;
+- Files: cross reference the content of each file with the symbols on it.
+"""
+
+class AbiRest:
+    """Initialize an argparse subparser for rest output"""
+
+    def __init__(self, subparsers):
+        """Register the "rest" subcommand and its options on subparsers"""
+
+        parser = subparsers.add_parser("rest",
+                                       formatter_class=argparse.RawTextHelpFormatter,
+                                       description=REST_DESC)
+
+        parser.add_argument("--enable-lineno", action="store_true",
+                            help="enable lineno")
+        parser.add_argument("--raw", action="store_true",
+                            help="output text as contained in the ABI files. "
+                                 "If not used, output will contain dynamically"
+                                 " generated cross references when possible.")
+        parser.add_argument("--no-file", action="store_true",
+                            help="Don't output the files section")
+        parser.add_argument("--show-hints", help="Show-hints")
+
+        parser.set_defaults(func=self.run)
+
+    def run(self, args):
+        """Parse the ABI files, check them for issues and print the ReST output"""
+
+        parser = AbiParser(args.dir, debug=args.debug)
+        parser.parse_abi()
+        parser.check_issues()
+
+        for t in parser.doc(args.raw, not args.no_file):
+            if args.enable_lineno:
+                print(f".. LINENO {t[1]}#{t[2]}\n\n")
+
+            print(t[0])
+
+class AbiValidate:
+    """Initialize an argparse subparser for ABI validation"""
+
+    def __init__(self, subparsers):
+        """Register the "validate" subcommand on subparsers"""
+
+        parser = subparsers.add_parser("validate",
+                                       formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+                                       description="Parse the ABI files and check them for issues")
+
+        parser.set_defaults(func=self.run)
+
+    def run(self, args):
+        """Parse the ABI files under args.dir and run the parser's issue checks"""
+
+        parser = AbiParser(args.dir, debug=args.debug)
+        parser.parse_abi()
+        parser.check_issues()
+
+
+class AbiSearch:
+ """Initialize an argparse subparser for ABI search"""
+
+ def __init__(self, subparsers):
+ """Initialize argparse subparsers"""
+
+ parser = subparsers.add_parser("search",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ description="Search ABI using a regular expression")
+
+ parser.add_argument("expression",
+ help="Case-insensitive search pattern for the ABI symbol")
+
+ parser.set_defaults(func=self.run)
+
+ def run(self, args):
+ """Run subparser"""
+
+ parser = AbiParser(args.dir, debug=args.debug)
+ parser.parse_abi()
+ parser.search_symbols(args.expression)
+
+UNDEFINED_DESC="""
+Check undefined ABIs on local machine.
+
+Read sysfs devnodes and check if the devnodes there are defined inside
+ABI documentation.
+
+The search logic tries to minimize the number of regular expressions to
+search per each symbol.
+
+By default, it runs on a single CPU, as Python support for CPU threads
+is still experimental, and multi-process runs on Python is very slow.
+
+On experimental tests, if the number of ABI symbols to search per devnode
+is contained on a limit of ~150 regular expressions, using a single CPU
+is a lot faster than using multiple processes. However, if the number of
+regular expressions to check is at the order of ~30000, using multiple
+CPUs speeds up the check.
+"""
+
+class AbiUndefined:
+    """
+    Initialize an argparse subparser for logic to check undefined ABI at
+    the current machine's sysfs
+    """
+
+    def __init__(self, subparsers):
+        """Register the "undefined" subcommand and its options on subparsers"""
+
+        parser = subparsers.add_parser("undefined",
+                                       formatter_class=argparse.RawTextHelpFormatter,
+                                       description=UNDEFINED_DESC)
+
+        parser.add_argument("-S", "--sysfs-dir", default="/sys",
+                            help="directory where sysfs is mounted")
+        parser.add_argument("-s", "--search-string",
+                            help="search string regular expression to limit symbol search")
+        parser.add_argument("-H", "--show-hints", action="store_true",
+                            help="Hints about definitions for missing ABI symbols.")
+        parser.add_argument("-j", "--jobs", "--max-workers", type=int, default=1,
+                            help="If bigger than one, enables multiprocessing.")
+        parser.add_argument("-c", "--max-chunk-size", type=int, default=50,
+                            help="Maximum chunk size")
+        parser.add_argument("-f", "--found", action="store_true",
+                            help="Also show found items. "
+                                 "Helpful to debug the parser.")
+        parser.add_argument("-d", "--dry-run", action="store_true",
+                            help="Don't actually search for undefined. "
+                                 "Helpful to debug the parser.")
+
+        parser.set_defaults(func=self.run)
+
+    def run(self, args):
+        """Build the ABI regexes and check the sysfs tree against them"""
+
+        abi = AbiRegex(args.dir, debug=args.debug,
+                       search_string=args.search_string)
+
+        abi_symbols = SystemSymbols(abi=abi, hints=args.show_hints,
+                                    sysfs=args.sysfs_dir)
+
+        abi_symbols.check_undefined_symbols(dry_run=args.dry_run,
+                                            found=args.found,
+                                            max_workers=args.jobs,
+                                            chunk_size=args.max_chunk_size)
+
+
+def main():
+    """Main program: parse the command line and dispatch to a subcommand"""
+
+    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+
+    # NOTE(review): DEBUG_HELP (from abi.helpers) presumably documents the
+    # debug levels, so attach it to --debug, not --dir -- confirm in helpers.
+    parser.add_argument("-d", "--debug", type=int, default=0, help=DEBUG_HELP)
+    parser.add_argument("-D", "--dir", default=ABI_DIR,
+                        help="directory with the ABI files")
+
+    subparsers = parser.add_subparsers()
+
+    AbiRest(subparsers)
+    AbiValidate(subparsers)
+    AbiSearch(subparsers)
+    AbiUndefined(subparsers)
+
+    args = parser.parse_args()
+
+    # Any non-zero debug level also turns on DEBUG logging
+    level = logging.DEBUG if args.debug else logging.INFO
+    logging.basicConfig(level=level, format="[%(levelname)s] %(message)s")
+
+    if "func" in args:
+        args.func(args)
+    else:
+        sys.exit(f"Please specify a valid command for {sys.argv[0]}")
+
+
+# Call main method
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/get_feat.py b/tools/docs/get_feat.py
new file mode 100755
index 000000000000..2b5155a1f134
--- /dev/null
+++ b/tools/docs/get_feat.py
@@ -0,0 +1,225 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0911,R0912,R0914,R0915
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+
+"""
+Parse the Linux Feature files and produce a ReST book.
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+
+from pprint import pprint
+
+LIB_DIR = "../../tools/lib/python"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from feat.parse_features import ParseFeature # pylint: disable=C0413
+
+SRCTREE = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../..")
+DEFAULT_DIR = "Documentation/features"
+
+
+class GetFeature:
+ """Helper class to parse feature parsing parameters"""
+
+    @staticmethod
+    def get_current_arch():
+        """Detects the current architecture, as named under arch/"""
+
+        proc = subprocess.run(["uname", "-m"], check=True,
+                              capture_output=True, text=True)
+
+        # Map ``uname -m`` machine names to kernel arch/ directory names
+        machine = proc.stdout.strip()
+        aliases = {"x86_64": "x86", "i386": "x86", "i486": "x86",
+                   "i586": "x86", "i686": "x86", "s390x": "s390",
+                   "aarch64": "arm64"}
+
+        return aliases.get(machine, machine)
+
+ def run_parser(self, args):
+ """Execute the feature parser"""
+
+ feat = ParseFeature(args.directory, args.debug, args.enable_fname)
+ data = feat.parse()
+
+ if args.debug > 2:
+ pprint(data)
+
+ return feat
+
+    def run_rest(self, args):
+        """
+        Generate tables in ReST format. Three types of tables are
+        supported, depending on the calling arguments:
+
+        - neither feature nor arch is passed: generates a full matrix;
+        - arch provided: generates a table of features for the
+          given architecture, optionally filtered by feature;
+        - only feature provided: generates a table with feature details,
+          showing on which architectures it is implemented.
+        """
+
+        feat = self.run_parser(args)
+
+        if args.arch:
+            rst = feat.output_arch_table(args.arch, args.feat)
+        elif args.feat:
+            rst = feat.output_feature(args.feat)
+        else:
+            rst = feat.output_matrix()
+
+        print(rst)
+
+ def run_current(self, args):
+ """
+ Instead of using a --arch parameter, get feature for the current
+ architecture.
+ """
+
+ args.arch = self.get_current_arch()
+
+ self.run_rest(args)
+
+ def run_list(self, args):
+ """
+ Generate a list of features for a given architecture, in a format
+ parseable by other scripts. The output format is not ReST.
+ """
+
+ if not args.arch:
+ args.arch = self.get_current_arch()
+
+ feat = self.run_parser(args)
+ msg = feat.list_arch_features(args.arch, args.feat)
+
+ print(msg)
+
+ def parse_arch(self, parser):
+ """Add a --arch parsing argument"""
+
+ parser.add_argument("--arch",
+ help="Output features for an specific"
+ " architecture, optionally filtering for a "
+ "single specific feature.")
+
+ def parse_feat(self, parser):
+ """Add a --feat parsing argument"""
+
+ parser.add_argument("--feat", "--feature",
+ help="Output features for a single specific "
+ "feature.")
+
+
+    def current_args(self, subparsers):
+        """Implement current argparse subparser"""
+
+        parser = subparsers.add_parser("current",
+                                       formatter_class=argparse.RawTextHelpFormatter,
+                                       description="Output table in ReST "
+                                                   "compatible ASCII format "
+                                                   "with features for this "
+                                                   "machine's architecture")
+
+        self.parse_feat(parser)
+        parser.set_defaults(func=self.run_current)
+
+ def rest_args(self, subparsers):
+ """Implement rest argparse subparser"""
+
+ parser = subparsers.add_parser("rest",
+ formatter_class=argparse.RawTextHelpFormatter,
+ description="Output table(s) in ReST "
+ "compatible ASCII format "
+ "with features in ReST "
+ "markup language. The "
+ "output is affected by "
+ "--arch or --feat/--feature"
+ " flags.")
+
+ self.parse_arch(parser)
+ self.parse_feat(parser)
+ parser.set_defaults(func=self.run_rest)
+
+ def list_args(self, subparsers):
+ """Implement list argparse subparser"""
+
+ parser = subparsers.add_parser("list",
+ formatter_class=argparse.RawTextHelpFormatter,
+ description="List features for this "
+ "machine's architecture, "
+ "using an easier to parse "
+ "format. The output is "
+ "affected by --arch flag.")
+
+ self.parse_arch(parser)
+ self.parse_feat(parser)
+ parser.set_defaults(func=self.run_list)
+
+ def validate_args(self, subparsers):
+ """Implement validate argparse subparser"""
+
+ parser = subparsers.add_parser("validate",
+ formatter_class=argparse.RawTextHelpFormatter,
+ description="Validate the contents of "
+ "the files under "
+ f"{DEFAULT_DIR}.")
+
+ parser.set_defaults(func=self.run_parser)
+
+    def parser(self):
+        """
+        Create an argparse parser with common options and several subparsers
+        """
+        parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
+
+        parser.add_argument("-d", "--debug", action="count", default=0,
+                            help="Put the script in verbose mode, useful for "
+                                 "debugging. Can be called multiple times, to "
+                                 "increase verbosity.")
+
+        parser.add_argument("--directory", "--dir", default=DEFAULT_DIR,
+                            help="Changes the location of the Feature files. "
+                                 f"By default, it uses the {DEFAULT_DIR} "
+                                 "directory.")
+
+        parser.add_argument("--enable-fname", action="store_true",
+                            help="Prints the file name of the feature files. "
+                                 "This can be used in order to track "
+                                 "dependencies during documentation build.")
+
+        subparsers = parser.add_subparsers()
+
+        self.current_args(subparsers)
+        self.rest_args(subparsers)
+        self.list_args(subparsers)
+        self.validate_args(subparsers)
+
+        args = parser.parse_args()
+
+        return args
+
+
+def main():
+ """Main program"""
+
+ feat = GetFeature()
+
+ args = feat.parser()
+
+ if "func" in args:
+ args.func(args)
+ else:
+ sys.exit(f"Please specify a valid command for {sys.argv[0]}")
+
+
+# Call main method
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/list-arch.sh b/tools/docs/list-arch.sh
new file mode 100755
index 000000000000..96fe83b7058b
--- /dev/null
+++ b/tools/docs/list-arch.sh
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Small script that visualizes the kernel feature support status
+# of an architecture.
+#
+# (If no arguments are given then it will print the host architecture's status.)
+#
+
+ARCH=${1:-$(uname -m | sed 's/x86_64/x86/' | sed 's/i386/x86/' | sed 's/s390x/s390/')}
+
+"$(dirname "$0")"/get_feat.py list --arch "$ARCH"
diff --git a/tools/docs/parse-headers.py b/tools/docs/parse-headers.py
new file mode 100755
index 000000000000..436acea4c6ca
--- /dev/null
+++ b/tools/docs/parse-headers.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2016, 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=C0103
+
+"""
+Convert a C header or source file ``FILE_IN``, into a ReStructured Text
+included via ..parsed-literal block with cross-references for the
+documentation files that describe the API. It accepts an optional
+``FILE_RULES`` file to describes what elements will be either ignored or
+be pointed to a non-default reference type/name.
+
+The output is written at ``FILE_OUT``.
+
+It is capable of identifying defines, functions, structs, typedefs,
+enums and enum symbols and create cross-references for all of them.
+It is also capable of distinguish #define used for specifying a Linux
+ioctl.
+
+The optional ``FILE_RULES`` contains a set of rules like:
+
+ ignore ioctl VIDIOC_ENUM_FMT
+ replace ioctl VIDIOC_DQBUF vidioc_qbuf
+ replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+"""
+
+import argparse, sys
+import os.path
+
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python'))
+from kdoc.parse_data_structs import ParseDataStructs
+from kdoc.enrich_formatter import EnrichFormatter
+
+def main():
+ """Main function"""
+ parser = argparse.ArgumentParser(description=__doc__,
+ formatter_class=EnrichFormatter)
+
+ parser.add_argument("-d", "--debug", action="count", default=0,
+ help="Increase debug level. Can be used multiple times")
+ parser.add_argument("-t", "--toc", action="store_true",
+ help="instead of a literal block, outputs a TOC table at the RST file")
+
+ parser.add_argument("file_in", help="Input C file")
+ parser.add_argument("file_out", help="Output RST file")
+ parser.add_argument("file_rules", nargs="?",
+ help="Exceptions file (optional)")
+
+ args = parser.parse_args()
+
+ parser = ParseDataStructs(debug=args.debug)
+ parser.parse_file(args.file_in, args.file_rules)
+
+ parser.debug_print()
+ parser.write_output(args.file_in, args.file_out, args.toc)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/sphinx-build-wrapper b/tools/docs/sphinx-build-wrapper
new file mode 100755
index 000000000000..7a5fcef25429
--- /dev/null
+++ b/tools/docs/sphinx-build-wrapper
@@ -0,0 +1,864 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#
+# pylint: disable=R0902, R0912, R0913, R0914, R0915, R0917, C0103
+#
+# Converted from docs Makefile and parallel-wrapper.sh, both under
+# GPLv2, copyrighted since 2008 by the following authors:
+#
+# Akira Yokosawa <akiyks@gmail.com>
+# Arnd Bergmann <arnd@arndb.de>
+# Breno Leitao <leitao@debian.org>
+# Carlos Bilbao <carlos.bilbao@amd.com>
+# Dave Young <dyoung@redhat.com>
+# Donald Hunter <donald.hunter@gmail.com>
+# Geert Uytterhoeven <geert+renesas@glider.be>
+# Jani Nikula <jani.nikula@intel.com>
+# Jan Stancek <jstancek@redhat.com>
+# Jonathan Corbet <corbet@lwn.net>
+# Joshua Clayton <stillcompiling@gmail.com>
+# Kees Cook <keescook@chromium.org>
+# Linus Torvalds <torvalds@linux-foundation.org>
+# Magnus Damm <damm+renesas@opensource.se>
+# Masahiro Yamada <masahiroy@kernel.org>
+# Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+# Maxim Cournoyer <maxim.cournoyer@gmail.com>
+# Peter Foley <pefoley2@pefoley.com>
+# Randy Dunlap <rdunlap@infradead.org>
+# Rob Herring <robh@kernel.org>
+# Shuah Khan <shuahkh@osg.samsung.com>
+# Thorsten Blum <thorsten.blum@toblux.com>
+# Tomas Winkler <tomas.winkler@intel.com>
+
+
+"""
+Sphinx build wrapper that handles Kernel-specific business rules:
+
+- it gets the Kernel build environment vars;
+- it determines what's the best parallelism;
+- it handles SPHINXDIRS
+
+This tool ensures that MIN_PYTHON_VERSION is satisfied. If version is
+below that, it seeks for a new Python version. If found, it re-runs using
+the newer version.
+"""
+
+import argparse
+import locale
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+
+from concurrent import futures
+from glob import glob
+
+
+LIB_DIR = "../lib/python"
+SRC_DIR = os.path.dirname(os.path.realpath(__file__))
+
+sys.path.insert(0, os.path.join(SRC_DIR, LIB_DIR))
+
+from kdoc.python_version import PythonVersion
+from kdoc.latex_fonts import LatexFontChecker
+from jobserver import JobserverExec # pylint: disable=C0413,C0411,E0401
+
+#
+# Some constants
+#
+VENV_DEFAULT = "sphinx_latest"
+MIN_PYTHON_VERSION = PythonVersion("3.7").version
+PAPER = ["", "a4", "letter"]
+
+TARGETS = {
+ "cleandocs": { "builder": "clean" },
+ "linkcheckdocs": { "builder": "linkcheck" },
+ "htmldocs": { "builder": "html" },
+ "epubdocs": { "builder": "epub", "out_dir": "epub" },
+ "texinfodocs": { "builder": "texinfo", "out_dir": "texinfo" },
+ "infodocs": { "builder": "texinfo", "out_dir": "texinfo" },
+ "mandocs": { "builder": "man", "out_dir": "man" },
+ "latexdocs": { "builder": "latex", "out_dir": "latex" },
+ "pdfdocs": { "builder": "latex", "out_dir": "latex" },
+ "xmldocs": { "builder": "xml", "out_dir": "xml" },
+}
+
+
+#
+# SphinxBuilder class
+#
+
+class SphinxBuilder:
+ """
+ Handles a sphinx-build target, adding needed arguments to build
+ with the Kernel.
+ """
+
+    def get_path(self, path, use_cwd=False, abs_path=False):
+        """
+        Ancillary routine to handle paths the right way, as shell does.
+
+        It first expands "~" and "~user". Then, if path is not absolute,
+        join self.srctree. Finally, if requested, convert to abspath.
+        """
+
+        path = os.path.expanduser(path)
+        if not path.startswith("/"):
+            if use_cwd:
+                base = os.getcwd()
+            else:
+                base = self.srctree
+
+            path = os.path.join(base, path)
+
+        if abs_path:
+            return os.path.abspath(path)
+
+        return path
+
+    def check_rust(self):
+        """
+        Sets self.rustdoc to True if .config under srctree enables CONFIG_RUST
+        """
+        self.rustdoc = False
+
+        config = os.path.join(self.srctree, ".config")
+
+        if not os.path.isfile(config):
+            return
+
+        re_rust = re.compile(r"CONFIG_RUST=(m|y)")
+
+        try:
+            with open(config, "r", encoding="utf-8") as fp:
+                for line in fp:
+                    if re_rust.match(line):
+                        self.rustdoc = True
+                        return
+
+        except OSError as e:
+            print(f"Failed to read {config}: {e}", file=sys.stderr)
+
+ def get_sphinx_extra_opts(self, n_jobs):
+ """
+ Get the number of jobs to be used for docs build passed via command
+ line and desired sphinx verbosity.
+
+ The number of jobs can be on different places:
+
+ 1) It can be passed via "-j" argument;
+ 2) The SPHINXOPTS="-j8" env var may have "-j";
+ 3) if called via GNU make, -j specifies the desired number of jobs.
+ with GNU makefile, this number is available via POSIX jobserver;
+ 4) if none of the above is available, it should default to "-jauto",
+ and let sphinx decide the best value.
+ """
+
+ #
+ # SPHINXOPTS env var, if used, contains extra arguments to be used
+ # by sphinx-build time. Among them, it may contain sphinx verbosity
+ # and desired number of parallel jobs.
+ #
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-j', '--jobs', type=int)
+ parser.add_argument('-q', '--quiet', action='store_true')
+
+ #
+ # Other sphinx-build arguments go as-is, so place them
+ # at self.sphinxopts, using shell parser
+ #
+ sphinxopts = shlex.split(os.environ.get("SPHINXOPTS", ""))
+
+ #
+ # Build a list of sphinx args, honoring verbosity here if specified
+ #
+
+ verbose = self.verbose
+ sphinx_args, self.sphinxopts = parser.parse_known_args(sphinxopts)
+ if sphinx_args.quiet is True:
+ verbose = False
+
+ #
+ # If the user explicitly sets "-j" at command line, use it.
+ # Otherwise, pick it from SPHINXOPTS args
+ #
+ if n_jobs:
+ self.n_jobs = n_jobs
+ elif sphinx_args.jobs:
+ self.n_jobs = sphinx_args.jobs
+ else:
+ self.n_jobs = None
+
+ if not verbose:
+ self.sphinxopts += ["-q"]
+
+ def __init__(self, builddir, venv=None, verbose=False, n_jobs=None,
+ interactive=None):
+ """Initialize internal variables"""
+ self.venv = venv
+ self.verbose = None
+
+ #
+ # Normal variables passed from Kernel's makefile
+ #
+ self.kernelversion = os.environ.get("KERNELVERSION", "unknown")
+ self.kernelrelease = os.environ.get("KERNELRELEASE", "unknown")
+ self.pdflatex = os.environ.get("PDFLATEX", "xelatex")
+
+ #
+ # Kernel main Makefile defines a PYTHON3 variable whose default is
+ # "python3". When set to a different value, it allows running a
+        # different version than the default official python3 package.
+ # Several distros package python3xx-sphinx packages with newer
+ # versions of Python and sphinx-build.
+ #
+ # Honor such variable different than default
+ #
+ self.python = os.environ.get("PYTHON3")
+ if self.python == "python3":
+ self.python = None
+
+ if not interactive:
+ self.latexopts = os.environ.get("LATEXOPTS", "-interaction=batchmode -no-shell-escape")
+ else:
+ self.latexopts = os.environ.get("LATEXOPTS", "")
+
+ if not verbose:
+ verbose = bool(os.environ.get("KBUILD_VERBOSE", "") != "")
+
+ if verbose is not None:
+ self.verbose = verbose
+
+ #
+ # Source tree directory. This needs to be at os.environ, as
+ # Sphinx extensions use it
+ #
+ self.srctree = os.environ.get("srctree")
+ if not self.srctree:
+ self.srctree = "."
+ os.environ["srctree"] = self.srctree
+
+ #
+ # Now that we can expand srctree, get other directories as well
+ #
+ self.sphinxbuild = os.environ.get("SPHINXBUILD", "sphinx-build")
+ self.kerneldoc = self.get_path(os.environ.get("KERNELDOC",
+ "scripts/kernel-doc.py"))
+ self.builddir = self.get_path(builddir, use_cwd=True, abs_path=True)
+
+ #
+ # Get directory locations for LaTeX build toolchain
+ #
+ self.pdflatex_cmd = shutil.which(self.pdflatex)
+ self.latexmk_cmd = shutil.which("latexmk")
+
+ self.env = os.environ.copy()
+
+ self.get_sphinx_extra_opts(n_jobs)
+
+ self.check_rust()
+
+ #
+ # If venv command line argument is specified, run Sphinx from venv
+ #
+ if venv:
+ bin_dir = os.path.join(venv, "bin")
+ if not os.path.isfile(os.path.join(bin_dir, "activate")):
+ sys.exit(f"Venv {venv} not found.")
+
+ # "activate" virtual env
+ self.env["PATH"] = bin_dir + ":" + self.env["PATH"]
+ self.env["VIRTUAL_ENV"] = venv
+ if "PYTHONHOME" in self.env:
+ del self.env["PYTHONHOME"]
+ print(f"Setting venv to {venv}")
+
+ def run_sphinx(self, sphinx_build, build_args, *args, **pwargs):
+ """
+ Executes sphinx-build using current python3 command.
+
+ When calling via GNU make, POSIX jobserver is used to tell how
+ many jobs are still available from a job pool. claim all remaining
+ jobs, as we don't want sphinx-build to run in parallel with other
+ jobs.
+
+ Despite that, the user may actually force a different value than
+ the number of available jobs via command line.
+
+ The "with" logic here is used to ensure that the claimed jobs will
+ be freed once subprocess finishes
+ """
+
+ with JobserverExec() as jobserver:
+ if jobserver.claim:
+ #
+ # when GNU make is used, claim available jobs from jobserver
+ #
+ n_jobs = str(jobserver.claim)
+ else:
+ #
+ # Otherwise, let sphinx decide by default
+ #
+ n_jobs = "auto"
+
+ #
+ # If explicitly requested via command line, override default
+ #
+ if self.n_jobs:
+ n_jobs = str(self.n_jobs)
+
+ #
+ # We can't simply call python3 sphinx-build, as OpenSUSE
+ # Tumbleweed uses an ELF binary file (/usr/bin/alts) to switch
+ # between different versions of sphinx-build. So, only call it
+ # prepending "python3.xx" when PYTHON3 variable is not default.
+ #
+ if self.python:
+ cmd = [self.python]
+ else:
+ cmd = []
+
+ cmd += [sphinx_build]
+ cmd += [f"-j{n_jobs}"]
+ cmd += build_args
+ cmd += self.sphinxopts
+
+ if self.verbose:
+ print(" ".join(cmd))
+
+ return subprocess.call(cmd, *args, **pwargs)
+
+ def handle_html(self, css, output_dir):
+ """
+ Extra steps for HTML and epub output.
+
+ For such targets, we need to ensure that CSS will be properly
+ copied to the output _static directory
+ """
+
+ if css:
+ css = os.path.expanduser(css)
+ if not css.startswith("/"):
+ css = os.path.join(self.srctree, css)
+
+ static_dir = os.path.join(output_dir, "_static")
+ os.makedirs(static_dir, exist_ok=True)
+
+ try:
+ shutil.copy2(css, static_dir)
+ except (OSError, IOError) as e:
+ print(f"Warning: Failed to copy CSS: {e}", file=sys.stderr)
+
+ if self.rustdoc:
+ print("Building rust docs")
+ if "MAKE" in self.env:
+ cmd = [self.env["MAKE"]]
+ else:
+ cmd = ["make", "LLVM=1"]
+
+ cmd += [ "rustdoc"]
+ if self.verbose:
+ print(" ".join(cmd))
+
+ try:
+ subprocess.run(cmd, check=True)
+ except subprocess.CalledProcessError as e:
+ print(f"Ignored errors when building rustdoc: {e}. Is RUST enabled?",
+ file=sys.stderr)
+
+ def build_pdf_file(self, latex_cmd, from_dir, path):
+ """Builds a single pdf file using latex_cmd"""
+ try:
+ subprocess.run(latex_cmd + [path],
+ cwd=from_dir, check=True, env=self.env)
+
+ return True
+ except subprocess.CalledProcessError:
+ return False
+
+    def pdf_parallel_build(self, tex_suffix, latex_cmd, tex_files, n_jobs):
+        """
+        Build PDF files in parallel if possible.
+
+        Arguments:
+          - tex_suffix: file name suffix that identifies LaTeX sources;
+          - latex_cmd: command (as a list) used to build each file;
+          - tex_files: list of (from_dir, pdf_dir, entry) tuples, where
+            entry has "name" and "path" attributes;
+          - n_jobs: maximum number of parallel workers.
+
+        Returns a (builds, build_failed, max_len) tuple, where builds maps
+        the output name (relative to self.builddir) to a status string,
+        build_failed tells if any file failed and max_len is the length
+        of the longest output name.
+        """
+        builds = {}
+        build_failed = False
+        max_len = 0
+        has_tex = False
+
+        #
+        # LaTeX PDF error code is almost useless for us:
+        # any warning makes it non-zero. For kernel doc builds it always return
+        # non-zero even when build succeeds. So, let's do the best next thing:
+        # Ignore build errors. At the end, check if all PDF files were built,
+        # printing a summary with the built ones and returning 0 if all of
+        # them were actually built.
+        #
+        with futures.ThreadPoolExecutor(max_workers=n_jobs) as executor:
+            jobs = {}
+
+            for from_dir, pdf_dir, entry in tex_files:
+                name = entry.name
+
+                if not name.endswith(tex_suffix):
+                    continue
+
+                name = name[:-len(tex_suffix)]
+                has_tex = True
+
+                future = executor.submit(self.build_pdf_file, latex_cmd,
+                                         from_dir, entry.path)
+                jobs[future] = (from_dir, pdf_dir, name)
+
+            for future in futures.as_completed(jobs):
+                from_dir, pdf_dir, name = jobs[future]
+
+                pdf_name = name + ".pdf"
+                pdf_from = os.path.join(from_dir, pdf_name)
+                pdf_to = os.path.join(pdf_dir, pdf_name)
+                out_name = os.path.relpath(pdf_to, self.builddir)
+                max_len = max(max_len, len(out_name))
+
+                #
+                # future.result() re-raises whatever the worker raised.
+                # OSError covers a LaTeX binary that can't be executed and
+                # a failed rename: report the file as failed instead of
+                # aborting the other builds.
+                #
+                try:
+                    success = future.result()
+
+                    if success and os.path.exists(pdf_from):
+                        os.rename(pdf_from, pdf_to)
+
+                        #
+                        # if verbose, get the name of built PDF file
+                        #
+                        if self.verbose:
+                            builds[out_name] = "SUCCESS"
+                    else:
+                        builds[out_name] = "FAILED"
+                        build_failed = True
+                except (futures.Error, OSError) as e:
+                    builds[out_name] = f"FAILED ({repr(e)})"
+                    build_failed = True
+
+        #
+        # Handle case where no .tex files were found
+        #
+        if not has_tex:
+            out_name = "LaTeX files"
+            max_len = max(max_len, len(out_name))
+            builds[out_name] = "FAILED: no .tex files were generated"
+            build_failed = True
+
+        return builds, build_failed, max_len
+
+    def handle_pdf(self, output_dirs, deny_vf):
+        """
+        Extra steps for PDF output.
+
+        As PDF is handled via a LaTeX output, after building the .tex file,
+        a new build is needed to create the PDF output from the latex
+        directory.
+
+        Arguments:
+          - output_dirs: directories with the generated .tex files. PDFs
+            are placed at "../pdf", relative to each of them;
+          - deny_vf: optional directory stored at XDG_CONFIG_HOME to deny
+            variable fonts during the LaTeX build.
+
+        Exits with an error message if any PDF file can't be built.
+        """
+        tex_suffix = ".tex"
+        tex_files = []
+
+        #
+        # Since early 2024, Fedora and openSUSE tumbleweed have started
+        # deploying variable-font format of "Noto CJK", causing LaTeX
+        # to break with CJK. Work around it, by denying the variable font
+        # usage during xelatex build by passing the location of a config
+        # file with a deny list.
+        #
+        # See tools/docs/lib/latex_fonts.py for more details.
+        #
+        if deny_vf:
+            deny_vf = os.path.expanduser(deny_vf)
+            if os.path.isdir(deny_vf):
+                self.env["XDG_CONFIG_HOME"] = deny_vf
+
+        #
+        # The LaTeX command doesn't depend on the directory. Build it once,
+        # so it is defined even if output_dirs is empty.
+        #
+        if self.latexmk_cmd:
+            latex_cmd = [self.latexmk_cmd, f"-{self.pdflatex}"]
+        else:
+            latex_cmd = [self.pdflatex]
+
+        latex_cmd.extend(shlex.split(self.latexopts))
+
+        for from_dir in output_dirs:
+            pdf_dir = os.path.join(from_dir, "../pdf")
+            os.makedirs(pdf_dir, exist_ok=True)
+
+            # Get a list of tex files to process
+            with os.scandir(from_dir) as it:
+                for entry in it:
+                    if entry.name.endswith(tex_suffix):
+                        tex_files.append((from_dir, pdf_dir, entry))
+
+        #
+        # When using make, jobserver.claim is the number of jobs that were
+        # used with "-j" and that aren't used by other make targets
+        #
+        with JobserverExec() as jobserver:
+            #
+            # When the build comes from command line, serialize by default,
+            # except if the user explicitly sets the number of jobs.
+            #
+            n_jobs = 1
+
+            #
+            # Handle the case when a parameter is passed via command line,
+            # using it as default, if jobserver doesn't claim anything.
+            # n_jobs is either an integer or "auto". Only use it if it is
+            # a number.
+            #
+            if self.n_jobs:
+                try:
+                    n_jobs = int(self.n_jobs)
+                except ValueError:
+                    pass
+
+            if jobserver.claim:
+                n_jobs = jobserver.claim
+
+            builds, build_failed, max_len = self.pdf_parallel_build(tex_suffix,
+                                                                    latex_cmd,
+                                                                    tex_files,
+                                                                    n_jobs)
+
+        #
+        # In verbose mode, print a summary with the build results per file.
+        # Otherwise, print a single line with all failures, if any.
+        # On both cases, return code 1 indicates build failures,
+        #
+        if self.verbose:
+            msg = "Summary"
+            msg += "\n" + "=" * len(msg)
+            print()
+            print(msg)
+
+            for pdf_name, pdf_file in builds.items():
+                print(f"{pdf_name:<{max_len}}: {pdf_file}")
+
+            print()
+            if build_failed:
+                msg = LatexFontChecker().check()
+                if msg:
+                    print(msg)
+
+                sys.exit("Error: not all PDF files were created.")
+
+        elif build_failed:
+            #
+            # When not verbose, builds contains only the failed entries
+            #
+            n_failures = len(builds)
+            failures = ", ".join(builds.keys())
+
+            msg = LatexFontChecker().check()
+            if msg:
+                print(msg)
+
+            sys.exit(f"Error: Can't build {n_failures} PDF file(s): {failures}")
+
+    def handle_info(self, output_dirs):
+        """
+        Extra steps for Info output.
+
+        Texinfo generation needs one more pass: "make info" is run inside
+        each texinfo output directory. The first failure ends the script
+        with an error message.
+        """
+
+        make_cmd = ["make", "info"]
+
+        for texinfo_dir in output_dirs:
+            try:
+                subprocess.run(make_cmd, cwd=texinfo_dir, check=True)
+            except subprocess.CalledProcessError as e:
+                sys.exit(f"Error generating info docs: {e}")
+
+    def handle_man(self, kerneldoc, docs_dir, src_dir, output_dir):
+        """
+        Create man pages from kernel-doc output.
+
+        kernel-doc is called with "-m" for either the whole source tree
+        (when src_dir is the documentation root) or for the files that are
+        referenced by kernel-doc tags inside the .rst files under src_dir.
+        Its output is then split into one file per man page at output_dir.
+        """
+
+        kdoc_tag = re.compile(r"^\.\.\s+kernel-doc::\s*(\S+)")
+        man_header = re.compile(r'^\.TH "[^"]*" (\d+) "([^"]*)"')
+
+        if docs_dir == src_dir:
+            #
+            # Whole documentation: let kernel-doc handle the entire tree
+            #
+            kdoc_files = {self.srctree}
+        else:
+            kdoc_files = set()
+
+            for rst_name in glob(os.path.join(src_dir, "**"), recursive=True):
+                if not (os.path.isfile(rst_name) and rst_name.endswith(".rst")):
+                    continue
+
+                with open(rst_name, "r", encoding="utf-8") as in_fp:
+                    rst_lines = in_fp.read().split("\n")
+
+                #
+                # Keep only the tags that point to an existing file
+                #
+                for rst_line in rst_lines:
+                    tag = kdoc_tag.match(rst_line)
+                    if tag and os.path.isfile(tag.group(1)):
+                        kdoc_files.add(tag.group(1))
+
+            if not kdoc_files:
+                sys.exit(f"Directory {src_dir} doesn't contain kernel-doc tags")
+
+        cmd = [kerneldoc, "-m"]
+        cmd.extend(sorted(kdoc_files))
+        try:
+            if self.verbose:
+                print(" ".join(cmd))
+
+            result = subprocess.run(cmd, stdout=subprocess.PIPE, text=True)
+
+            if result.returncode:
+                print(f"Warning: kernel-doc returned {result.returncode} warnings")
+
+        except (OSError, ValueError, subprocess.SubprocessError) as e:
+            sys.exit(f"Failed to create man pages for {src_dir}: {repr(e)}")
+
+        #
+        # All pages come on a single stream. A ".TH" line starts a new page:
+        # close the previous file and open the one named after the page
+        # title and section. Lines before the first header are dropped.
+        #
+        out_fp = None
+        try:
+            for line in result.stdout.split("\n"):
+                header = man_header.match(line)
+                if header:
+                    if out_fp:
+                        out_fp.close()
+
+                    fname = f"{output_dir}/{header.group(2)}.{header.group(1)}"
+
+                    if self.verbose:
+                        print(f"Creating {fname}")
+                    out_fp = open(fname, "w", encoding="utf-8")
+
+                if out_fp:
+                    out_fp.write(line + '\n')
+        finally:
+            if out_fp:
+                out_fp.close()
+
+    def cleandocs(self, builder): # pylint: disable=W0613
+        """
+        Delete the whole build directory (self.builddir), ignoring errors.
+        The builder argument is not used.
+        """
+        build_root = self.builddir
+        shutil.rmtree(build_root, ignore_errors=True)
+
+    def build(self, target, sphinxdirs=None,
+              theme=None, css=None, paper=None, deny_vf=None,
+              skip_sphinx=False):
+        """
+        Build documentation using Sphinx. This is the core function of this
+        module. It prepares all arguments required by sphinx-build.
+
+        Arguments:
+          - target: a key of TARGETS;
+          - sphinxdirs: subdirectories of Documentation to build. It can be
+            a string or a list whose items are strings or lists. If not
+            set, the SPHINXDIRS environment variable (default: ".") is used;
+          - theme: value exported as DOCS_THEME;
+          - css: extra CSS file for html/epub targets;
+          - paper: paper size for the latex builder;
+          - deny_vf: passed to handle_pdf();
+          - skip_sphinx: don't run sphinx-build, doing just the extra steps.
+
+        Exits with an error message on failures.
+        """
+
+        builder = TARGETS[target]["builder"]
+        out_dir = TARGETS[target].get("out_dir", "")
+
+        #
+        # Cleandocs doesn't require sphinx-build
+        #
+        if target == "cleandocs":
+            self.cleandocs(builder)
+            return
+
+        #
+        # sphinx-build is called with env=self.env. Set the theme there as
+        # well, as self.env may not be the same object as os.environ.
+        #
+        if theme:
+            os.environ["DOCS_THEME"] = theme
+            self.env["DOCS_THEME"] = theme
+
+        #
+        # Other targets require sphinx-build, so check if it exists
+        #
+        if not skip_sphinx:
+            sphinxbuild = shutil.which(self.sphinxbuild, path=self.env["PATH"])
+            if not sphinxbuild and target != "mandocs":
+                sys.exit(f"Error: {self.sphinxbuild} not found in PATH.\n")
+
+        if target == "pdfdocs":
+            if not self.pdflatex_cmd and not self.latexmk_cmd:
+                sys.exit("Error: pdflatex or latexmk required for PDF generation")
+
+        docs_dir = os.path.abspath(os.path.join(self.srctree, "Documentation"))
+
+        #
+        # Fill in base arguments for Sphinx build
+        #
+        kerneldoc = self.kerneldoc
+        if kerneldoc.startswith(self.srctree):
+            kerneldoc = os.path.relpath(kerneldoc, self.srctree)
+
+        args = ["-b", builder, "-c", docs_dir]
+
+        if builder == "latex":
+            if not paper:
+                paper = PAPER[1]
+
+            args.extend(["-D", f"latex_elements.papersize={paper}paper"])
+
+        if self.rustdoc:
+            args.extend(["-t", "rustdoc"])
+
+        if not sphinxdirs:
+            sphinxdirs = os.environ.get("SPHINXDIRS", ".")
+
+        #
+        # The sphinx-build tool has a bug: internally, it tries to set
+        # locale with locale.setlocale(locale.LC_ALL, ''). This causes a
+        # crash if language is not set. Detect and fix it.
+        #
+        try:
+            locale.setlocale(locale.LC_ALL, '')
+        except locale.Error:
+            self.env["LC_ALL"] = "C"
+
+        #
+        # sphinxdirs can be a list or a whitespace-separated string.
+        # A plain string has to be wrapped, as otherwise the loop below
+        # would iterate over its characters.
+        #
+        if isinstance(sphinxdirs, str):
+            sphinxdirs = [sphinxdirs]
+
+        sphinxdirs_list = []
+        for sphinxdir in sphinxdirs:
+            if isinstance(sphinxdir, list):
+                sphinxdirs_list += sphinxdir
+            else:
+                sphinxdirs_list += sphinxdir.split()
+
+        #
+        # Step 1: Build each directory separately.
+        #
+        # This is not the best way of handling it, as cross-references between
+        # them will be broken, but this is what we've been doing since
+        # the beginning.
+        #
+        output_dirs = []
+        for sphinxdir in sphinxdirs_list:
+            src_dir = os.path.join(docs_dir, sphinxdir)
+            doctree_dir = os.path.join(self.builddir, ".doctrees")
+            output_dir = os.path.join(self.builddir, sphinxdir, out_dir)
+
+            #
+            # Make directory names canonical
+            #
+            src_dir = os.path.normpath(src_dir)
+            doctree_dir = os.path.normpath(doctree_dir)
+            output_dir = os.path.normpath(output_dir)
+
+            os.makedirs(doctree_dir, exist_ok=True)
+            os.makedirs(output_dir, exist_ok=True)
+
+            output_dirs.append(output_dir)
+
+            build_args = args + [
+                "-d", doctree_dir,
+                "-D", f"kerneldoc_bin={kerneldoc}",
+                "-D", f"version={self.kernelversion}",
+                "-D", f"release={self.kernelrelease}",
+                "-D", f"kerneldoc_srctree={self.srctree}",
+                src_dir,
+                output_dir,
+            ]
+
+            if target == "mandocs":
+                self.handle_man(kerneldoc, docs_dir, src_dir, output_dir)
+            elif not skip_sphinx:
+                try:
+                    result = self.run_sphinx(sphinxbuild, build_args,
+                                             env=self.env)
+
+                    if result:
+                        sys.exit(f"Build failed: return code: {result}")
+
+                except (OSError, ValueError, subprocess.SubprocessError) as e:
+                    sys.exit(f"Build failed: {repr(e)}")
+
+            #
+            # Ensure that each html/epub output will have needed static files
+            #
+            if target in ["htmldocs", "epubdocs"]:
+                self.handle_html(css, output_dir)
+
+        #
+        # Step 2: Some targets (PDF and info) require an extra step once
+        # sphinx-build finishes
+        #
+        if target == "pdfdocs":
+            self.handle_pdf(output_dirs, deny_vf)
+        elif target == "infodocs":
+            self.handle_info(output_dirs)
+
+def jobs_type(value):
+    """
+    Handle valid values for -j. Accepts Sphinx "-jauto", plus a number
+    equal or bigger than one.
+
+    Returns None if value is None, "auto" (lowercase) or the number of jobs
+    as a canonical decimal string, so it can be safely appended to "-j".
+
+    Raises argparse.ArgumentTypeError for anything else.
+    """
+    if value is None:
+        return None
+
+    if value.lower() == 'auto':
+        return 'auto'
+
+    try:
+        n_jobs = int(value)
+    except ValueError as exc:
+        raise argparse.ArgumentTypeError(f"Must be 'auto' or positive integer, got {value}") from exc
+
+    if n_jobs < 1:
+        raise argparse.ArgumentTypeError(f"Minimum jobs is 1, got {value}")
+
+    #
+    # int() accepts forms like " 4", "+4" or "1_0". Don't propagate them to
+    # the sphinx-build command line.
+    #
+    return str(n_jobs)
+
+def main():
+    """
+    Main function. The only mandatory argument is the target. If not
+    specified, the other arguments will use default values if not
+    specified at os.environ.
+
+    Parses the command line, checks the Python version and runs
+    SphinxBuilder.build() for the requested target.
+    """
+    parser = argparse.ArgumentParser(description="Kernel documentation builder")
+
+    parser.add_argument("target", choices=list(TARGETS.keys()),
+                        help="Documentation target to build")
+    parser.add_argument("--sphinxdirs", nargs="+",
+                        help="Specific directories to build")
+    parser.add_argument("--builddir", default="output",
+                        help="Directory where the build output is placed")
+
+    parser.add_argument("--theme", help="Sphinx theme to use")
+
+    parser.add_argument("--css", help="Custom CSS file for HTML/EPUB")
+
+    parser.add_argument("--paper", choices=PAPER, default=PAPER[0],
+                        help="Paper size for LaTeX/PDF output")
+
+    parser.add_argument('--deny-vf',
+                        help="Configuration to deny variable fonts on pdf builds")
+
+    parser.add_argument("-v", "--verbose", action='store_true',
+                        help="place build in verbose mode")
+
+    parser.add_argument('-j', '--jobs', type=jobs_type,
+                        help="Sets number of jobs to use with sphinx-build")
+
+    parser.add_argument('-i', '--interactive', action='store_true',
+                        help="Change latex default to run in interactive mode")
+
+    parser.add_argument('-s', '--skip-sphinx-build', action='store_true',
+                        help="Skip sphinx-build step")
+
+    parser.add_argument("-V", "--venv", nargs='?', const=f'{VENV_DEFAULT}',
+                        default=None,
+                        help=f'If used, run Sphinx from a venv dir (default dir: {VENV_DEFAULT})')
+
+    args = parser.parse_args()
+
+    #
+    # Done after parsing arguments, so "--help" and usage errors are
+    # handled before the version check
+    #
+    PythonVersion.check_python(MIN_PYTHON_VERSION, show_alternatives=True,
+                               bail_out=True)
+
+    builder = SphinxBuilder(builddir=args.builddir, venv=args.venv,
+                            verbose=args.verbose, n_jobs=args.jobs,
+                            interactive=args.interactive)
+
+    builder.build(args.target, sphinxdirs=args.sphinxdirs,
+                  theme=args.theme, css=args.css, paper=args.paper,
+                  deny_vf=args.deny_vf,
+                  skip_sphinx=args.skip_sphinx_build)
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/docs/sphinx-pre-install b/tools/docs/sphinx-pre-install
new file mode 100755
index 000000000000..965c9b093a41
--- /dev/null
+++ b/tools/docs/sphinx-pre-install
@@ -0,0 +1,1543 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#
+# pylint: disable=C0103,C0114,C0115,C0116,C0301,C0302
+# pylint: disable=R0902,R0904,R0911,R0912,R0914,R0915,R1705,R1710,E1121
+
+# Note: this script requires at least Python 3.6 to run.
+# Don't add changes not compatible with it, it is meant to report
+# incompatible python versions.
+
+"""
+Dependency checker for Sphinx documentation Kernel build.
+
+This module provides tools to check for all required dependencies needed to
+build documentation using Sphinx, including system packages, Python modules
+and LaTeX packages for PDF generation.
+
+It detects packages for a subset of Linux distributions used by Kernel
+maintainers, showing hints and missing dependencies.
+
+The main class SphinxDependencyChecker handles the dependency checking logic
+and provides recommendations for installing missing packages. It supports both
+system package installations and Python virtual environments. By default,
+system package install is recommended.
+"""
+
+import argparse
+import locale
+import os
+import re
+import subprocess
+import sys
+from glob import glob
+import os.path
+
+# The kdoc helper library is located relative to this script. Add its
+# directory to the module search path before importing from it.
+src_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.insert(0, os.path.join(src_dir, '../lib/python'))
+from kdoc.python_version import PythonVersion
+
+# Version thresholds, as returned by PythonVersion(...).version.
+# RECOMMENDED_VERSION is compared with the detected sphinx-build version.
+RECOMMENDED_VERSION = PythonVersion("3.4.3").version
+MIN_PYTHON_VERSION = PythonVersion("3.7").version
+
+
+class DepManager:
+    """
+    Manage package dependencies. There are three types of dependencies:
+
+    - System: dependencies required for docs build;
+    - Python: python dependencies for a native distro Sphinx install;
+    - PDF: dependencies needed by PDF builds.
+
+    Each dependency can be mandatory or optional. Not installing an optional
+    dependency won't break the build, but will cause degradation at the
+    docs output.
+    """
+
+    # Internal types of dependencies. Don't use them outside DepManager class.
+    _SYS_TYPE = 0
+    _PHY_TYPE = 1
+    _PDF_TYPE = 2
+
+    # Dependencies visible outside the class.
+    # The keys are tuple with: (type, is_mandatory flag).
+    #
+    # Currently we're not using all optional dep types. Yet, we'll keep all
+    # possible combinations here. They're not many, and that makes easier
+    # if later needed and for the name() method below
+
+    SYSTEM_MANDATORY = (_SYS_TYPE, True)
+    PYTHON_MANDATORY = (_PHY_TYPE, True)
+    PDF_MANDATORY = (_PDF_TYPE, True)
+
+    SYSTEM_OPTIONAL = (_SYS_TYPE, False)
+    PYTHON_OPTIONAL = (_PHY_TYPE, False)
+    PDF_OPTIONAL = (_PDF_TYPE, False)
+
+    def __init__(self, pdf):
+        """
+        Initialize internal vars:
+
+        - missing: missing dependencies dict, mapping a distro-independent
+          name for a missing dependency to its type.
+        - missing_pkg: ancillary dict containing missing dependencies in
+          distro namespace, organized by type.
+        - need: total number of needed dependencies. Not reset by
+          clear_deps().
+        - optional: total number of optional dependencies. Not reset by
+          clear_deps().
+        - pdf: Is PDF support enabled?
+        """
+        self.missing = {}
+        self.missing_pkg = {}
+        self.need = 0
+        self.optional = 0
+        self.pdf = pdf
+
+    @staticmethod
+    def name(dtype):
+        """
+        Ancillary routine to output a warn/error message reporting
+        missing dependencies.
+
+        Returns the heading used for the given (type, is_mandatory) tuple.
+        """
+        if dtype[0] == DepManager._SYS_TYPE:
+            msg = "build"
+        elif dtype[0] == DepManager._PHY_TYPE:
+            msg = "Python"
+        else:
+            msg = "PDF"
+
+        if dtype[1]:
+            return f"ERROR: {msg} mandatory deps missing"
+        else:
+            return f"Warning: {msg} optional deps missing"
+
+    @staticmethod
+    def is_optional(dtype):
+        """Ancillary routine to report if a dependency is optional"""
+        return not dtype[1]
+
+    @staticmethod
+    def is_pdf(dtype):
+        """Ancillary routine to report if a dependency is for PDF generation"""
+        return dtype[0] == DepManager._PDF_TYPE
+
+    def add_package(self, package, dtype):
+        """
+        Add a package at the self.missing dictionary, updating either the
+        optional or the need counter. Doesn't update missing_pkg.
+        """
+        self.missing[package] = dtype
+        if DepManager.is_optional(dtype):
+            self.optional += 1
+        else:
+            self.need += 1
+
+    def del_package(self, package):
+        """
+        Remove a package at the self.missing dictionary, if present.
+        Doesn't update missing_pkg nor the counters.
+        """
+        if package in self.missing:
+            del self.missing[package]
+
+    def clear_deps(self):
+        """
+        Clear dependencies without changing needed/optional.
+
+        This is an awkward way to have a separate section to recommend
+        a package after system main dependencies.
+
+        TODO: rework the logic to prevent needing it.
+        """
+
+        self.missing = {}
+        self.missing_pkg = {}
+
+    def check_missing(self, progs):
+        """
+        Update self.missing_pkg, using progs dict to convert from the
+        agnostic package name to distro-specific one. Names not found at
+        progs are used as-is.
+
+        Returns a string with the packages to be installed, sorted and
+        with eventual duplicates removed.
+        """
+
+        self.missing_pkg = {}
+
+        for prog, dtype in sorted(self.missing.items()):
+            # At least on some LTS distros like CentOS 7, texlive doesn't
+            # provide all packages we need. When such distros are
+            # detected, we have to disable PDF output.
+            #
+            # So, we need to ignore the packages that distros would
+            # need for LaTeX to work
+            if DepManager.is_pdf(dtype) and not self.pdf:
+                # Undo what add_package() did for this dependency,
+                # decrementing the same counter it has incremented.
+                #
+                # NOTE(review): calling check_missing() again without
+                # clearing self.missing decrements the counters again.
+                if DepManager.is_optional(dtype):
+                    self.optional -= 1
+                else:
+                    self.need -= 1
+                continue
+
+            if dtype not in self.missing_pkg:
+                self.missing_pkg[dtype] = []
+
+            self.missing_pkg[dtype].append(progs.get(prog, prog))
+
+        install = []
+        for pkgs in self.missing_pkg.values():
+            install += pkgs
+
+        return " ".join(sorted(set(install)))
+
+    def warn_install(self):
+        """
+        Emit warnings/errors related to missing packages: one line per
+        dependency type found at self.missing_pkg. Prints nothing if there
+        are no missing packages.
+        """
+
+        output_msg = ""
+
+        for dtype in sorted(self.missing_pkg.keys()):
+            progs = " ".join(sorted(set(self.missing_pkg[dtype])))
+            name = DepManager.name(dtype)
+
+            output_msg += f'{name}:\t{progs}\n'
+
+        if output_msg:
+            print(f"\n{output_msg}")
+
+class AncillaryMethods:
+    """
+    Ancillary methods used to check for missing dependencies of different
+    types, like binaries, python modules, rpm deps, etc.
+    """
+
+    @staticmethod
+    def which(prog):
+        """
+        Our own implementation of which(). We could instead use
+        shutil.which(), but this function is simple enough.
+        Probably faster to use this implementation than to import shutil.
+
+        Returns the full name of the first executable file named prog
+        found at PATH, or None if there's no such file.
+        """
+        for path in os.environ.get("PATH", "").split(os.pathsep):
+            full_path = os.path.join(path, prog)
+
+            # Directories also have the execution bit set. Skip them.
+            if os.path.isfile(full_path) and os.access(full_path, os.X_OK):
+                return full_path
+
+        return None
+
+    @staticmethod
+    def run(*args, **kwargs):
+        """
+        Execute a command, hiding its output by default.
+        Preserve compatibility with older Python versions.
+
+        Arguments are the same as subprocess.run(). With capture_output,
+        stdout and stderr are sent to pipes; otherwise they're discarded.
+        On both cases, an explicit stdout/stderr argument has precedence.
+
+        Returns what subprocess.run() returns.
+        """
+
+        capture_output = kwargs.pop('capture_output', False)
+
+        if capture_output:
+            if 'stdout' not in kwargs:
+                kwargs['stdout'] = subprocess.PIPE
+            if 'stderr' not in kwargs:
+                kwargs['stderr'] = subprocess.PIPE
+        else:
+            if 'stdout' not in kwargs:
+                kwargs['stdout'] = subprocess.DEVNULL
+            if 'stderr' not in kwargs:
+                kwargs['stderr'] = subprocess.DEVNULL
+
+        # Don't break with older Python versions
+        if 'text' in kwargs and sys.version_info < (3, 7):
+            kwargs['universal_newlines'] = kwargs.pop('text')
+
+        return subprocess.run(*args, **kwargs)
+
+class MissingCheckers(AncillaryMethods):
+    """
+    Contains some ancillary checkers for different types of binaries and
+    package managers.
+    """
+
+    def __init__(self, args, texlive):
+        """
+        Initialize its internal variables.
+
+        args shall have pdf, virtualenv and version_check attributes.
+        texlive is a dict mapping a LaTeX file name to the package that
+        provides it.
+        """
+        self.pdf = args.pdf
+        self.virtualenv = args.virtualenv
+        self.version_check = args.version_check
+        self.texlive = texlive
+
+        self.min_version = (0, 0, 0)
+        self.cur_version = (0, 0, 0)
+
+        self.deps = DepManager(self.pdf)
+
+        self.need_symlink = 0
+        self.need_sphinx = 0
+
+        self.verbose_warn_install = 1
+
+        self.virtenv_dir = ""
+        self.install = ""
+        self.python_cmd = ""
+
+        self.virtenv_prefix = ["sphinx_", "Sphinx_"]
+
+    def check_missing_file(self, files, package, dtype):
+        """
+        Does any of the files exist? If not, add the package to missing
+        dependencies.
+        """
+        for f in files:
+            if os.path.exists(f):
+                return
+        self.deps.add_package(package, dtype)
+
+    def check_program(self, prog, dtype):
+        """
+        Does the program exist and is it at the PATH?
+        If not, add it to missing dependencies.
+
+        Returns the program's full path, or None if not found.
+        """
+        found = self.which(prog)
+        if found:
+            return found
+
+        self.deps.add_package(prog, dtype)
+
+        return None
+
+    def check_perl_module(self, prog, dtype):
+        """
+        Does perl have a dependency? Is it available?
+        If not, add it to missing dependencies.
+
+        Right now, we still need Perl for doc build, as it is required
+        by some tools called at docs or kernel build time, like:
+
+            tools/docs/documentation-file-ref-check
+
+        Also, checkpatch is on Perl.
+        """
+
+        # While testing with lxc download template, one of the
+        # distros (Oracle) didn't have perl - nor even an option to install
+        # before installing oraclelinux-release-el9 package.
+        #
+        # Check it before running an error. If perl is not there,
+        # add it as a mandatory package, as some parts of the doc builder
+        # needs it.
+        if not self.which("perl"):
+            self.deps.add_package("perl", DepManager.SYSTEM_MANDATORY)
+            self.deps.add_package(prog, dtype)
+            return
+
+        try:
+            self.run(["perl", f"-M{prog}", "-e", "1"], check=True)
+        except subprocess.CalledProcessError:
+            self.deps.add_package(prog, dtype)
+
+    def check_python_module(self, module, is_optional=False):
+        """
+        Does a python module exist outside venv? If not, add it to missing
+        dependencies.
+
+        The check is done by importing the module with self.python_cmd.
+        """
+        if is_optional:
+            dtype = DepManager.PYTHON_OPTIONAL
+        else:
+            dtype = DepManager.PYTHON_MANDATORY
+
+        try:
+            self.run([self.python_cmd, "-c", f"import {module}"], check=True)
+        except subprocess.CalledProcessError:
+            self.deps.add_package(module, dtype)
+
+    def check_rpm_missing(self, pkgs, dtype):
+        """
+        Does a rpm package exist? If not, add it to missing dependencies.
+        """
+        for prog in pkgs:
+            try:
+                self.run(["rpm", "-q", prog], check=True)
+            except subprocess.CalledProcessError:
+                self.deps.add_package(prog, dtype)
+
+    def check_pacman_missing(self, pkgs, dtype):
+        """
+        Does a pacman package exist? If not, add it to missing dependencies.
+        """
+        for prog in pkgs:
+            try:
+                self.run(["pacman", "-Q", prog], check=True)
+            except subprocess.CalledProcessError:
+                self.deps.add_package(prog, dtype)
+
+    def check_missing_tex(self, is_optional=False):
+        """
+        Does a LaTeX package exist? If not, add it to missing dependencies.
+
+        Each file at self.texlive is searched with kpsewhich. If kpsewhich
+        is not available, all packages are added.
+        """
+        if is_optional:
+            dtype = DepManager.PDF_OPTIONAL
+        else:
+            dtype = DepManager.PDF_MANDATORY
+
+        kpsewhich = self.which("kpsewhich")
+        for prog, package in self.texlive.items():
+
+            # If kpsewhich is not there, just add it to deps
+            if not kpsewhich:
+                self.deps.add_package(package, dtype)
+                continue
+
+            # Check if the package is needed
+            try:
+                result = self.run(
+                    [kpsewhich, prog], stdout=subprocess.PIPE, text=True, check=True
+                )
+
+                # Didn't find. Add it
+                if not result.stdout.strip():
+                    self.deps.add_package(package, dtype)
+
+            except subprocess.CalledProcessError:
+                # kpsewhich returned an error. Add it, just in case
+                self.deps.add_package(package, dtype)
+
+    def get_sphinx_fname(self):
+        """
+        Gets the binary filename for sphinx-build.
+
+        SPHINXBUILD environment var has precedence. Otherwise, look for
+        sphinx-build and sphinx-build-3 at PATH, setting need_symlink if
+        only the latter exists. Returns an empty string if not found.
+        """
+        if "SPHINXBUILD" in os.environ:
+            return os.environ["SPHINXBUILD"]
+
+        fname = "sphinx-build"
+        if self.which(fname):
+            return fname
+
+        fname = "sphinx-build-3"
+        if self.which(fname):
+            self.need_symlink = 1
+            return fname
+
+        return ""
+
+    def get_sphinx_version(self, cmd):
+        """
+        Gets sphinx-build version.
+
+        Returns the version as parsed by PythonVersion.parse_version(), or
+        None if cmd can't be run or its output has no version.
+        """
+        env = os.environ.copy()
+
+        # The sphinx-build tool has a bug: internally, it tries to set
+        # locale with locale.setlocale(locale.LC_ALL, ''). This causes a
+        # crash if language is not set. Detect and fix it.
+        try:
+            locale.setlocale(locale.LC_ALL, '')
+        except Exception:
+            env["LC_ALL"] = "C"
+            env["LANG"] = "C"
+
+        try:
+            result = self.run([cmd, "--version"], env=env,
+                              stdout=subprocess.PIPE,
+                              stderr=subprocess.STDOUT,
+                              text=True, check=True)
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            return None
+
+        for line in result.stdout.split("\n"):
+            match = re.match(r"^sphinx-build\s+([\d\.]+)(?:\+(?:/[\da-f]+)|b\d+)?\s*$", line)
+            if match:
+                return PythonVersion.parse_version(match.group(1))
+
+            match = re.match(r"^Sphinx.*\s+([\d\.]+)\s*$", line)
+            if match:
+                return PythonVersion.parse_version(match.group(1))
+
+        return None
+
+    def check_sphinx(self, conf):
+        """
+        Checks Sphinx minimal requirements.
+
+        The minimal version comes from the needs_sphinx variable at conf.
+        Sets need_sphinx if sphinx-build is not found or is too old. Exits
+        if conf can't be read, if it has no needs_sphinx or if sphinx-build
+        doesn't report its version.
+        """
+        # self.min_version starts with a non-empty tuple, so it can't be
+        # used to detect if needs_sphinx was found.
+        found = False
+
+        try:
+            with open(conf, "r", encoding="utf-8") as f:
+                for line in f:
+                    match = re.match(r"^\s*needs_sphinx\s*=\s*[\'\"]([\d\.]+)[\'\"]", line)
+                    if match:
+                        self.min_version = PythonVersion.parse_version(match.group(1))
+                        found = True
+                        break
+        except IOError:
+            sys.exit(f"Can't open {conf}")
+
+        if not found or not self.min_version:
+            sys.exit(f"Can't get needs_sphinx version from {conf}")
+
+        self.virtenv_dir = self.virtenv_prefix[0] + "latest"
+
+        sphinx = self.get_sphinx_fname()
+        if not sphinx:
+            self.need_sphinx = 1
+            return
+
+        self.cur_version = self.get_sphinx_version(sphinx)
+        if not self.cur_version:
+            sys.exit(f"{sphinx} didn't return its version")
+
+        if self.cur_version < self.min_version:
+            curver = PythonVersion.ver_str(self.cur_version)
+            minver = PythonVersion.ver_str(self.min_version)
+
+            print(f"ERROR: Sphinx version is {curver}. It should be >= {minver}")
+            self.need_sphinx = 1
+            return
+
+        # On version check mode, just assume Sphinx has all mandatory deps
+        if self.version_check and self.cur_version >= RECOMMENDED_VERSION:
+            sys.exit(0)
+
+    def catcheck(self, filename):
+        """
+        Reads a file if it exists, returning as string, without leading
+        and trailing whitespaces.
+        If not found, returns an empty string.
+        """
+        if os.path.exists(filename):
+            with open(filename, "r", encoding="utf-8") as f:
+                return f.read().strip()
+        return ""
+
+    def get_system_release(self):
+        """
+        Determine the system type. There's no unique way that would work
+        with all distros with a minimal package install. So, several
+        methods are used here.
+
+        By default, it will use lsb_release function. If not available, it will
+        fall back to reading the known different places where the distro name
+        is stored.
+
+        Several modern distros now have /etc/os-release, which usually have
+        a decent coverage.
+
+        Returns a string describing the distro, which can be empty.
+        """
+
+        system_release = ""
+
+        if self.which("lsb_release"):
+            result = self.run(["lsb_release", "-d"], capture_output=True, text=True)
+            system_release = result.stdout.replace("Description:", "").strip()
+
+        release_files = [
+            "/etc/system-release",
+            "/etc/redhat-release",
+            "/etc/lsb-release",
+            "/etc/gentoo-release",
+        ]
+
+        if not system_release:
+            for f in release_files:
+                system_release = self.catcheck(f)
+                if system_release:
+                    break
+
+        # This seems more common than LSB these days
+        if not system_release:
+            os_var = {}
+            try:
+                with open("/etc/os-release", "r", encoding="utf-8") as f:
+                    for line in f:
+                        match = re.match(r"^([\w\d\_]+)=\"?([^\"]*)\"?\n", line)
+                        if match:
+                            os_var[match.group(1)] = match.group(2)
+
+                system_release = os_var.get("NAME", "")
+                if "VERSION_ID" in os_var:
+                    system_release += " " + os_var["VERSION_ID"]
+                elif "VERSION" in os_var:
+                    system_release += " " + os_var["VERSION"]
+            except IOError:
+                pass
+
+        if not system_release:
+            system_release = self.catcheck("/etc/issue")
+
+        system_release = system_release.strip()
+
+        return system_release
+
+class SphinxDependencyChecker(MissingCheckers):
+ """
+ Main class for checking Sphinx documentation build dependencies.
+
+ - Check for missing system packages;
+ - Check for missing Python modules;
+ - Check for missing LaTeX packages needed by PDF generation;
+ - Propose Sphinx install via Python Virtual environment;
+ - Propose Sphinx install via distro-specific package install.
+ """
+    def __init__(self, args):
+        """
+        Set up the checker state: texlive file/package map, detected
+        system release and the location of the Sphinx conf/requirements.
+        """
+
+        # LaTeX files needed by PDF builds and the texlive packages that
+        # provide them on Fedora and OpenSuse
+        texlive = {
+            "amsfonts.sty": "texlive-amsfonts",
+            "amsmath.sty": "texlive-amsmath",
+            "amssymb.sty": "texlive-amsfonts",
+            "amsthm.sty": "texlive-amscls",
+            "anyfontsize.sty": "texlive-anyfontsize",
+            "atbegshi.sty": "texlive-oberdiek",
+            "bm.sty": "texlive-tools",
+            "capt-of.sty": "texlive-capt-of",
+            "cmap.sty": "texlive-cmap",
+            "ctexhook.sty": "texlive-ctex",
+            "ecrm1000.tfm": "texlive-ec",
+            "eqparbox.sty": "texlive-eqparbox",
+            "eu1enc.def": "texlive-euenc",
+            "fancybox.sty": "texlive-fancybox",
+            "fancyvrb.sty": "texlive-fancyvrb",
+            "float.sty": "texlive-float",
+            "fncychap.sty": "texlive-fncychap",
+            "footnote.sty": "texlive-mdwtools",
+            "framed.sty": "texlive-framed",
+            "luatex85.sty": "texlive-luatex85",
+            "multirow.sty": "texlive-multirow",
+            "needspace.sty": "texlive-needspace",
+            "palatino.sty": "texlive-psnfss",
+            "parskip.sty": "texlive-parskip",
+            "polyglossia.sty": "texlive-polyglossia",
+            "tabulary.sty": "texlive-tabulary",
+            "threeparttable.sty": "texlive-threeparttable",
+            "titlesec.sty": "texlive-titlesec",
+            "ucs.sty": "texlive-ucs",
+            "upquote.sty": "texlive-upquote",
+            "wrapfig.sty": "texlive-wrapfig",
+        }
+
+        super().__init__(args, texlive)
+
+        self.need_pip = False
+        self.rec_sphinx_upgrade = 0
+
+        self.system_release = self.get_system_release()
+        self.activate_cmd = ""
+
+        # Cleared when the distro has no Sphinx package that fulfills
+        # our minimal requirements
+        self.package_supported = True
+
+        # Python version to be recommended, if any
+        self.recommend_python = None
+
+        # Hint that should be displayed just once
+        self.distro_msg = None
+
+        self.latest_avail_ver = (0, 0, 0)
+        self.venv_ver = (0, 0, 0)
+
+        # Files are relative to the Kernel source tree, if srctree is set
+        srctree = os.environ.get("srctree", ".")
+        doc_prefix = srctree + "/"
+
+        self.conf = doc_prefix + "Documentation/conf.py"
+        self.requirement_file = doc_prefix + "Documentation/sphinx/requirements.txt"
+
+    def get_install_progs(self, progs, cmd, extra=None):
+        """
+        Build the install hint for the missing dependencies.
+
+        progs maps the distro-independent names into the distro-specific
+        packages; cmd is the distro install command; extra is an optional
+        text with commands to be shown before it.
+
+        Returns the hint text, or None if nothing is missing or if cmd
+        is empty.
+        """
+        packages = self.deps.check_missing(progs)
+
+        if self.verbose_warn_install:
+            self.deps.warn_install()
+
+        if not packages or not cmd:
+            return None
+
+        hint = "You should run:" if self.verbose_warn_install else ""
+
+        if extra:
+            hint += "\n\t" + extra.replace("\n", "\n\t")
+
+        return hint + "\n\tsudo " + cmd + " " + packages
+
+ #
+ # Distro-specific hints methods
+ #
+
+    def give_debian_hints(self):
+        """
+        Package install hints for Debian-based distros, using apt-get.
+
+        Returns what get_install_progs() returns.
+        """
+        # Distro-independent name -> Debian package
+        deb_map = {
+            "Pod::Usage": "perl-modules",
+            "convert": "imagemagick",
+            "dot": "graphviz",
+            "ensurepip": "python3-venv",
+            "python-sphinx": "python3-sphinx",
+            "rsvg-convert": "librsvg2-bin",
+            "virtualenv": "virtualenv",
+            "xelatex": "texlive-xetex",
+            "yaml": "python3-yaml",
+        }
+
+        if self.pdf:
+            # A package is considered installed if any of its files exists
+            pdf_files = {
+                "fonts-dejavu": [
+                    "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
+                ],
+                "fonts-noto-cjk": [
+                    "/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
+                    "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
+                    "/usr/share/fonts/opentype/noto/NotoSerifCJK-Regular.ttc",
+                ],
+                "tex-gyre": [
+                    "/usr/share/texmf/tex/latex/tex-gyre/tgtermes.sty"
+                ],
+                "texlive-fonts-recommended": [
+                    "/usr/share/texlive/texmf-dist/fonts/tfm/adobe/zapfding/pzdr.tfm",
+                ],
+                "texlive-lang-chinese": [
+                    "/usr/share/texlive/texmf-dist/tex/latex/ctex/ctexhook.sty",
+                ],
+            }
+
+            for pkg_name, candidates in pdf_files.items():
+                self.check_missing_file(candidates, pkg_name,
+                                        DepManager.PDF_MANDATORY)
+
+            self.check_program("dvipng", DepManager.PDF_MANDATORY)
+
+        # Don't override a hint that was already set
+        if not self.distro_msg:
+            self.distro_msg = (
+                "Note: ImageMagick is broken on some distros, affecting PDF output. For more details:\n"
+                "\thttps://askubuntu.com/questions/1158894/imagemagick-still-broken-using-with-usr-bin-convert"
+            )
+
+        return self.get_install_progs(deb_map, "apt-get install")
+
+ def give_redhat_hints(self):
+ """
+ Provide package installation hints for RedHat-based distros
+ (Fedora, RHEL and RHEL-based variants).
+ """
+ progs = {
+ "Pod::Usage": "perl-Pod-Usage",
+ "convert": "ImageMagick",
+ "dot": "graphviz",
+ "python-sphinx": "python3-sphinx",
+ "rsvg-convert": "librsvg2-tools",
+ "virtualenv": "python3-virtualenv",
+ "xelatex": "texlive-xetex-bin",
+ "yaml": "python3-pyyaml",
+ }
+
+ fedora_tex_pkgs = [
+ "dejavu-sans-fonts",
+ "dejavu-sans-mono-fonts",
+ "dejavu-serif-fonts",
+ "texlive-collection-fontsrecommended",
+ "texlive-collection-latex",
+ "texlive-xecjk",
+ ]
+
+ fedora = False
+ rel = None
+
+ match = re.search(r"(release|Linux)\s+(\d+)", self.system_release)
+ if match:
+ rel = int(match.group(2))
+
+ if not rel:
+ print("Couldn't identify release number")
+ noto_sans_redhat = None
+ self.pdf = False
+ elif re.search("Fedora", self.system_release):
+ # Fedora 38 and upper use this CJK font
+
+ noto_sans_redhat = "google-noto-sans-cjk-fonts"
+ fedora = True
+ else:
+ # Almalinux, CentOS, RHEL, ...
+
+ # at least up to version 9 (and Fedora < 38), that's the CJK font
+ noto_sans_redhat = "google-noto-sans-cjk-ttc-fonts"
+
+ progs["virtualenv"] = "python-virtualenv"
+
+ if not rel or rel < 8:
+ print("ERROR: Distro not supported. Too old?")
+ return
+
+            # RHEL 8 uses Python 3.6, which is not compatible with
+            # the build system anymore. Suggest Python 3.9
+ if rel == 8:
+ self.check_program("python3.9", DepManager.SYSTEM_MANDATORY)
+ progs["python3.9"] = "python39"
+ progs["yaml"] = "python39-pyyaml"
+
+ self.recommend_python = True
+
+ # There's no python39-sphinx package. Only pip is supported
+ self.package_supported = False
+
+ if not self.distro_msg:
+ self.distro_msg = \
+ "Note: RHEL-based distros typically require extra repositories.\n" \
+ "For most, enabling epel and crb are enough:\n" \
+ "\tsudo dnf install -y epel-release\n" \
+ "\tsudo dnf config-manager --set-enabled crb\n" \
+ "Yet, some may have other required repositories. Those commands could be useful:\n" \
+ "\tsudo dnf repolist all\n" \
+ "\tsudo dnf repoquery --available --info <pkgs>\n" \
+ "\tsudo dnf config-manager --set-enabled '*' # enable all - probably not what you want"
+
+ if self.pdf:
+ pdf_pkgs = [
+ "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+ "/usr/share/fonts/google-noto-sans-cjk-fonts/NotoSansCJK-Regular.ttc",
+ ]
+
+ self.check_missing_file(pdf_pkgs, noto_sans_redhat, DepManager.PDF_MANDATORY)
+
+ self.check_rpm_missing(fedora_tex_pkgs, DepManager.PDF_MANDATORY)
+
+ self.check_missing_tex(DepManager.PDF_MANDATORY)
+
+ # There's no texlive-ctex on RHEL 8 repositories. This will
+ # likely affect CJK pdf build only.
+ if not fedora and rel == 8:
+ self.deps.del_package("texlive-ctex")
+
+ return self.get_install_progs(progs, "dnf install")
+
+ def give_opensuse_hints(self):
+ """
+ Provide package installation hints for openSUSE-based distros
+ (Leap and Tumbleweed).
+ """
+ progs = {
+ "Pod::Usage": "perl-Pod-Usage",
+ "convert": "ImageMagick",
+ "dot": "graphviz",
+ "python-sphinx": "python3-sphinx",
+ "virtualenv": "python3-virtualenv",
+ "xelatex": "texlive-xetex-bin texlive-dejavu",
+ "yaml": "python3-pyyaml",
+ }
+
+ suse_tex_pkgs = [
+ "texlive-babel-english",
+ "texlive-caption",
+ "texlive-colortbl",
+ "texlive-courier",
+ "texlive-dvips",
+ "texlive-helvetic",
+ "texlive-makeindex",
+ "texlive-metafont",
+ "texlive-metapost",
+ "texlive-palatino",
+ "texlive-preview",
+ "texlive-times",
+ "texlive-zapfchan",
+ "texlive-zapfding",
+ ]
+
+ progs["latexmk"] = "texlive-latexmk-bin"
+
+ match = re.search(r"(Leap)\s+(\d+).(\d)", self.system_release)
+ if match:
+ rel = int(match.group(2))
+
+ # Leap 15.x uses Python 3.6, which is not compatible with
+ # the build system anymore. Suggest Python 3.11
+ if rel == 15:
+ if not self.which(self.python_cmd):
+ self.check_program("python3.11", DepManager.SYSTEM_MANDATORY)
+ progs["python3.11"] = "python311"
+ self.recommend_python = True
+
+ progs.update({
+ "python-sphinx": "python311-Sphinx python311-Sphinx-latex",
+ "virtualenv": "python311-virtualenv",
+ "yaml": "python311-PyYAML",
+ })
+ else:
+            # Tumbleweed: use the Python 3.13 package names
+
+ progs.update({
+ "python-sphinx": "python313-Sphinx python313-Sphinx-latex",
+ "virtualenv": "python313-virtualenv",
+ "yaml": "python313-PyYAML",
+ })
+
+ # FIXME: add support for installing CJK fonts
+ #
+ # I tried hard, but was unable to find a way to install
+ # "Noto Sans CJK SC" on openSUSE
+
+ if self.pdf:
+ self.check_rpm_missing(suse_tex_pkgs, DepManager.PDF_MANDATORY)
+ if self.pdf:
+ self.check_missing_tex()
+
+ return self.get_install_progs(progs, "zypper install --no-recommends")
+
+ def give_mageia_hints(self):
+ """
+ Provide package installation hints for Mageia and OpenMandriva.
+ """
+ progs = {
+ "Pod::Usage": "perl-Pod-Usage",
+ "convert": "ImageMagick",
+ "dot": "graphviz",
+ "python-sphinx": "python3-sphinx",
+ "rsvg-convert": "librsvg2",
+ "virtualenv": "python3-virtualenv",
+ "xelatex": "texlive",
+ "yaml": "python3-yaml",
+ }
+
+ tex_pkgs = [
+ "texlive-fontsextra",
+ "texlive-fonts-asian",
+ "fonts-ttf-dejavu",
+ ]
+
+ if re.search(r"OpenMandriva", self.system_release):
+ packager_cmd = "dnf install"
+ noto_sans = "noto-sans-cjk-fonts"
+ tex_pkgs = [
+ "texlive-collection-basic",
+ "texlive-collection-langcjk",
+ "texlive-collection-fontsextra",
+ "texlive-collection-fontsrecommended"
+ ]
+
+ # Tested on OpenMandriva Lx 4.3
+ progs["convert"] = "imagemagick"
+ progs["yaml"] = "python-pyyaml"
+ progs["python-virtualenv"] = "python-virtualenv"
+ progs["python-sphinx"] = "python-sphinx"
+ progs["xelatex"] = "texlive"
+
+ self.check_program("python-virtualenv", DepManager.PYTHON_MANDATORY)
+
+ # On my tests with openMandriva LX 4.0 docker image, upgraded
+ # to 4.3, python-virtualenv package is broken: it is missing
+ # ensurepip. Without it, the alternative would be to run:
+ # python3 -m venv --without-pip ~/sphinx_latest, but running
+ # pip there won't install sphinx at venv.
+ #
+ # Add a note about that.
+
+ if not self.distro_msg:
+ self.distro_msg = \
+ "Notes:\n"\
+ "1. for venv, ensurepip could be broken, preventing its install method.\n" \
+ "2. at least on OpenMandriva LX 4.3, texlive packages seem broken"
+
+ else:
+ packager_cmd = "urpmi"
+ noto_sans = "google-noto-sans-cjk-ttc-fonts"
+
+ progs["latexmk"] = "texlive-collection-basic"
+
+ if self.pdf:
+ pdf_pkgs = [
+ "/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+ "/usr/share/fonts/TTF/NotoSans-Regular.ttf",
+ ]
+
+ self.check_missing_file(pdf_pkgs, noto_sans, DepManager.PDF_MANDATORY)
+ self.check_rpm_missing(tex_pkgs, DepManager.PDF_MANDATORY)
+
+ return self.get_install_progs(progs, packager_cmd)
+
+ def give_arch_linux_hints(self):
+ """
+ Provide package installation hints for ArchLinux.
+ """
+ progs = {
+ "convert": "imagemagick",
+ "dot": "graphviz",
+ "latexmk": "texlive-core",
+ "rsvg-convert": "extra/librsvg",
+ "virtualenv": "python-virtualenv",
+ "xelatex": "texlive-xetex",
+ "yaml": "python-yaml",
+ }
+
+ archlinux_tex_pkgs = [
+ "texlive-basic",
+ "texlive-binextra",
+ "texlive-core",
+ "texlive-fontsrecommended",
+ "texlive-langchinese",
+ "texlive-langcjk",
+ "texlive-latexextra",
+ "ttf-dejavu",
+ ]
+
+ if self.pdf:
+ self.check_pacman_missing(archlinux_tex_pkgs,
+ DepManager.PDF_MANDATORY)
+
+ self.check_missing_file(["/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc"],
+ "noto-fonts-cjk",
+ DepManager.PDF_MANDATORY)
+
+
+ return self.get_install_progs(progs, "pacman -S")
+
+ def give_gentoo_hints(self):
+ """
+ Provide package installation hints for Gentoo.
+ """
+ texlive_deps = [
+ "dev-texlive/texlive-fontsrecommended",
+ "dev-texlive/texlive-latexextra",
+ "dev-texlive/texlive-xetex",
+ "media-fonts/dejavu",
+ ]
+
+ progs = {
+ "convert": "media-gfx/imagemagick",
+ "dot": "media-gfx/graphviz",
+ "rsvg-convert": "gnome-base/librsvg",
+ "virtualenv": "dev-python/virtualenv",
+ "xelatex": " ".join(texlive_deps),
+ "yaml": "dev-python/pyyaml",
+ "python-sphinx": "dev-python/sphinx",
+ }
+
+ if self.pdf:
+ pdf_pkgs = {
+ "media-fonts/dejavu": [
+ "/usr/share/fonts/dejavu/DejaVuSans.ttf",
+ ],
+ "media-fonts/noto-cjk": [
+ "/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf",
+ "/usr/share/fonts/noto-cjk/NotoSerifCJK-Regular.ttc",
+ ],
+ }
+ for package, files in pdf_pkgs.items():
+ self.check_missing_file(files, package, DepManager.PDF_MANDATORY)
+
+ # Handling dependencies is a nightmare, as Gentoo refuses to emerge
+ # some packages if there's no package.use file describing them.
+ # To make it worse, compilation flags shall also be present there
+ # for some packages. If USE is not perfect, error/warning messages
+ # like those are shown:
+ #
+ # !!! The following binary packages have been ignored due to non matching USE:
+ #
+ # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_13 qt6 svg
+ # =media-gfx/graphviz-12.2.1-r1 X pdf python_single_target_python3_12 -python_single_target_python3_13 qt6 svg
+ # =media-gfx/graphviz-12.2.1-r1 X pdf qt6 svg
+ # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 qt6 svg
+ # =media-gfx/graphviz-12.2.1-r1 X pdf -python_single_target_python3_10 python_single_target_python3_12 -python_single_target_python3_13 qt6 svg
+ # =media-fonts/noto-cjk-20190416 X
+ # =app-text/texlive-core-2024-r1 X cjk -xetex
+ # =app-text/texlive-core-2024-r1 X -xetex
+ # =app-text/texlive-core-2024-r1 -xetex
+ # =dev-libs/zziplib-0.13.79-r1 sdl
+ #
+ # And will ignore such packages, installing the remaining ones. That
+ # affects mostly the image extension and PDF generation.
+
+ # Package dependencies and the minimal needed args:
+ portages = {
+ "graphviz": "media-gfx/graphviz",
+ "imagemagick": "media-gfx/imagemagick",
+ "media-libs": "media-libs/harfbuzz icu",
+ "media-fonts": "media-fonts/noto-cjk",
+ "texlive": "app-text/texlive-core xetex",
+ "zziblib": "dev-libs/zziplib sdl",
+ }
+
+ extra_cmds = ""
+ if not self.distro_msg:
+ self.distro_msg = "Note: Gentoo requires package.use to be adjusted before emerging packages"
+
+ use_base = "/etc/portage/package.use"
+ files = glob(f"{use_base}/*")
+
+ for fname, portage in portages.items():
+ install = False
+
+ while install is False:
+ if not files:
+                    # No files under package.use. Install all
+ install = True
+ break
+
+ args = portage.split(" ")
+
+ name = args.pop(0)
+
+ cmd = ["grep", "-l", "-E", rf"^{name}\b" ] + files
+ result = self.run(cmd, stdout=subprocess.PIPE, text=True)
+ if result.returncode or not result.stdout.strip():
+ # File containing portage name not found
+ install = True
+ break
+
+ # Ensure that needed USE flags are present
+ if args:
+ match_fname = result.stdout.strip()
+ with open(match_fname, 'r', encoding='utf8',
+ errors='backslashreplace') as fp:
+ for line in fp:
+ for arg in args:
+ if arg.startswith("-"):
+ continue
+
+ if not re.search(rf"\s*{arg}\b", line):
+ # Needed file argument not found
+ install = True
+ break
+
+ # Everything looks ok, don't install
+ break
+
+ # emit a code to setup missing USE
+ if install:
+ extra_cmds += (f"sudo su -c 'echo \"{portage}\" > {use_base}/{fname}'\n")
+
+ # Now, we can use emerge and let it respect USE
+ return self.get_install_progs(progs,
+ "emerge --ask --changed-use --binpkg-respect-use=y",
+ extra_cmds)
+
+ def get_install(self):
+ """
+ OS-specific hints logic. Seeks for a hinter. If found, use it to
+ provide package-manager specific install commands.
+
+ Otherwise, outputs install instructions for the meta-packages.
+
+        Returns a string with the command to be executed to install
+        the needed packages, if distro found. Otherwise, return just a
+ list of packages that require installation.
+ """
+ os_hints = {
+ re.compile("Red Hat Enterprise Linux"): self.give_redhat_hints,
+ re.compile("Fedora"): self.give_redhat_hints,
+ re.compile("AlmaLinux"): self.give_redhat_hints,
+ re.compile("Amazon Linux"): self.give_redhat_hints,
+ re.compile("CentOS"): self.give_redhat_hints,
+ re.compile("openEuler"): self.give_redhat_hints,
+ re.compile("Oracle Linux Server"): self.give_redhat_hints,
+ re.compile("Rocky Linux"): self.give_redhat_hints,
+ re.compile("Springdale Open Enterprise"): self.give_redhat_hints,
+
+ re.compile("Ubuntu"): self.give_debian_hints,
+ re.compile("Debian"): self.give_debian_hints,
+ re.compile("Devuan"): self.give_debian_hints,
+ re.compile("Kali"): self.give_debian_hints,
+ re.compile("Mint"): self.give_debian_hints,
+
+ re.compile("openSUSE"): self.give_opensuse_hints,
+
+ re.compile("Mageia"): self.give_mageia_hints,
+ re.compile("OpenMandriva"): self.give_mageia_hints,
+
+ re.compile("Arch Linux"): self.give_arch_linux_hints,
+ re.compile("Gentoo"): self.give_gentoo_hints,
+ }
+
+ # If the OS is detected, use per-OS hint logic
+ for regex, os_hint in os_hints.items():
+ if regex.search(self.system_release):
+ return os_hint()
+
+ #
+ # Fall-back to generic hint code for other distros
+ # That's far from ideal, specially for LaTeX dependencies.
+ #
+ progs = {"sphinx-build": "sphinx"}
+ if self.pdf:
+ self.check_missing_tex()
+
+ self.distro_msg = \
+ f"I don't know distro {self.system_release}.\n" \
+ "So, I can't provide you a hint with the install procedure.\n" \
+ "There are likely missing dependencies."
+
+ return self.get_install_progs(progs, None)
+
+ #
+ # Common dependencies
+ #
+ def deactivate_help(self):
+ """
+ Print a helper message to disable a virtual environment.
+ """
+
+ print("\n If you want to exit the virtualenv, you can use:")
+ print("\tdeactivate")
+
+    def get_virtenv(self):
+        """
+        Give a hint about how to activate an already-existing virtual
+        environment containing sphinx-build.
+
+        Returns a tuple with (activate_cmd_path, sphinx_version) with
+        the newest available virtual env.
+        """
+
+        cwd = os.getcwd()
+
+        activates = []
+
+        # Add all sphinx prefixes with possible version numbers
+        for p in self.virtenv_prefix:
+            activates += glob(f"{cwd}/{p}[0-9]*/bin/activate")
+
+        activates.sort(reverse=True, key=str.lower)
+
+        # Place sphinx_latest first, if it exists
+        for p in self.virtenv_prefix:
+            activates = glob(f"{cwd}/{p}*latest/bin/activate") + activates
+
+        ver = (0, 0, 0)
+        for f in activates:
+            # Discard too old Sphinx virtual environments
+            match = re.search(r"(\d+)\.(\d+)\.(\d+)", f)
+            if match:
+                ver = (int(match.group(1)), int(match.group(2)), int(match.group(3)))
+
+                if ver < self.min_version:
+                    continue
+
+            sphinx_cmd = f.replace("activate", "sphinx-build")
+            if not os.path.isfile(sphinx_cmd):
+                continue
+
+            ver = self.get_sphinx_version(sphinx_cmd)
+
+            if not ver:
+                venv_dir = f.replace("/bin/activate", "")
+                print(f"Warning: virtual environment {venv_dir} is not working.\n" \
+                      "Python version upgrade? Remove it with:\n\n" \
+                      f"\trm -rf {venv_dir}\n\n")
+            else:
+                if self.need_sphinx and ver >= self.min_version:
+                    return (f, ver)
+                elif PythonVersion.parse_version(ver) > self.cur_version:
+                    return (f, ver)
+
+        return ("", ver)
+
+ def recommend_sphinx_upgrade(self):
+ """
+ Check if Sphinx needs to be upgraded.
+
+        Returns a tuple with the highest available Sphinx version if found.
+ Otherwise, returns None to indicate either that no upgrade is needed
+ or no venv was found.
+ """
+
+ # Avoid running sphinx-builds from venv if cur_version is good
+ if self.cur_version and self.cur_version >= RECOMMENDED_VERSION:
+ self.latest_avail_ver = self.cur_version
+ return None
+
+ # Get the highest version from sphinx_*/bin/sphinx-build and the
+ # corresponding command to activate the venv/virtenv
+ self.activate_cmd, self.venv_ver = self.get_virtenv()
+
+ # Store the highest version from Sphinx existing virtualenvs
+ if self.activate_cmd and self.venv_ver > self.cur_version:
+ self.latest_avail_ver = self.venv_ver
+ else:
+ if self.cur_version:
+ self.latest_avail_ver = self.cur_version
+ else:
+ self.latest_avail_ver = (0, 0, 0)
+
+ # As we don't know package version of Sphinx, and there's no
+ # virtual environments, don't check if upgrades are needed
+ if not self.virtualenv:
+ if not self.latest_avail_ver:
+ return None
+
+ return self.latest_avail_ver
+
+ # Either there are already a virtual env or a new one should be created
+ self.need_pip = True
+
+ if not self.latest_avail_ver:
+ return None
+
+ # Return if the reason is due to an upgrade or not
+ if self.latest_avail_ver != (0, 0, 0):
+ if self.latest_avail_ver < RECOMMENDED_VERSION:
+ self.rec_sphinx_upgrade = 1
+
+ return self.latest_avail_ver
+
+ def recommend_package(self):
+ """
+ Recommend installing Sphinx as a distro-specific package.
+ """
+
+ print("\n2) As a package with:")
+
+ old_need = self.deps.need
+ old_optional = self.deps.optional
+
+ self.pdf = False
+ self.deps.optional = 0
+ old_verbose = self.verbose_warn_install
+ self.verbose_warn_install = 0
+
+ self.deps.clear_deps()
+
+ self.deps.add_package("python-sphinx", DepManager.PYTHON_MANDATORY)
+
+ cmd = self.get_install()
+ if cmd:
+ print(cmd)
+
+ self.deps.need = old_need
+ self.deps.optional = old_optional
+ self.verbose_warn_install = old_verbose
+
+    def recommend_sphinx_version(self, virtualenv_cmd):
+        """
+        Provide recommendations for installing or upgrading Sphinx based
+        on current version.
+
+        The logic here is complex, as it has to deal with different versions:
+
+        - minimal supported version;
+        - minimal PDF version;
+        - recommended version.
+
+        It also needs to work fine with both distro's package and
+        venv/virtualenv
+        """
+
+        if self.recommend_python:
+            cur_ver = sys.version_info[:3]
+            if cur_ver < MIN_PYTHON_VERSION:
+                print(f"\nPython version {PythonVersion.ver_str(cur_ver)} is incompatible with doc build.\n" \
+                      "Please upgrade it and re-run.\n")
+                return
+
+        # Version is OK. Nothing to do.
+        if self.cur_version != (0, 0, 0) and self.cur_version >= RECOMMENDED_VERSION:
+            return
+
+        if self.latest_avail_ver:
+            latest_avail_ver = PythonVersion.ver_str(self.latest_avail_ver)
+
+        if not self.need_sphinx:
+            # sphinx-build is present and its version is >= $min_version
+
+            # only recommend enabling a newer virtenv version if makes sense.
+            if self.latest_avail_ver and self.latest_avail_ver > self.cur_version:
+                print(f"\nYou may also use the newer Sphinx version {latest_avail_ver} with:")
+                if any(p in os.getcwd() for p in self.virtenv_prefix):
+                    print("\tdeactivate")
+                print(f"\t. {self.activate_cmd}")
+                self.deactivate_help()
+                return
+
+            if self.latest_avail_ver and self.latest_avail_ver >= RECOMMENDED_VERSION:
+                return
+
+        if not self.virtualenv:
+            # No sphinx either via package or via virtenv. As we can't
+            # Compare the versions here, just return, recommending the
+            # user to install it from the package distro.
+            if not self.latest_avail_ver or self.latest_avail_ver == (0, 0, 0):
+                return
+
+            # User doesn't want a virtenv recommendation, but he already
+            # installed one via virtenv with a newer version.
+            # So, print commands to enable it
+            if self.latest_avail_ver > self.cur_version:
+                print(f"\nYou may also use the Sphinx virtualenv version {latest_avail_ver} with:")
+                if any(p in os.getcwd() for p in self.virtenv_prefix):
+                    print("\tdeactivate")
+                print(f"\t. {self.activate_cmd}")
+                self.deactivate_help()
+                return
+            print("\n")
+        else:
+            if self.need_sphinx:
+                self.deps.need += 1
+
+        # Suggest newer versions if current ones are too old
+        if self.latest_avail_ver and self.latest_avail_ver >= self.min_version:
+            if self.latest_avail_ver >= RECOMMENDED_VERSION:
+                print(f"\nNeed to activate Sphinx (version {latest_avail_ver}) on virtualenv with:")
+                print(f"\t. {self.activate_cmd}")
+                self.deactivate_help()
+                return
+
+            # Version is above the minimal required one, but may be
+            # below the recommended one. So, print warnings/notes
+            if self.latest_avail_ver < RECOMMENDED_VERSION:
+                print(f"Warning: It is recommended at least Sphinx version {PythonVersion.ver_str(RECOMMENDED_VERSION)}.")
+
+        # At this point, either it needs Sphinx or upgrade is recommended,
+        # both via pip
+
+        if self.rec_sphinx_upgrade:
+            if not self.virtualenv:
+                print("Instead of install/upgrade Python Sphinx pkg, you could use pip/pypi with:\n\n")
+            else:
+                print("To upgrade Sphinx, use:\n\n")
+        else:
+            print("\nSphinx needs to be installed either:\n1) via pip/pypi with:\n")
+
+        if not virtualenv_cmd:
+            print(" Currently not possible.\n")
+            print(" Please upgrade Python to a newer version and run this script again")
+        else:
+            print(f"\t{virtualenv_cmd} {self.virtenv_dir}")
+            print(f"\t. {self.virtenv_dir}/bin/activate")
+            print(f"\tpip install -r {self.requirement_file}")
+            self.deactivate_help()
+
+        if self.package_supported:
+            self.recommend_package()
+
+        print("\n" \
+              " Please note that Sphinx currentlys produce false-positive\n" \
+              " warnings when the same name is used for more than one type (functions,\n" \
+              " structs, enums,...). This is known Sphinx bug. For more details, see:\n" \
+              "\thttps://github.com/sphinx-doc/sphinx/pull/8313")
+
+    def check_needs(self):
+        """
+        Main method that checks needed dependencies and provides
+        recommendations.
+        """
+        self.python_cmd = sys.executable
+
+        # Check if Sphinx is already accessible from current environment
+        self.check_sphinx(self.conf)
+
+        if self.system_release:
+            print(f"Detected OS: {self.system_release}.")
+        else:
+            print("Unknown OS")
+        if self.cur_version != (0, 0, 0):
+            ver = PythonVersion.ver_str(self.cur_version)
+            print(f"Sphinx version: {ver}\n")
+
+        # Check the type of virtual env, depending on Python version
+        virtualenv_cmd = None
+
+        if sys.version_info < MIN_PYTHON_VERSION:
+            min_ver = PythonVersion.ver_str(MIN_PYTHON_VERSION)
+            print(f"ERROR: at least python {min_ver} is required to build the kernel docs")
+            self.need_sphinx = 1
+
+        self.venv_ver = self.recommend_sphinx_upgrade()
+
+        if self.need_pip:
+            if sys.version_info < MIN_PYTHON_VERSION:
+                self.need_pip = False
+                print("Warning: python version is not supported.")
+            else:
+                virtualenv_cmd = f"{self.python_cmd} -m venv"
+                self.check_python_module("ensurepip")
+
+        # Check for needed programs/tools
+        self.check_perl_module("Pod::Usage", DepManager.SYSTEM_MANDATORY)
+
+        self.check_program("make", DepManager.SYSTEM_MANDATORY)
+        self.check_program("which", DepManager.SYSTEM_MANDATORY)
+
+        self.check_program("dot", DepManager.SYSTEM_OPTIONAL)
+        self.check_program("convert", DepManager.SYSTEM_OPTIONAL)
+
+        self.check_python_module("yaml")
+
+        if self.pdf:
+            self.check_program("xelatex", DepManager.PDF_MANDATORY)
+            self.check_program("rsvg-convert", DepManager.PDF_MANDATORY)
+            self.check_program("latexmk", DepManager.PDF_MANDATORY)
+
+        # Do distro-specific checks and output distro-install commands
+        cmd = self.get_install()
+        if cmd:
+            print(cmd)
+
+        # If distro requires some special instructions, print here.
+        # Please notice that get_install() needs to be called first.
+        if self.distro_msg:
+            print("\n" + self.distro_msg)
+
+        if not self.python_cmd:
+            if self.deps.need == 1:
+                sys.exit("Can't build as 1 mandatory dependency is missing")
+            elif self.deps.need:
+                sys.exit(f"Can't build as {self.deps.need} mandatory dependencies are missing")
+
+        # Check if sphinx-build is called sphinx-build-3
+        if self.need_symlink:
+            sphinx_path = self.which("sphinx-build-3")
+            if sphinx_path:
+                print(f"\tsudo ln -sf {sphinx_path} /usr/bin/sphinx-build\n")
+
+        self.recommend_sphinx_version(virtualenv_cmd)
+        print("")
+
+        if not self.deps.optional:
+            print("All optional dependencies are met.")
+
+        if self.deps.need == 1:
+            sys.exit("Can't build as 1 mandatory dependency is missing")
+        elif self.deps.need:
+            sys.exit(f"Can't build as {self.deps.need} mandatory dependencies are missing")
+
+        print("Needed package dependencies are met.")
+
+DESCRIPTION = """
+Process some flags related to Sphinx installation and documentation build.
+"""
+
+
+def main():
+ """Main function"""
+ parser = argparse.ArgumentParser(description=DESCRIPTION)
+
+ parser.add_argument(
+ "--no-virtualenv",
+ action="store_false",
+ dest="virtualenv",
+ help="Recommend installing Sphinx instead of using a virtualenv",
+ )
+
+ parser.add_argument(
+ "--no-pdf",
+ action="store_false",
+ dest="pdf",
+ help="Don't check for dependencies required to build PDF docs",
+ )
+
+ parser.add_argument(
+ "--version-check",
+ action="store_true",
+ dest="version_check",
+ help="If version is compatible, don't check for missing dependencies",
+ )
+
+ args = parser.parse_args()
+
+ checker = SphinxDependencyChecker(args)
+
+ PythonVersion.check_python(MIN_PYTHON_VERSION,
+ bail_out=True, success_on_error=True)
+ checker.check_needs()
+
+# Call main if not used as module
+if __name__ == "__main__":
+ main()
diff --git a/tools/docs/test_doc_build.py b/tools/docs/test_doc_build.py
new file mode 100755
index 000000000000..47b4606569f9
--- /dev/null
+++ b/tools/docs/test_doc_build.py
@@ -0,0 +1,513 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+#
+# pylint: disable=R0903,R0912,R0913,R0914,R0917,C0301
+
+"""
+Install minimal supported requirements for different Sphinx versions
+and optionally test the build.
+"""
+
+import argparse
+import asyncio
+import os.path
+import shutil
+import sys
+import time
+import subprocess
+
+# Minimal python version supported by the building system.
+
+PYTHON = os.path.basename(sys.executable)
+
+min_python_bin = None
+
+for i in range(9, 13):
+ p = f"python3.{i}"
+ if shutil.which(p):
+ min_python_bin = p
+ break
+
+if not min_python_bin:
+ min_python_bin = PYTHON
+
+# Starting from 8.0, Python 3.9 is not supported anymore.
+PYTHON_VER_CHANGES = {(8, 0, 0): PYTHON}
+
+DEFAULT_VERSIONS_TO_TEST = [
+ (3, 4, 3), # Minimal supported version
+ (5, 3, 0), # CentOS Stream 9 / AlmaLinux 9
+ (6, 1, 1), # Debian 12
+ (7, 2, 1), # openSUSE Leap 15.6
+ (7, 2, 6), # Ubuntu 24.04 LTS
+ (7, 4, 7), # Ubuntu 24.10
+ (7, 3, 0), # openSUSE Tumbleweed
+ (8, 1, 3), # Fedora 42
+ (8, 2, 3) # Latest version - covers rolling distros
+]
+
+# Sphinx versions to be installed and their incremental requirements
+SPHINX_REQUIREMENTS = {
+ # Oldest versions we support for each package required by Sphinx 3.4.3
+ (3, 4, 3): {
+ "docutils": "0.16",
+ "alabaster": "0.7.12",
+ "babel": "2.8.0",
+ "certifi": "2020.6.20",
+ "docutils": "0.16",
+ "idna": "2.10",
+ "imagesize": "1.2.0",
+ "Jinja2": "2.11.2",
+ "MarkupSafe": "1.1.1",
+ "packaging": "20.4",
+ "Pygments": "2.6.1",
+ "PyYAML": "5.1",
+ "requests": "2.24.0",
+ "snowballstemmer": "2.0.0",
+ "sphinxcontrib-applehelp": "1.0.2",
+ "sphinxcontrib-devhelp": "1.0.2",
+ "sphinxcontrib-htmlhelp": "1.0.3",
+ "sphinxcontrib-jsmath": "1.0.1",
+ "sphinxcontrib-qthelp": "1.0.3",
+ "sphinxcontrib-serializinghtml": "1.1.4",
+ "urllib3": "1.25.9",
+ },
+
+    # Update package dependencies to a more modern base. The goal here
+    # is to avoid too many incremental changes for the next entries
+ (3, 5, 0): {
+ "alabaster": "0.7.13",
+ "babel": "2.17.0",
+ "certifi": "2025.6.15",
+ "idna": "3.10",
+ "imagesize": "1.4.1",
+ "packaging": "25.0",
+ "Pygments": "2.8.1",
+ "requests": "2.32.4",
+ "snowballstemmer": "3.0.1",
+ "sphinxcontrib-applehelp": "1.0.4",
+ "sphinxcontrib-htmlhelp": "2.0.1",
+ "sphinxcontrib-serializinghtml": "1.1.5",
+ "urllib3": "2.0.0",
+ },
+
+ # Starting from here, ensure all docutils versions are covered with
+ # supported Sphinx versions. Other packages are upgraded only when
+ # required by pip
+ (4, 0, 0): {
+ "PyYAML": "5.1",
+ },
+ (4, 1, 0): {
+ "docutils": "0.17",
+ "Pygments": "2.19.1",
+ "Jinja2": "3.0.3",
+ "MarkupSafe": "2.0",
+ },
+ (4, 3, 0): {},
+ (4, 4, 0): {},
+ (4, 5, 0): {
+ "docutils": "0.17.1",
+ },
+ (5, 0, 0): {},
+ (5, 1, 0): {},
+ (5, 2, 0): {
+ "docutils": "0.18",
+ "Jinja2": "3.1.2",
+ "MarkupSafe": "2.0",
+ "PyYAML": "5.3.1",
+ },
+ (5, 3, 0): {
+ "docutils": "0.18.1",
+ },
+ (6, 0, 0): {},
+ (6, 1, 0): {},
+ (6, 2, 0): {
+ "PyYAML": "5.4.1",
+ },
+ (7, 0, 0): {},
+ (7, 1, 0): {},
+ (7, 2, 0): {
+ "docutils": "0.19",
+ "PyYAML": "6.0.1",
+ "sphinxcontrib-serializinghtml": "1.1.9",
+ },
+ (7, 2, 6): {
+ "docutils": "0.20",
+ },
+ (7, 3, 0): {
+ "alabaster": "0.7.14",
+ "PyYAML": "6.0.1",
+ "tomli": "2.0.1",
+ },
+ (7, 4, 0): {
+ "docutils": "0.20.1",
+ "PyYAML": "6.0.1",
+ },
+ (8, 0, 0): {
+ "docutils": "0.21",
+ },
+ (8, 1, 0): {
+ "docutils": "0.21.1",
+ "PyYAML": "6.0.1",
+ "sphinxcontrib-applehelp": "1.0.7",
+ "sphinxcontrib-devhelp": "1.0.6",
+ "sphinxcontrib-htmlhelp": "2.0.6",
+ "sphinxcontrib-qthelp": "1.0.6",
+ },
+ (8, 2, 0): {
+ "docutils": "0.21.2",
+ "PyYAML": "6.0.1",
+ "sphinxcontrib-serializinghtml": "1.1.9",
+ },
+}
+
+
+class AsyncCommands:
+    """Execute commands asynchronously, logging their output"""
+
+    def __init__(self, fp=None):
+
+        self.stdout = None
+        self.stderr = None
+        self.output = None
+        self.fp = fp
+
+    def log(self, out, verbose, is_info=True):
+        out = out.removesuffix('\n')
+
+        if verbose:
+            if is_info:
+                print(out)
+            else:
+                print(out, file=sys.stderr)
+
+        if self.fp:
+            self.fp.write(out + "\n")
+
+    async def _read(self, stream, verbose, is_info):
+        """Ancillary routine to capture while displaying"""
+
+        while stream is not None:
+            line = await stream.readline()
+            if line:
+                out = line.decode("utf-8", errors="backslashreplace")
+                self.log(out, verbose, is_info)
+                if is_info:
+                    self.stdout += out
+                else:
+                    self.stderr += out
+            else:
+                break
+
+    async def run(self, cmd, capture_output=False, check=False,
+                  env=None, verbose=True):
+
+        """
+        Execute an arbitrary command, handling errors. Not thread safe.
+        Returns stdout text if capture_output ("" on failure), else a
+        CompletedProcess; raises CalledProcessError if check and it fails.
+        """
+
+        self.stdout = ""
+        self.stderr = ""
+
+        self.log("$ " + " ".join(cmd), verbose)
+
+        proc = await asyncio.create_subprocess_exec(cmd[0],
+                                                    *cmd[1:],
+                                                    env=env,
+                                                    stdout=asyncio.subprocess.PIPE,
+                                                    stderr=asyncio.subprocess.PIPE)
+
+        # Handle input and output in realtime
+        await asyncio.gather(
+            self._read(proc.stdout, verbose, True),
+            self._read(proc.stderr, verbose, False),
+        )
+
+        await proc.wait()
+
+        if check and proc.returncode != 0:
+            raise subprocess.CalledProcessError(returncode=proc.returncode,
+                                                cmd=" ".join(cmd),
+                                                output=self.stdout,
+                                                stderr=self.stderr)
+
+        if capture_output:
+            if proc.returncode != 0:
+                self.log(f"Error {proc.returncode}", verbose=True, is_info=False)
+                return ""
+
+            return self.stdout
+
+        ret = subprocess.CompletedProcess(args=cmd,
+                                          returncode=proc.returncode,
+                                          stdout=self.stdout,
+                                          stderr=self.stderr)
+
+        return ret
+
+
+class SphinxVenv:
+    """
+    Installs Sphinx on one virtual env per Sphinx version with a minimal
+    set of dependencies, adjusting them to each specific version.
+    """
+
+    def __init__(self):
+        """Initialize instance variables"""
+
+        # Dotted version string -> "HH:MM:SS" spent on its doc build
+        self.built_time = {}
+        # True until the first selected version starts being handled
+        self.first_run = True
+
+    async def _handle_version(self, args, fp,
+                              cur_ver, cur_requirements, python_bin):
+        """
+        Handle a single Sphinx version.
+
+        Creates the "Sphinx_<ver>" virtual env with python_bin, installs
+        the pinned requirements plus Sphinx==<ver>, optionally writes
+        "requirements_<ver>.txt" from "pip freeze" and, with args.build,
+        runs "make cleandocs" followed by the requested doc targets.
+
+        Arguments:
+            args:             parsed command line namespace.
+            fp:               log file object (or None) handed to
+                              AsyncCommands.
+            cur_ver:          version as a tuple of integers.
+            cur_requirements: dict mapping package name to version.
+            python_bin:       interpreter used to create the venv.
+
+        Every command is run with check=True, so a failing step raises
+        subprocess.CalledProcessError.
+        """
+
+        cmd = AsyncCommands(fp)
+
+        ver = ".".join(map(str, cur_ver))
+
+        if not self.first_run and args.wait_input and args.build:
+            ret = input("Press Enter to continue or 'a' to abort: ").strip().lower()
+            if ret == "a":
+                print("Aborted.")
+                sys.exit()
+        else:
+            self.first_run = False
+
+        venv_dir = f"Sphinx_{ver}"
+        req_file = f"requirements_{ver}.txt"
+
+        cmd.log(f"\nSphinx {ver} with {python_bin}", verbose=True)
+
+        # Create venv
+        await cmd.run([python_bin, "-m", "venv", venv_dir],
+                      verbose=args.verbose, check=True)
+        pip = os.path.join(venv_dir, "bin/pip")
+
+        # Create install list
+        reqs = [f"{pkg}=={verstr}" for pkg, verstr in cur_requirements.items()]
+        reqs.append(f"Sphinx=={ver}")
+
+        await cmd.run([pip, "install"] + reqs, check=True, verbose=args.verbose)
+
+        # Freeze environment
+        result = await cmd.run([pip, "freeze"], verbose=False, check=True)
+
+        # Pip install succeeded. Write requirements file
+        if args.req_file:
+            # Use a dedicated name: "fp" is the caller's log file
+            with open(req_file, "w", encoding="utf-8") as req_fp:
+                req_fp.write(result.stdout)
+
+        if args.build:
+            start_time = time.time()
+
+            # Prepare a venv environment
+            env = os.environ.copy()
+            bin_dir = os.path.join(venv_dir, "bin")
+            env["PATH"] = bin_dir + ":" + env["PATH"]
+            env["VIRTUAL_ENV"] = venv_dir
+            if "PYTHONHOME" in env:
+                del env["PYTHONHOME"]
+
+            # Test doc build
+            await cmd.run(["make", "cleandocs"], env=env, check=True)
+            make = ["make"]
+
+            if args.output:
+                sphinx_build = os.path.realpath(f"{bin_dir}/sphinx-build")
+                make += [f"O={args.output}", f"SPHINXBUILD={sphinx_build}"]
+
+            if args.make_args:
+                make += args.make_args
+
+            make += args.targets
+
+            if args.verbose:
+                cmd.log(f". {bin_dir}/activate", verbose=True)
+            await cmd.run(make, env=env, check=True, verbose=True)
+            if args.verbose:
+                cmd.log("deactivate", verbose=True)
+
+            # Split the elapsed seconds into hours, minutes and seconds
+            elapsed_time = time.time() - start_time
+            hours, remainder = divmod(elapsed_time, 3600)
+            minutes, seconds = divmod(remainder, 60)
+
+            self.built_time[ver] = f"{int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}"
+
+            cmd.log(f"Finished doc build for Sphinx {ver}. Elapsed time: {self.built_time[ver]}", verbose=True)
+
+    async def run(self, args):
+        """
+        Navigate through multiple Sphinx versions, handling each of them
+        on a loop.
+
+        Requirements and the Python interpreter are accumulated over
+        every known version in ascending order, so that a selected
+        version inherits the settings of the versions before it. Only
+        versions listed in args.versions and inside the optional
+        args.min_version / args.max_version range are actually handled.
+        """
+
+        if args.log:
+            fp = open(args.log, "w", encoding="utf-8")  # pylint: disable=R1732
+            if not args.verbose:
+                args.verbose = False
+        else:
+            fp = None
+            # Without a log file the output is always shown
+            if not args.verbose:
+                args.verbose = True
+
+        # Make sure the log file is closed even when a version fails
+        try:
+            cur_requirements = {}
+            python_bin = min_python_bin
+
+            vers = set(SPHINX_REQUIREMENTS.keys()) | set(args.versions)
+
+            for cur_ver in sorted(vers):
+                if cur_ver in SPHINX_REQUIREMENTS:
+                    new_reqs = SPHINX_REQUIREMENTS[cur_ver]
+                    cur_requirements.update(new_reqs)
+
+                if cur_ver in PYTHON_VER_CHANGES:          # pylint: disable=R1715
+                    python_bin = PYTHON_VER_CHANGES[cur_ver]
+
+                if cur_ver not in args.versions:
+                    continue
+
+                if args.min_version:
+                    if cur_ver < args.min_version:
+                        continue
+
+                if args.max_version:
+                    if cur_ver > args.max_version:
+                        break
+
+                await self._handle_version(args, fp, cur_ver, cur_requirements,
+                                           python_bin)
+
+            if args.build:
+                cmd = AsyncCommands(fp)
+                cmd.log("\nSummary:", verbose=True)
+                for ver, elapsed_time in sorted(self.built_time.items()):
+                    cmd.log(f"\tSphinx {ver} elapsed time: {elapsed_time}",
+                            verbose=True)
+        finally:
+            if fp:
+                fp.close()
+
+def parse_version(ver_str):
+    """Turn a dotted version string such as "3.4.3" into a tuple of ints."""
+
+    return tuple(int(field) for field in ver_str.split("."))
+
+
+# Bulleted list with the default versions, one per line, for the help text
+DEFAULT_VERS = " - " + "\n - ".join(f"{v[0]}.{v[1]}.{v[2]}"
+                                    for v in DEFAULT_VERSIONS_TO_TEST)
+
+# Script path relative to the current directory, used on usage examples
+SCRIPT = os.path.relpath(__file__)
+
+# Help text shown by argparse. The usage examples use "-b" (--build),
+# which is the option that enables the documentation build.
+DESCRIPTION = f"""
+This tool allows creating Python virtual environments for different
+Sphinx versions that are supported by the Linux Kernel build system.
+
+Besides creating the virtual environment, it can also test building
+the documentation using "make htmldocs" (and/or other doc targets).
+
+If called without "--versions" argument, it covers the versions shipped
+on major distros, plus the lowest supported version:
+
+{DEFAULT_VERS}
+
+A typical usage is to run:
+
+ {SCRIPT} -b -l sphinx_builds.log
+
+This will create one virtual env for the default version set and run
+"make htmldocs" for each version, creating a log file with the
+executed commands on it.
+
+NOTE: The build time can be very long, especially on old versions. Also, there
+is a known bug with Sphinx version 6.0.x: each subprocess uses a lot of
+memory. That, together with "-jauto" may cause OOM killer to cause
+failures at the doc generation. To minimize the risk, you may use the
+"-a" command line parameter to constrain the built directories and/or
+reduce the number of threads from "-jauto" to, for instance, "-j4":
+
+ {SCRIPT} -b -V 6.0.1 -a "SPHINXDIRS=process" "SPHINXOPTS='-j4'"
+
+"""
+
+# Documentation targets accepted by -t/--targets. The first entry is the
+# one used when no target is given on the command line.
+MAKE_TARGETS = [
+ "htmldocs",
+ "texinfodocs",
+ "infodocs",
+ "latexdocs",
+ "pdfdocs",
+ "epubdocs",
+ "xmldocs",
+]
+
+async def main():
+    """
+    Main program.
+
+    Parses the command line, extends the version list with every entry
+    of SPHINX_REQUIREMENTS when --full is used, and hands the resulting
+    namespace to SphinxVenv.run().
+    """
+
+    parser = argparse.ArgumentParser(description=DESCRIPTION,
+                                     formatter_class=argparse.RawDescriptionHelpFormatter)
+
+    ver_group = parser.add_argument_group("Version range options")
+
+    ver_group.add_argument('-V', '--versions', nargs="*",
+                           default=DEFAULT_VERSIONS_TO_TEST, type=parse_version,
+                           help='Sphinx versions to test')
+    ver_group.add_argument('--min-version', "--min", type=parse_version,
+                           help='Sphinx minimal version')
+    ver_group.add_argument('--max-version', "--max", type=parse_version,
+                           help='Sphinx maximum version')
+    ver_group.add_argument('-f', '--full', action='store_true',
+                           help='Add all Sphinx (major,minor) supported versions to the version range')
+
+    build_group = parser.add_argument_group("Build options")
+
+    build_group.add_argument('-b', '--build', action='store_true',
+                             help='Build documentation')
+    build_group.add_argument('-a', '--make-args', nargs="*",
+                             help='extra arguments for make, like SPHINXDIRS=netlink/specs')
+    build_group.add_argument('-t', '--targets', nargs="+", choices=MAKE_TARGETS,
+                             default=[MAKE_TARGETS[0]],
+                             help="make build targets. Default: htmldocs.")
+    build_group.add_argument("-o", '--output',
+                             help="output directory for the make O=OUTPUT")
+
+    other_group = parser.add_argument_group("Other options")
+
+    other_group.add_argument('-r', '--req-file', action='store_true',
+                             help='write a requirements.txt file')
+    other_group.add_argument('-l', '--log',
+                             help='Log command output on a file')
+    other_group.add_argument('-v', '--verbose', action='store_true',
+                             help='Verbose all commands')
+    other_group.add_argument('-i', '--wait-input', action='store_true',
+                             help='Wait for an enter before going to the next version')
+
+    args = parser.parse_args()
+
+    if not args.make_args:
+        args.make_args = []
+
+    if args.full:
+        # Build a new list: args.versions may be the very same object as
+        # DEFAULT_VERSIONS_TO_TEST, which must not be modified in place.
+        args.versions = list(args.versions) + list(SPHINX_REQUIREMENTS.keys())
+
+    venv = SphinxVenv()
+    await venv.run(args)
+
+
+# Script entry point: drive the async main() coroutine with asyncio.run()
+if __name__ == "__main__":
+ asyncio.run(main())
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
index ed565eb52275..342e056c8c66 100644
--- a/tools/gpio/Makefile
+++ b/tools/gpio/Makefile
@@ -77,7 +77,7 @@ $(OUTPUT)gpio-watch: $(GPIO_WATCH_IN)
clean:
rm -f $(ALL_PROGRAMS)
- rm -f $(OUTPUT)include/linux/gpio.h
+ rm -rf $(OUTPUT)include
find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
install: $(ALL_PROGRAMS)
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index eab7b082f19d..03ca33869ce8 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -64,6 +64,7 @@ static const char * const iio_chan_type_name_spec[] = {
[IIO_COLORTEMP] = "colortemp",
[IIO_CHROMATICITY] = "chromaticity",
[IIO_ATTENTION] = "attention",
+ [IIO_ALTCURRENT] = "altcurrent",
};
static const char * const iio_ev_type_text[] = {
@@ -140,6 +141,10 @@ static const char * const iio_modifier_names[] = {
[IIO_MOD_PITCH] = "pitch",
[IIO_MOD_YAW] = "yaw",
[IIO_MOD_ROLL] = "roll",
+ [IIO_MOD_RMS] = "rms",
+ [IIO_MOD_ACTIVE] = "active",
+ [IIO_MOD_REACTIVE] = "reactive",
+ [IIO_MOD_APPARENT] = "apparent",
};
static bool event_is_known(struct iio_event_data *event)
@@ -187,6 +192,7 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_COLORTEMP:
case IIO_CHROMATICITY:
case IIO_ATTENTION:
+ case IIO_ALTCURRENT:
break;
default:
return false;
@@ -238,6 +244,10 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_MOD_PM4:
case IIO_MOD_PM10:
case IIO_MOD_O2:
+ case IIO_MOD_RMS:
+ case IIO_MOD_ACTIVE:
+ case IIO_MOD_REACTIVE:
+ case IIO_MOD_APPARENT:
break;
default:
return false;
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
index e974ec932ec1..35f33780ca6c 100644
--- a/tools/include/asm-generic/bitops/__fls.h
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -10,7 +10,7 @@
*
* Undefined if no set bit exists, so code should check against 0 first.
*/
-static __always_inline unsigned int generic___fls(unsigned long word)
+static __always_inline __attribute_const__ unsigned int generic___fls(unsigned long word)
{
unsigned int num = BITS_PER_LONG - 1;
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
index 26f3ce1dd6e4..8eed3437edb9 100644
--- a/tools/include/asm-generic/bitops/fls.h
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -10,7 +10,7 @@
* Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
*/
-static __always_inline int generic_fls(unsigned int x)
+static __always_inline __attribute_const__ int generic_fls(unsigned int x)
{
int r = 32;
diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h
index 866f2b2304ff..b5f58dd261a3 100644
--- a/tools/include/asm-generic/bitops/fls64.h
+++ b/tools/include/asm-generic/bitops/fls64.h
@@ -16,7 +16,7 @@
* at position 64.
*/
#if BITS_PER_LONG == 32
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
{
__u32 h = x >> 32;
if (h)
@@ -24,7 +24,7 @@ static __always_inline int fls64(__u64 x)
return fls(x);
}
#elif BITS_PER_LONG == 64
-static __always_inline int fls64(__u64 x)
+static __always_inline __attribute_const__ int fls64(__u64 x)
{
if (x == 0)
return 0;
diff --git a/tools/include/asm-generic/io.h b/tools/include/asm-generic/io.h
new file mode 100644
index 000000000000..e5a0b07ad452
--- /dev/null
+++ b/tools/include/asm-generic/io.h
@@ -0,0 +1,482 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_GENERIC_IO_H
+#define _TOOLS_ASM_GENERIC_IO_H
+
+#include <asm/barrier.h>
+#include <asm/byteorder.h>
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+/* Fallback when mmiowb_set_pending() is not provided: an empty statement. */
+#ifndef mmiowb_set_pending
+#define mmiowb_set_pending() do { } while (0)
+#endif
+
+/* Issued by the read accessors before the raw load; defaults to barrier(). */
+#ifndef __io_br
+#define __io_br() barrier()
+#endif
+
+/* prevent prefetching of coherent DMA data ahead of a dma-complete */
+#ifndef __io_ar
+#ifdef rmb
+#define __io_ar(v) rmb()
+#else
+#define __io_ar(v) barrier()
+#endif
+#endif
+
+/* flush writes to coherent DMA data before possibly triggering a DMA read */
+#ifndef __io_bw
+#ifdef wmb
+#define __io_bw() wmb()
+#else
+#define __io_bw() barrier()
+#endif
+#endif
+
+/* serialize device access against a spin_unlock, usually handled there. */
+#ifndef __io_aw
+#define __io_aw() mmiowb_set_pending()
+#endif
+
+/*
+ * The __io_p*() variants default to the plain barriers defined above.
+ * NOTE(review): presumably meant for port I/O accessors; nothing in the
+ * accessors below calls them - confirm against the users of this header.
+ */
+#ifndef __io_pbw
+#define __io_pbw() __io_bw()
+#endif
+
+#ifndef __io_paw
+#define __io_paw() __io_aw()
+#endif
+
+#ifndef __io_pbr
+#define __io_pbr() __io_br()
+#endif
+
+#ifndef __io_par
+#define __io_par(v) __io_ar(v)
+#endif
+
+/* Fallback value passed as caller address to the log_*_mmio() hooks. */
+#ifndef _THIS_IP_
+#define _THIS_IP_ 0
+#endif
+
+/*
+ * MMIO logging hooks called by the accessors below before and after each
+ * access. Here they are stubs with empty bodies: all arguments are ignored.
+ */
+static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+static inline void log_read_mmio(u8 width, const volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr,
+ unsigned long caller_addr, unsigned long caller_addr0) {}
+
+/*
+ * __raw_{read,write}{b,w,l,q}() access memory in native endianness.
+ *
+ * On some architectures memory mapped IO needs to be accessed differently.
+ * On the simple architectures, we just read/write the memory location
+ * directly.
+ */
+
+#ifndef __raw_readb
+#define __raw_readb __raw_readb
+/* Load a u8 from @addr through a volatile pointer; no barrier, no logging. */
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+ return *(const volatile u8 __force *)addr;
+}
+#endif
+
+#ifndef __raw_readw
+#define __raw_readw __raw_readw
+/* Load a u16 from @addr through a volatile pointer; no byte swap, no barrier. */
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+ return *(const volatile u16 __force *)addr;
+}
+#endif
+
+#ifndef __raw_readl
+#define __raw_readl __raw_readl
+/* Load a u32 from @addr through a volatile pointer; no byte swap, no barrier. */
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+ return *(const volatile u32 __force *)addr;
+}
+#endif
+
+#ifndef __raw_readq
+#define __raw_readq __raw_readq
+/* Load a u64 from @addr through a volatile pointer; no byte swap, no barrier. */
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+ return *(const volatile u64 __force *)addr;
+}
+#endif
+
+#ifndef __raw_writeb
+#define __raw_writeb __raw_writeb
+/* Store @value as a u8 to @addr through a volatile pointer; no barrier, no logging. */
+static inline void __raw_writeb(u8 value, volatile void __iomem *addr)
+{
+ *(volatile u8 __force *)addr = value;
+}
+#endif
+
+#ifndef __raw_writew
+#define __raw_writew __raw_writew
+/* Store @value as a u16 to @addr through a volatile pointer; no byte swap, no barrier. */
+static inline void __raw_writew(u16 value, volatile void __iomem *addr)
+{
+ *(volatile u16 __force *)addr = value;
+}
+#endif
+
+#ifndef __raw_writel
+#define __raw_writel __raw_writel
+/* Store @value as a u32 to @addr through a volatile pointer; no byte swap, no barrier. */
+static inline void __raw_writel(u32 value, volatile void __iomem *addr)
+{
+ *(volatile u32 __force *)addr = value;
+}
+#endif
+
+#ifndef __raw_writeq
+#define __raw_writeq __raw_writeq
+/* Store @value as a u64 to @addr through a volatile pointer; no byte swap, no barrier. */
+static inline void __raw_writeq(u64 value, volatile void __iomem *addr)
+{
+ *(volatile u64 __force *)addr = value;
+}
+#endif
+
+/*
+ * {read,write}{b,w,l,q}() access little endian memory and return result in
+ * native endianness.
+ */
+
+#ifndef readb
+#define readb readb
+/*
+ * Read 8 bits from @addr. The raw load is bracketed by __io_br() and
+ * __io_ar(), and by the log_read_mmio()/log_post_read_mmio() hooks.
+ * A single byte needs no endianness conversion.
+ */
+static inline u8 readb(const volatile void __iomem *addr)
+{
+ u8 val;
+
+ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __raw_readb(addr);
+ __io_ar(val);
+ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readw
+#define readw readw
+/*
+ * Read a little-endian 16-bit value from @addr and return it in CPU
+ * byte order. The raw load is bracketed by __io_br() and __io_ar().
+ */
+static inline u16 readw(const volatile void __iomem *addr)
+{
+ u16 val;
+
+ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
+ __io_ar(val);
+ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readl
+#define readl readl
+/*
+ * Read a little-endian 32-bit value from @addr and return it in CPU
+ * byte order. The raw load is bracketed by __io_br() and __io_ar().
+ */
+static inline u32 readl(const volatile void __iomem *addr)
+{
+ u32 val;
+
+ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
+ __io_ar(val);
+ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readq
+#define readq readq
+/*
+ * Read a little-endian 64-bit value from @addr and return it in CPU
+ * byte order. The raw load is bracketed by __io_br() and __io_ar().
+ */
+static inline u64 readq(const volatile void __iomem *addr)
+{
+ u64 val;
+
+ log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
+ __io_br();
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
+ __io_ar(val);
+ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef writeb
+#define writeb writeb
+/*
+ * Write the 8-bit @value to @addr. The raw store is preceded by
+ * __io_bw() and followed by __io_aw(), with the write log hooks around.
+ */
+static inline void writeb(u8 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writeb(value, addr);
+ __io_aw();
+ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writew
+#define writew writew
+/*
+ * Write the 16-bit @value to @addr in little-endian byte order. The raw
+ * store is preceded by __io_bw() and followed by __io_aw(), with the
+ * write log hooks around.
+ *
+ * The conversion uses __cpu_to_le16(), from the same double-underscore
+ * byteorder family that readw(), writel() and writeq() already rely on,
+ * so that this header needs a single set of byteorder helpers.
+ */
+static inline void writew(u16 value, volatile void __iomem *addr)
+{
+	log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+	__io_bw();
+	__raw_writew((u16 __force)__cpu_to_le16(value), addr);
+	__io_aw();
+	log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writel
+#define writel writel
+/*
+ * Write the 32-bit @value to @addr in little-endian byte order. The raw
+ * store is preceded by __io_bw() and followed by __io_aw().
+ */
+static inline void writel(u32 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writel((u32 __force)__cpu_to_le32(value), addr);
+ __io_aw();
+ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writeq
+#define writeq writeq
+/*
+ * Write the 64-bit @value to @addr in little-endian byte order. The raw
+ * store is preceded by __io_bw() and followed by __io_aw().
+ */
+static inline void writeq(u64 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ __io_bw();
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
+ __io_aw();
+ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+/*
+ * {read,write}{b,w,l,q}_relaxed() are like the regular version, but
+ * are not guaranteed to provide ordering against spinlocks or memory
+ * accesses.
+ */
+#ifndef readb_relaxed
+#define readb_relaxed readb_relaxed
+/* Read 8 bits from @addr without the __io_br()/__io_ar() barriers. */
+static inline u8 readb_relaxed(const volatile void __iomem *addr)
+{
+ u8 val;
+
+ log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
+ val = __raw_readb(addr);
+ log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readw_relaxed
+#define readw_relaxed readw_relaxed
+/*
+ * Read a little-endian 16-bit value from @addr and return it in CPU
+ * byte order, without the __io_br()/__io_ar() barriers.
+ */
+static inline u16 readw_relaxed(const volatile void __iomem *addr)
+{
+ u16 val;
+
+ log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
+ val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
+ log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef readl_relaxed
+#define readl_relaxed readl_relaxed
+/*
+ * Read a little-endian 32-bit value from @addr and return it in CPU
+ * byte order, without the __io_br()/__io_ar() barriers.
+ */
+static inline u32 readl_relaxed(const volatile void __iomem *addr)
+{
+ u32 val;
+
+ log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
+ val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
+ log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#if defined(readq) && !defined(readq_relaxed)
+#define readq_relaxed readq_relaxed
+/*
+ * Read a little-endian 64-bit value from @addr and return it in CPU
+ * byte order, without the __io_br()/__io_ar() barriers. Only provided
+ * when readq is defined.
+ */
+static inline u64 readq_relaxed(const volatile void __iomem *addr)
+{
+ u64 val;
+
+ log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
+ val = __le64_to_cpu((__le64 __force)__raw_readq(addr));
+ log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
+ return val;
+}
+#endif
+
+#ifndef writeb_relaxed
+#define writeb_relaxed writeb_relaxed
+/* Write the 8-bit @value to @addr without the __io_bw()/__io_aw() barriers. */
+static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+ __raw_writeb(value, addr);
+ log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writew_relaxed
+#define writew_relaxed writew_relaxed
+/*
+ * Write the 16-bit @value to @addr in little-endian byte order, without
+ * the __io_bw()/__io_aw() barriers.
+ *
+ * The conversion uses __cpu_to_le16(), from the same double-underscore
+ * byteorder family that readw_relaxed(), writel_relaxed() and
+ * writeq_relaxed() already rely on.
+ */
+static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
+{
+	log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+	__raw_writew((u16 __force)__cpu_to_le16(value), addr);
+	log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#ifndef writel_relaxed
+#define writel_relaxed writel_relaxed
+/*
+ * Write the 32-bit @value to @addr in little-endian byte order, without
+ * the __io_bw()/__io_aw() barriers.
+ */
+static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+ __raw_writel((u32 __force)__cpu_to_le32(value), addr);
+ log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+#if defined(writeq) && !defined(writeq_relaxed)
+#define writeq_relaxed writeq_relaxed
+/*
+ * Write the 64-bit @value to @addr in little-endian byte order, without
+ * the __io_bw()/__io_aw() barriers. Only provided when writeq is defined.
+ */
+static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
+{
+ log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+ __raw_writeq((u64 __force)__cpu_to_le64(value), addr);
+ log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
+}
+#endif
+
+/*
+ * {read,write}s{b,w,l,q}() repeatedly access the same memory address in
+ * native endianness in 8-, 16-, 32- or 64-bit chunks (@count times).
+ */
+#ifndef readsb
+#define readsb readsb
+/*
+ * Fill @buffer with @count bytes, each one read from the same @addr
+ * with __raw_readb(). Nothing is read when @count is zero.
+ */
+static inline void readsb(const volatile void __iomem *addr, void *buffer,
+			  unsigned int count)
+{
+	u8 *dst = buffer;
+
+	for (; count != 0; count--)
+		*dst++ = __raw_readb(addr);
+}
+#endif
+
+#ifndef readsw
+#define readsw readsw
+/*
+ * Fill @buffer with @count 16-bit words, each one read from the same
+ * @addr with __raw_readw() (no byte swap). Nothing is read when @count
+ * is zero.
+ */
+static inline void readsw(const volatile void __iomem *addr, void *buffer,
+			  unsigned int count)
+{
+	u16 *dst = buffer;
+
+	for (; count != 0; count--)
+		*dst++ = __raw_readw(addr);
+}
+#endif
+
+#ifndef readsl
+#define readsl readsl
+/*
+ * Fill @buffer with @count 32-bit words, each one read from the same
+ * @addr with __raw_readl() (no byte swap). Nothing is read when @count
+ * is zero.
+ */
+static inline void readsl(const volatile void __iomem *addr, void *buffer,
+			  unsigned int count)
+{
+	u32 *dst = buffer;
+
+	for (; count != 0; count--)
+		*dst++ = __raw_readl(addr);
+}
+#endif
+
+#ifndef readsq
+#define readsq readsq
+/*
+ * Fill @buffer with @count 64-bit words, each one read from the same
+ * @addr with __raw_readq() (no byte swap). Nothing is read when @count
+ * is zero.
+ */
+static inline void readsq(const volatile void __iomem *addr, void *buffer,
+			  unsigned int count)
+{
+	u64 *dst = buffer;
+
+	for (; count != 0; count--)
+		*dst++ = __raw_readq(addr);
+}
+#endif
+
+#ifndef writesb
+#define writesb writesb
+/*
+ * Write @count bytes taken from @buffer, one after the other, to the
+ * same @addr with __raw_writeb(). Nothing is written when @count is zero.
+ */
+static inline void writesb(volatile void __iomem *addr, const void *buffer,
+			   unsigned int count)
+{
+	const u8 *src = buffer;
+
+	for (; count != 0; count--)
+		__raw_writeb(*src++, addr);
+}
+#endif
+
+#ifndef writesw
+#define writesw writesw
+/*
+ * Write @count 16-bit words taken from @buffer, one after the other, to
+ * the same @addr with __raw_writew() (no byte swap). Nothing is written
+ * when @count is zero.
+ */
+static inline void writesw(volatile void __iomem *addr, const void *buffer,
+			   unsigned int count)
+{
+	const u16 *src = buffer;
+
+	for (; count != 0; count--)
+		__raw_writew(*src++, addr);
+}
+#endif
+
+#ifndef writesl
+#define writesl writesl
+/*
+ * Write @count 32-bit words taken from @buffer, one after the other, to
+ * the same @addr with __raw_writel() (no byte swap). Nothing is written
+ * when @count is zero.
+ */
+static inline void writesl(volatile void __iomem *addr, const void *buffer,
+			   unsigned int count)
+{
+	const u32 *src = buffer;
+
+	for (; count != 0; count--)
+		__raw_writel(*src++, addr);
+}
+#endif
+
+#ifndef writesq
+#define writesq writesq
+/*
+ * Write @count 64-bit words taken from @buffer, one after the other, to
+ * the same @addr with __raw_writeq() (no byte swap). Nothing is written
+ * when @count is zero.
+ */
+static inline void writesq(volatile void __iomem *addr, const void *buffer,
+			   unsigned int count)
+{
+	const u64 *src = buffer;
+
+	for (; count != 0; count--)
+		__raw_writeq(*src++, addr);
+}
+#endif
+
+#endif /* _TOOLS_ASM_GENERIC_IO_H */
diff --git a/tools/include/asm/io.h b/tools/include/asm/io.h
new file mode 100644
index 000000000000..eed5066f25c4
--- /dev/null
+++ b/tools/include/asm/io.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_IO_H
+#define _TOOLS_ASM_IO_H
+
+/* x86 builds pick the x86-specific header; everything else uses the generic one. */
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/io.h"
+#else
+#include <asm-generic/io.h>
+#endif
+
+#endif /* _TOOLS_ASM_IO_H */
diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h
new file mode 100644
index 000000000000..2e8e65d975c7
--- /dev/null
+++ b/tools/include/linux/args.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _LINUX_ARGS_H
+#define _LINUX_ARGS_H
+
+/*
+ * How do these macros work?
+ *
+ * In __COUNT_ARGS() _0 to _15 are just placeholders from the start
+ * in order to make sure _n is positioned over the correct number
+ * from 15 to 0 (depending on X, which is a variadic argument list).
+ * They serve no purpose other than occupying a position. Since each
+ * macro parameter must have a distinct identifier, those identifiers
+ * are as good as any.
+ *
+ * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns
+ * that as _n.
+ */
+
+/*
+ * This counts to 15. Any more, it will return 16th argument.
+ *
+ * For instance COUNT_ARGS() expands to 0 and COUNT_ARGS(a, b, c) to 3.
+ * The empty case relies on ", ##X" dropping the comma when X is empty.
+ */
+#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n
+#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+
+/*
+ * Concatenate two parameters, but allow them to be expanded beforehand.
+ * CONCATENATE() is the one to use: going through __CONCAT() lets macro
+ * arguments be expanded before the tokens are pasted.
+ */
+#define __CONCAT(a, b) a ## b
+#define CONCATENATE(a, b) __CONCAT(a, b)
+
+#endif /* _LINUX_ARGS_H */
diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
index 01907b33537e..50c66ba9ada5 100644
--- a/tools/include/linux/atomic.h
+++ b/tools/include/linux/atomic.h
@@ -12,4 +12,26 @@ void atomic_long_set(atomic_long_t *v, long i);
#define atomic_cmpxchg_release atomic_cmpxchg
#endif /* atomic_cmpxchg_relaxed */
+/*
+ * Try to replace the value of @ptr with @new, expecting it to hold *@oldp.
+ * Returns true when the exchange happened. Otherwise *@oldp is updated
+ * with the value atomic_cmpxchg() found and false is returned.
+ */
+static inline bool atomic_try_cmpxchg(atomic_t *ptr, int *oldp, int new)
+{
+	int expected = *oldp;
+	int seen = atomic_cmpxchg(ptr, expected, new);
+
+	if (seen == expected)
+		return true;
+
+	*oldp = seen;
+	return false;
+}
+
+/*
+ * Increment @v by one unless its value is negative. Returns true when
+ * the increment was done, false when a negative value was seen. The
+ * compare-and-exchange is retried until it succeeds or @v is negative.
+ */
+static inline bool atomic_inc_unless_negative(atomic_t *v)
+{
+	int cur = atomic_read(v);
+
+	for (;;) {
+		if (unlikely(cur < 0))
+			return false;
+		/* On failure, cur is refreshed with the value found in @v */
+		if (atomic_try_cmpxchg(v, &cur, cur + 1))
+			return true;
+	}
+}
+
#endif /* __TOOLS_LINUX_ATOMIC_H */
diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index d4d300040d01..0d992245c600 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -3,6 +3,7 @@
#define _TOOLS_LINUX_BITMAP_H
#include <string.h>
+#include <asm-generic/bitsperlong.h>
#include <linux/align.h>
#include <linux/bitops.h>
#include <linux/find.h>
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 7ad056219115..a40cc861b3a7 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -2,10 +2,8 @@
#ifndef __LINUX_BITS_H
#define __LINUX_BITS_H
-#include <linux/const.h>
#include <vdso/bits.h>
#include <uapi/linux/bits.h>
-#include <asm/bitsperlong.h>
#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
@@ -50,10 +48,14 @@
(type_max(t) << (l) & \
type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h)))))
+#define GENMASK(h, l) GENMASK_TYPE(unsigned long, h, l)
+#define GENMASK_ULL(h, l) GENMASK_TYPE(unsigned long long, h, l)
+
#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l)
#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l)
#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l)
#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l)
+#define GENMASK_U128(h, l) GENMASK_TYPE(u128, h, l)
/*
* Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The
@@ -79,28 +81,9 @@
* BUILD_BUG_ON_ZERO is not available in h files included from asm files,
* disable the input check if that is the case.
*/
-#define GENMASK_INPUT_CHECK(h, l) 0
+#define GENMASK(h, l) __GENMASK(h, l)
+#define GENMASK_ULL(h, l) __GENMASK_ULL(h, l)
#endif /* !defined(__ASSEMBLY__) */
-#define GENMASK(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-#define GENMASK_ULL(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
-
-#if !defined(__ASSEMBLY__)
-/*
- * Missing asm support
- *
- * __GENMASK_U128() depends on _BIT128() which would not work
- * in the asm code, as it shifts an 'unsigned __int128' data
- * type instead of direct representation of 128 bit constants
- * such as long and unsigned long. The fundamental problem is
- * that a 128 bit constant will get silently truncated by the
- * gcc compiler.
- */
-#define GENMASK_U128(h, l) \
- (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l))
-#endif
-
#endif /* __LINUX_BITS_H */
diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h
index 6b8713675765..a86af9bc8bdc 100644
--- a/tools/include/linux/cfi_types.h
+++ b/tools/include/linux/cfi_types.h
@@ -8,7 +8,7 @@
#ifdef __ASSEMBLY__
#include <linux/linkage.h>
-#ifdef CONFIG_CFI_CLANG
+#ifdef CONFIG_CFI
/*
* Use the __kcfi_typeid_<function> type identifier symbol to
* annotate indirectly called assembly functions. The compiler emits
@@ -29,17 +29,40 @@
#define SYM_TYPED_START(name, linkage, align...) \
SYM_TYPED_ENTRY(name, linkage, align)
-#else /* CONFIG_CFI_CLANG */
+#else /* CONFIG_CFI */
#define SYM_TYPED_START(name, linkage, align...) \
SYM_START(name, linkage, align)
-#endif /* CONFIG_CFI_CLANG */
+#endif /* CONFIG_CFI */
#ifndef SYM_TYPED_FUNC_START
#define SYM_TYPED_FUNC_START(name) \
SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)
#endif
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_CFI
+/*
+ * DEFINE_CFI_TYPE(name, func): declare a u32 called @name and emit it,
+ * through inline asm, as a global 4-byte object placed in
+ * .data..ro_after_init and holding the __kcfi_typeid_<func> value.
+ */
+#define DEFINE_CFI_TYPE(name, func) \
+ /* \
+ * Force a reference to the function so the compiler generates \
+ * __kcfi_typeid_<func>. \
+ */ \
+ __ADDRESSABLE(func); \
+ /* u32 name __ro_after_init = __kcfi_typeid_<func> */ \
+ extern u32 name; \
+ asm ( \
+ " .pushsection .data..ro_after_init,\"aw\",\%progbits \n" \
+ " .type " #name ",\%object \n" \
+ " .globl " #name " \n" \
+ " .p2align 2, 0x0 \n" \
+ #name ": \n" \
+ " .4byte __kcfi_typeid_" #func " \n" \
+ " .size " #name ", 4 \n" \
+ " .popsection \n" \
+ );
+#endif
+
#endif /* __ASSEMBLY__ */
#endif /* _LINUX_CFI_TYPES_H */
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 33411ca0cc90..f40bd2b04c29 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -138,6 +138,10 @@
# define __force
#endif
+#ifndef __iomem
+# define __iomem
+#endif
+
#ifndef __weak
# define __weak __attribute__((weak))
#endif
diff --git a/tools/include/linux/gfp_types.h b/tools/include/linux/gfp_types.h
index 5f9f1ed190a0..65db9349f905 100644
--- a/tools/include/linux/gfp_types.h
+++ b/tools/include/linux/gfp_types.h
@@ -1 +1,392 @@
-#include "../../../include/linux/gfp_types.h"
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_GFP_TYPES_H
+#define __LINUX_GFP_TYPES_H
+
+#include <linux/bits.h>
+
+/* The typedef is in types.h but we want the documentation here */
+#if 0
+/**
+ * typedef gfp_t - Memory allocation flags.
+ *
+ * GFP flags are commonly used throughout Linux to indicate how memory
+ * should be allocated. The GFP acronym stands for get_free_pages(),
+ * the underlying memory allocation function. Not every GFP flag is
+ * supported by every function which may allocate memory. Most users
+ * will want to use a plain ``GFP_KERNEL``.
+ */
+typedef unsigned int __bitwise gfp_t;
+#endif
+
+/*
+ * In case of changes, please don't forget to update
+ * include/trace/events/mmflags.h and tools/perf/builtin-kmem.c
+ */
+
+enum {
+ ___GFP_DMA_BIT,
+ ___GFP_HIGHMEM_BIT,
+ ___GFP_DMA32_BIT,
+ ___GFP_MOVABLE_BIT,
+ ___GFP_RECLAIMABLE_BIT,
+ ___GFP_HIGH_BIT,
+ ___GFP_IO_BIT,
+ ___GFP_FS_BIT,
+ ___GFP_ZERO_BIT,
+ ___GFP_UNUSED_BIT, /* 0x200u unused */
+ ___GFP_DIRECT_RECLAIM_BIT,
+ ___GFP_KSWAPD_RECLAIM_BIT,
+ ___GFP_WRITE_BIT,
+ ___GFP_NOWARN_BIT,
+ ___GFP_RETRY_MAYFAIL_BIT,
+ ___GFP_NOFAIL_BIT,
+ ___GFP_NORETRY_BIT,
+ ___GFP_MEMALLOC_BIT,
+ ___GFP_COMP_BIT,
+ ___GFP_NOMEMALLOC_BIT,
+ ___GFP_HARDWALL_BIT,
+ ___GFP_THISNODE_BIT,
+ ___GFP_ACCOUNT_BIT,
+ ___GFP_ZEROTAGS_BIT,
+#ifdef CONFIG_KASAN_HW_TAGS
+ ___GFP_SKIP_ZERO_BIT,
+ ___GFP_SKIP_KASAN_BIT,
+#endif
+#ifdef CONFIG_LOCKDEP
+ ___GFP_NOLOCKDEP_BIT,
+#endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+ ___GFP_NO_OBJ_EXT_BIT,
+#endif
+ ___GFP_LAST_BIT
+};
+
+/* Plain integer GFP bitmasks. Do not use this directly. */
+#define ___GFP_DMA BIT(___GFP_DMA_BIT)
+#define ___GFP_HIGHMEM BIT(___GFP_HIGHMEM_BIT)
+#define ___GFP_DMA32 BIT(___GFP_DMA32_BIT)
+#define ___GFP_MOVABLE BIT(___GFP_MOVABLE_BIT)
+#define ___GFP_RECLAIMABLE BIT(___GFP_RECLAIMABLE_BIT)
+#define ___GFP_HIGH BIT(___GFP_HIGH_BIT)
+#define ___GFP_IO BIT(___GFP_IO_BIT)
+#define ___GFP_FS BIT(___GFP_FS_BIT)
+#define ___GFP_ZERO BIT(___GFP_ZERO_BIT)
+/* 0x200u unused */
+#define ___GFP_DIRECT_RECLAIM BIT(___GFP_DIRECT_RECLAIM_BIT)
+#define ___GFP_KSWAPD_RECLAIM BIT(___GFP_KSWAPD_RECLAIM_BIT)
+#define ___GFP_WRITE BIT(___GFP_WRITE_BIT)
+#define ___GFP_NOWARN BIT(___GFP_NOWARN_BIT)
+#define ___GFP_RETRY_MAYFAIL BIT(___GFP_RETRY_MAYFAIL_BIT)
+#define ___GFP_NOFAIL BIT(___GFP_NOFAIL_BIT)
+#define ___GFP_NORETRY BIT(___GFP_NORETRY_BIT)
+#define ___GFP_MEMALLOC BIT(___GFP_MEMALLOC_BIT)
+#define ___GFP_COMP BIT(___GFP_COMP_BIT)
+#define ___GFP_NOMEMALLOC BIT(___GFP_NOMEMALLOC_BIT)
+#define ___GFP_HARDWALL BIT(___GFP_HARDWALL_BIT)
+#define ___GFP_THISNODE BIT(___GFP_THISNODE_BIT)
+#define ___GFP_ACCOUNT BIT(___GFP_ACCOUNT_BIT)
+#define ___GFP_ZEROTAGS BIT(___GFP_ZEROTAGS_BIT)
+#ifdef CONFIG_KASAN_HW_TAGS
+#define ___GFP_SKIP_ZERO BIT(___GFP_SKIP_ZERO_BIT)
+#define ___GFP_SKIP_KASAN BIT(___GFP_SKIP_KASAN_BIT)
+#else
+#define ___GFP_SKIP_ZERO 0
+#define ___GFP_SKIP_KASAN 0
+#endif
+#ifdef CONFIG_LOCKDEP
+#define ___GFP_NOLOCKDEP BIT(___GFP_NOLOCKDEP_BIT)
+#else
+#define ___GFP_NOLOCKDEP 0
+#endif
+#ifdef CONFIG_SLAB_OBJ_EXT
+#define ___GFP_NO_OBJ_EXT BIT(___GFP_NO_OBJ_EXT_BIT)
+#else
+#define ___GFP_NO_OBJ_EXT 0
+#endif
+
+/*
+ * Physical address zone modifiers (see linux/mmzone.h - low four bits)
+ *
+ * Do not put any conditional on these. If necessary modify the definitions
+ * without the underscores and use them consistently. The definitions here may
+ * be used in bit comparisons.
+ */
+#define __GFP_DMA ((__force gfp_t)___GFP_DMA)
+#define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM)
+#define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32)
+#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */
+#define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
+
+/**
+ * DOC: Page mobility and placement hints
+ *
+ * Page mobility and placement hints
+ * ---------------------------------
+ *
+ * These flags provide hints about how mobile the page is. Pages with similar
+ * mobility are placed within the same pageblocks to minimise problems due
+ * to external fragmentation.
+ *
+ * %__GFP_MOVABLE (also a zone modifier) indicates that the page can be
+ * moved by page migration during memory compaction or can be reclaimed.
+ *
+ * %__GFP_RECLAIMABLE is used for slab allocations that specify
+ * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers.
+ *
+ * %__GFP_WRITE indicates the caller intends to dirty the page. Where possible,
+ * these pages will be spread between local zones to avoid all the dirty
+ * pages being in one zone (fair zone allocation policy).
+ *
+ * %__GFP_HARDWALL enforces the cpuset memory allocation policy.
+ *
+ * %__GFP_THISNODE forces the allocation to be satisfied from the requested
+ * node with no fallbacks or placement policy enforcements.
+ *
+ * %__GFP_ACCOUNT causes the allocation to be accounted to kmemcg.
+ *
+ * %__GFP_NO_OBJ_EXT causes slab allocation to have no object extension.
+ */
+#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE)
+#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE)
+#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL)
+#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)
+#define __GFP_ACCOUNT ((__force gfp_t)___GFP_ACCOUNT)
+#define __GFP_NO_OBJ_EXT ((__force gfp_t)___GFP_NO_OBJ_EXT)
+
+/**
+ * DOC: Watermark modifiers
+ *
+ * Watermark modifiers -- controls access to emergency reserves
+ * ------------------------------------------------------------
+ *
+ * %__GFP_HIGH indicates that the caller is high-priority and that granting
+ * the request is necessary before the system can make forward progress.
+ * For example creating an IO context to clean pages and requests
+ * from atomic context.
+ *
+ * %__GFP_MEMALLOC allows access to all memory. This should only be used when
+ * the caller guarantees the allocation will allow more memory to be freed
+ * very shortly e.g. process exiting or swapping. Users either should
+ * be the MM or co-ordinating closely with the VM (e.g. swap over NFS).
+ * Users of this flag have to be extremely careful to not deplete the reserve
+ * completely and implement a throttling mechanism which controls the
+ * consumption of the reserve based on the amount of freed memory.
+ * Usage of a pre-allocated pool (e.g. mempool) should be always considered
+ * before using this flag.
+ *
+ * %__GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves.
+ * This takes precedence over the %__GFP_MEMALLOC flag if both are set.
+ */
+#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH)
+#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)
+#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
+
+/**
+ * DOC: Reclaim modifiers
+ *
+ * Reclaim modifiers
+ * -----------------
+ * Please note that all the following flags are only applicable to sleepable
+ * allocations (e.g. %GFP_NOWAIT and %GFP_ATOMIC will ignore them).
+ *
+ * %__GFP_IO can start physical IO.
+ *
+ * %__GFP_FS can call down to the low-level FS. Clearing the flag avoids the
+ * allocator recursing into the filesystem which might already be holding
+ * locks.
+ *
+ * %__GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim.
+ * This flag can be cleared to avoid unnecessary delays when a fallback
+ * option is available.
+ *
+ * %__GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when
+ * the low watermark is reached and have it reclaim pages until the high
+ * watermark is reached. A caller may wish to clear this flag when fallback
+ * options are available and the reclaim is likely to disrupt the system. The
+ * canonical example is THP allocation where a fallback is cheap but
+ * reclaim/compaction may cause indirect stalls.
+ *
+ * %__GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim.
+ *
+ * The default allocator behavior depends on the request size. We have a concept
+ * of so-called costly allocations (with order > %PAGE_ALLOC_COSTLY_ORDER).
+ * !costly allocations are too essential to fail so they are implicitly
+ * non-failing by default (with some exceptions like OOM victims might fail so
+ * the caller still has to check for failures) while costly requests try to be
+ * not disruptive and back off even without invoking the OOM killer.
+ * The following three modifiers might be used to override some of these
+ * implicit rules. Please note that all of them must be used along with
+ * %__GFP_DIRECT_RECLAIM flag.
+ *
+ * %__GFP_NORETRY: The VM implementation will try only very lightweight
+ * memory direct reclaim to get some memory under memory pressure (thus
+ * it can sleep). It will avoid disruptive actions like OOM killer. The
+ * caller must handle the failure which is quite likely to happen under
+ * heavy memory pressure. The flag is suitable when failure can easily be
+ * handled at small cost, such as reduced throughput.
+ *
+ * %__GFP_RETRY_MAYFAIL: The VM implementation will retry memory reclaim
+ * procedures that have previously failed if there is some indication
+ * that progress has been made elsewhere. It can wait for other
+ * tasks to attempt high-level approaches to freeing memory such as
+ * compaction (which removes fragmentation) and page-out.
+ * There is still a definite limit to the number of retries, but it is
+ * a larger limit than with %__GFP_NORETRY.
+ * Allocations with this flag may fail, but only when there is
+ * genuinely little unused memory. While these allocations do not
+ * directly trigger the OOM killer, their failure indicates that
+ * the system is likely to need to use the OOM killer soon. The
+ * caller must handle failure, but can reasonably do so by failing
+ * a higher-level request, or completing it only in a much less
+ * efficient manner.
+ * If the allocation does fail, and the caller is in a position to
+ * free some non-essential memory, doing so could benefit the system
+ * as a whole.
+ *
+ * %__GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures. The allocation could block
+ * indefinitely but will never return with failure. Testing for
+ * failure is pointless.
+ * It _must_ be blockable and used together with __GFP_DIRECT_RECLAIM.
+ * It should _never_ be used in non-sleepable contexts.
+ * New users should be evaluated carefully (and the flag should be
+ * used only when there is no reasonable failure policy) but it is
+ * definitely preferable to use the flag rather than open-code an endless
+ * loop around the allocator.
+ * Allocating pages from the buddy with __GFP_NOFAIL and order > 1 is
+ * not supported. Please consider using kvmalloc() instead.
+ */
+#define __GFP_IO ((__force gfp_t)___GFP_IO)
+#define __GFP_FS ((__force gfp_t)___GFP_FS)
+#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */
+#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */
+#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM))
+#define __GFP_RETRY_MAYFAIL ((__force gfp_t)___GFP_RETRY_MAYFAIL)
+#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL)
+#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY)
+
+/**
+ * DOC: Action modifiers
+ *
+ * Action modifiers
+ * ----------------
+ *
+ * %__GFP_NOWARN suppresses allocation failure reports.
+ *
+ * %__GFP_COMP addresses compound page metadata.
+ *
+ * %__GFP_ZERO returns a zeroed page on success.
+ *
+ * %__GFP_ZEROTAGS zeroes memory tags at allocation time if the memory itself
+ * is being zeroed (either via __GFP_ZERO or via init_on_alloc, provided that
+ * __GFP_SKIP_ZERO is not set). This flag is intended for optimization: setting
+ * memory tags at the same time as zeroing memory has minimal additional
+ * performance impact.
+ *
+ * %__GFP_SKIP_KASAN makes KASAN skip unpoisoning on page allocation.
+ * Used for userspace and vmalloc pages; the latter are unpoisoned by
+ * kasan_unpoison_vmalloc instead. For userspace pages, results in
+ * poisoning being skipped as well, see should_skip_kasan_poison for
+ * details. Only effective in HW_TAGS mode.
+ */
+#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
+#define __GFP_COMP ((__force gfp_t)___GFP_COMP)
+#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
+#define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS)
+#define __GFP_SKIP_ZERO ((__force gfp_t)___GFP_SKIP_ZERO)
+#define __GFP_SKIP_KASAN ((__force gfp_t)___GFP_SKIP_KASAN)
+
+/* Disable lockdep for GFP context tracking */
+#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
+
+/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT ___GFP_LAST_BIT
+#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+
+/**
+ * DOC: Useful GFP flag combinations
+ *
+ * Useful GFP flag combinations
+ * ----------------------------
+ *
+ * Useful GFP flag combinations that are commonly used. It is recommended
+ * that subsystems start with one of these combinations and then set/clear
+ * %__GFP_FOO flags as necessary.
+ *
+ * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower
+ * watermark is applied to allow access to "atomic reserves".
+ * The current implementation doesn't support NMI and a few other strict
+ * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT.
+ *
+ * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires
+ * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim.
+ *
+ * %GFP_KERNEL_ACCOUNT is the same as GFP_KERNEL, except the allocation is
+ * accounted to kmemcg.
+ *
+ * %GFP_NOWAIT is for kernel allocations that should not stall for direct
+ * reclaim, start physical IO or use any filesystem callback. It is very
+ * likely to fail to allocate memory, even for very small allocations.
+ *
+ * %GFP_NOIO will use direct reclaim to discard clean pages or slab pages
+ * that do not require the starting of any physical IO.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_noio_{save,restore} to mark the whole scope which cannot
+ * perform any IO with a short explanation why. All allocation requests
+ * will inherit GFP_NOIO implicitly.
+ *
+ * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces.
+ * Please try to avoid using this flag directly and instead use
+ * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't
+ * recurse into the FS layer with a short explanation why. All allocation
+ * requests will inherit GFP_NOFS implicitly.
+ *
+ * %GFP_USER is for userspace allocations that also need to be directly
+ * accessible by the kernel or hardware. It is typically used by hardware
+ * for buffers that are mapped to userspace (e.g. graphics) that hardware
+ * still must DMA to. cpuset limits are enforced for these allocations.
+ *
+ * %GFP_DMA exists for historical reasons and should be avoided where possible.
+ * The flag indicates that the caller requires that the lowest zone be
+ * used (%ZONE_DMA or 16M on x86-64). Ideally, this would be removed but
+ * it would require careful auditing as some users really require it and
+ * others use the flag to avoid lowmem reserves in %ZONE_DMA and treat the
+ * lowest zone as a type of emergency reserve.
+ *
+ * %GFP_DMA32 is similar to %GFP_DMA except that the caller requires a 32-bit
+ * address. Note that kmalloc(..., GFP_DMA32) does not return DMA32 memory
+ * because the DMA32 kmalloc cache array is not implemented.
+ * (Reason: there is no such user in kernel).
+ *
+ * %GFP_HIGHUSER is for userspace allocations that may be mapped to userspace,
+ * do not need to be directly accessible by the kernel but that cannot
+ * move once in use. An example may be a hardware allocation that maps
+ * data directly into userspace but has no addressing limitations.
+ *
+ * %GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not
+ * need direct access to but can use kmap() when access is required. They
+ * are expected to be movable via page reclaim or page migration. Typically,
+ * pages on the LRU would also be allocated with %GFP_HIGHUSER_MOVABLE.
+ *
+ * %GFP_TRANSHUGE and %GFP_TRANSHUGE_LIGHT are used for THP allocations. They
+ * are compound allocations that will generally fail quickly if memory is not
+ * available and will not wake kswapd/kcompactd on failure. The _LIGHT
+ * version does not attempt reclaim/compaction at all and is by default used
+ * in page fault path, while the non-light is used by khugepaged.
+ */
+#define GFP_ATOMIC (__GFP_HIGH|__GFP_KSWAPD_RECLAIM)
+#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS)
+#define GFP_KERNEL_ACCOUNT (GFP_KERNEL | __GFP_ACCOUNT)
+#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM | __GFP_NOWARN)
+#define GFP_NOIO (__GFP_RECLAIM)
+#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO)
+#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
+#define GFP_DMA __GFP_DMA
+#define GFP_DMA32 __GFP_DMA32
+#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
+#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | __GFP_SKIP_KASAN)
+#define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
+ __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
+#define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
+
+#endif /* __LINUX_GFP_TYPES_H */
diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h
index aaa8a0767aa3..c5a2fed49eb0 100644
--- a/tools/include/linux/interval_tree_generic.h
+++ b/tools/include/linux/interval_tree_generic.h
@@ -77,7 +77,7 @@ ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node, \
* Cond2: start <= ITLAST(node) \
*/ \
\
-static ITSTRUCT * \
+ITSTATIC ITSTRUCT * \
ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
{ \
while (true) { \
@@ -104,12 +104,8 @@ ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last) \
if (ITSTART(node) <= last) { /* Cond1 */ \
if (start <= ITLAST(node)) /* Cond2 */ \
return node; /* node is leftmost match */ \
- if (node->ITRB.rb_right) { \
- node = rb_entry(node->ITRB.rb_right, \
- ITSTRUCT, ITRB); \
- if (start <= node->ITSUBTREE) \
- continue; \
- } \
+ node = rb_entry(node->ITRB.rb_right, ITSTRUCT, ITRB); \
+ continue; \
} \
return NULL; /* No match */ \
} \
diff --git a/tools/include/linux/io.h b/tools/include/linux/io.h
index e129871fe661..4b94b84160b8 100644
--- a/tools/include/linux/io.h
+++ b/tools/include/linux/io.h
@@ -2,4 +2,6 @@
#ifndef _TOOLS_IO_H
#define _TOOLS_IO_H
-#endif
+#include <asm/io.h>
+
+#endif /* _TOOLS_IO_H */
diff --git a/tools/include/linux/livepatch_external.h b/tools/include/linux/livepatch_external.h
new file mode 100644
index 000000000000..138af19b0f5c
--- /dev/null
+++ b/tools/include/linux/livepatch_external.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * External livepatch interfaces for patch creation tooling
+ */
+
+#ifndef _LINUX_LIVEPATCH_EXTERNAL_H_
+#define _LINUX_LIVEPATCH_EXTERNAL_H_
+
+#include <linux/types.h>
+
+#define KLP_RELOC_SEC_PREFIX ".klp.rela."
+#define KLP_SYM_PREFIX ".klp.sym."
+
+#define __KLP_PRE_PATCH_PREFIX __klp_pre_patch_callback_
+#define __KLP_POST_PATCH_PREFIX __klp_post_patch_callback_
+#define __KLP_PRE_UNPATCH_PREFIX __klp_pre_unpatch_callback_
+#define __KLP_POST_UNPATCH_PREFIX __klp_post_unpatch_callback_
+
+#define KLP_PRE_PATCH_PREFIX __stringify(__KLP_PRE_PATCH_PREFIX)
+#define KLP_POST_PATCH_PREFIX __stringify(__KLP_POST_PATCH_PREFIX)
+#define KLP_PRE_UNPATCH_PREFIX __stringify(__KLP_PRE_UNPATCH_PREFIX)
+#define KLP_POST_UNPATCH_PREFIX __stringify(__KLP_POST_UNPATCH_PREFIX)
+
+struct klp_object;
+
+typedef int (*klp_pre_patch_t)(struct klp_object *obj);
+typedef void (*klp_post_patch_t)(struct klp_object *obj);
+typedef void (*klp_pre_unpatch_t)(struct klp_object *obj);
+typedef void (*klp_post_unpatch_t)(struct klp_object *obj);
+
+/**
+ * struct klp_callbacks - pre/post live-(un)patch callback structure
+ * @pre_patch: executed before code patching
+ * @post_patch: executed after code patching
+ * @pre_unpatch: executed before code unpatching
+ * @post_unpatch: executed after code unpatching
+ * @post_unpatch_enabled: flag indicating if post-unpatch callback
+ * should run
+ *
+ * All callbacks are optional. Only the pre-patch callback, if provided,
+ * will be unconditionally executed. If the parent klp_object fails to
+ * patch for any reason, including a non-zero error status returned from
+ * the pre-patch callback, no further callbacks will be executed.
+ */
+struct klp_callbacks {
+ klp_pre_patch_t pre_patch;
+ klp_post_patch_t post_patch;
+ klp_pre_unpatch_t pre_unpatch;
+ klp_post_unpatch_t post_unpatch;
+ bool post_unpatch_enabled;
+};
+
+/*
+ * 'struct klp_{func,object}_ext' are compact "external" representations of
+ * 'struct klp_{func,object}'. They are used by objtool for livepatch
+ * generation. The structs are then read by the livepatch module and converted
+ * to the real structs before calling klp_enable_patch().
+ *
+ * TODO make these the official API for klp_enable_patch(). That should
+ * simplify livepatch's interface as well as its data structure lifetime
+ * management.
+ */
+struct klp_func_ext {
+ const char *old_name;
+ void *new_func;
+ unsigned long sympos;
+};
+
+struct klp_object_ext {
+ const char *name;
+ struct klp_func_ext *funcs;
+ struct klp_callbacks callbacks;
+ unsigned int nr_funcs;
+};
+
+#endif /* _LINUX_LIVEPATCH_EXTERNAL_H_ */
diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h
index df5d9fa84dba..c6def4049b1a 100644
--- a/tools/include/linux/objtool_types.h
+++ b/tools/include/linux/objtool_types.h
@@ -65,5 +65,8 @@ struct unwind_hint {
#define ANNOTYPE_IGNORE_ALTS 6
#define ANNOTYPE_INTRA_FUNCTION_CALL 7
#define ANNOTYPE_REACHABLE 8
+#define ANNOTYPE_NOCFI 9
+
+#define ANNOTYPE_DATA_SPECIAL 1
#endif /* _LINUX_OBJTOOL_TYPES_H */
diff --git a/tools/include/linux/pci_ids.h b/tools/include/linux/pci_ids.h
new file mode 120000
index 000000000000..1c9e88f41261
--- /dev/null
+++ b/tools/include/linux/pci_ids.h
@@ -0,0 +1 @@
+../../../include/linux/pci_ids.h
\ No newline at end of file
diff --git a/tools/include/linux/slab.h b/tools/include/linux/slab.h
index c87051e2b26f..94937a699402 100644
--- a/tools/include/linux/slab.h
+++ b/tools/include/linux/slab.h
@@ -4,11 +4,31 @@
#include <linux/types.h>
#include <linux/gfp.h>
+#include <pthread.h>
-#define SLAB_PANIC 2
#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
#define kzalloc_node(size, flags, node) kmalloc(size, flags)
+enum _slab_flag_bits {
+ _SLAB_KMALLOC,
+ _SLAB_HWCACHE_ALIGN,
+ _SLAB_PANIC,
+ _SLAB_TYPESAFE_BY_RCU,
+ _SLAB_ACCOUNT,
+ _SLAB_FLAGS_LAST_BIT
+};
+
+#define __SLAB_FLAG_BIT(nr) ((unsigned int __force)(1U << (nr)))
+#define __SLAB_FLAG_UNUSED ((unsigned int __force)(0U))
+
+#define SLAB_HWCACHE_ALIGN __SLAB_FLAG_BIT(_SLAB_HWCACHE_ALIGN)
+#define SLAB_PANIC __SLAB_FLAG_BIT(_SLAB_PANIC)
+#define SLAB_TYPESAFE_BY_RCU __SLAB_FLAG_BIT(_SLAB_TYPESAFE_BY_RCU)
+#ifdef CONFIG_MEMCG
+# define SLAB_ACCOUNT __SLAB_FLAG_BIT(_SLAB_ACCOUNT)
+#else
+# define SLAB_ACCOUNT __SLAB_FLAG_UNUSED
+#endif
void *kmalloc(size_t size, gfp_t gfp);
void kfree(void *p);
@@ -23,6 +43,98 @@ enum slab_state {
FULL
};
+struct kmem_cache {
+ pthread_mutex_t lock;
+ unsigned int size;
+ unsigned int align;
+ unsigned int sheaf_capacity;
+ int nr_objs;
+ void *objs;
+ void (*ctor)(void *);
+ bool non_kernel_enabled;
+ unsigned int non_kernel;
+ unsigned long nr_allocated;
+ unsigned long nr_tallocated;
+ bool exec_callback;
+ void (*callback)(void *);
+ void *private;
+};
+
+struct kmem_cache_args {
+ /**
+ * @align: The required alignment for the objects.
+ *
+ * %0 means no specific alignment is requested.
+ */
+ unsigned int align;
+ /**
+ * @sheaf_capacity: The maximum size of the sheaf.
+ */
+ unsigned int sheaf_capacity;
+ /**
+ * @useroffset: Usercopy region offset.
+ *
+ * %0 is a valid offset, when @usersize is non-%0
+ */
+ unsigned int useroffset;
+ /**
+ * @usersize: Usercopy region size.
+ *
+ * %0 means no usercopy region is specified.
+ */
+ unsigned int usersize;
+ /**
+ * @freeptr_offset: Custom offset for the free pointer
+ * in &SLAB_TYPESAFE_BY_RCU caches
+ *
+ * By default &SLAB_TYPESAFE_BY_RCU caches place the free pointer
+ * outside of the object. This might cause the object to grow in size.
+ * Cache creators that have a reason to avoid this can specify a custom
+ * free pointer offset in their struct where the free pointer will be
+ * placed.
+ *
+ * Note that placing the free pointer inside the object requires the
+ * caller to ensure that no fields are invalidated that are required to
+ * guard against object recycling (See &SLAB_TYPESAFE_BY_RCU for
+ * details).
+ *
+ * Using %0 as a value for @freeptr_offset is valid. If @freeptr_offset
+ * is specified, %use_freeptr_offset must be set to %true.
+ *
+ * Note that @ctor currently isn't supported with custom free pointers
+ * as a @ctor requires an external free pointer.
+ */
+ unsigned int freeptr_offset;
+ /**
+ * @use_freeptr_offset: Whether a @freeptr_offset is used.
+ */
+ bool use_freeptr_offset;
+ /**
+ * @ctor: A constructor for the objects.
+ *
+ * The constructor is invoked for each object in a newly allocated slab
+ * page. It is the cache user's responsibility to free objects in the
+ * same state as after calling the constructor, or deal appropriately
+ * with any differences between a freshly constructed and a reallocated
+ * object.
+ *
+ * %NULL means no constructor.
+ */
+ void (*ctor)(void *);
+};
+
+struct slab_sheaf {
+ union {
+ struct list_head barn_list;
+ /* only used for prefilled sheafs */
+ unsigned int capacity;
+ };
+ struct kmem_cache *cache;
+ unsigned int size;
+ int node; /* only used for rcu_sheaf */
+ void *objects[];
+};
+
static inline void *kzalloc(size_t size, gfp_t gfp)
{
return kmalloc(size, gfp | __GFP_ZERO);
@@ -37,12 +149,57 @@ static inline void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
}
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
-struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
- unsigned int align, unsigned int flags,
- void (*ctor)(void *));
+
+struct kmem_cache *
+__kmem_cache_create_args(const char *name, unsigned int size,
+ struct kmem_cache_args *args, unsigned int flags);
+
+/* If NULL is passed for @args, use this variant with default arguments. */
+static inline struct kmem_cache *
+__kmem_cache_default_args(const char *name, unsigned int size,
+ struct kmem_cache_args *args, unsigned int flags)
+{
+ struct kmem_cache_args kmem_default_args = {};
+
+ return __kmem_cache_create_args(name, size, &kmem_default_args, flags);
+}
+
+static inline struct kmem_cache *
+__kmem_cache_create(const char *name, unsigned int size, unsigned int align,
+ unsigned int flags, void (*ctor)(void *))
+{
+ struct kmem_cache_args kmem_args = {
+ .align = align,
+ .ctor = ctor,
+ };
+
+ return __kmem_cache_create_args(name, size, &kmem_args, flags);
+}
+
+#define kmem_cache_create(__name, __object_size, __args, ...) \
+ _Generic((__args), \
+ struct kmem_cache_args *: __kmem_cache_create_args, \
+ void *: __kmem_cache_default_args, \
+ default: __kmem_cache_create)(__name, __object_size, __args, __VA_ARGS__)
void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list);
int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size,
void **list);
+struct slab_sheaf *
+kmem_cache_prefill_sheaf(struct kmem_cache *s, gfp_t gfp, unsigned int size);
+
+void *
+kmem_cache_alloc_from_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf *sheaf);
+
+void kmem_cache_return_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf *sheaf);
+int kmem_cache_refill_sheaf(struct kmem_cache *s, gfp_t gfp,
+ struct slab_sheaf **sheafp, unsigned int size);
+
+static inline unsigned int kmem_cache_sheaf_size(struct slab_sheaf *sheaf)
+{
+ return sheaf->size;
+}
#endif /* _TOOLS_SLAB_H */
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 5a00b8b2cf9f..cfb6ddeb292b 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -25,6 +25,8 @@
#define STATIC_CALL_SITE_INIT 2UL /* init section */
#define STATIC_CALL_SITE_FLAGS 3UL
+#ifndef __ASSEMBLY__
+
/*
* The static call site table needs to be created by external tooling (objtool
* or a compiler plugin).
@@ -100,4 +102,6 @@ struct static_call_key {
#endif /* CONFIG_HAVE_STATIC_CALL */
+#endif /* __ASSEMBLY__ */
+
#endif /* _STATIC_CALL_TYPES_H */
diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h
index 8499f509f03e..51ad3cf4fa82 100644
--- a/tools/include/linux/string.h
+++ b/tools/include/linux/string.h
@@ -44,6 +44,20 @@ static inline bool strstarts(const char *str, const char *prefix)
return strncmp(str, prefix, strlen(prefix)) == 0;
}
+/*
+ * Checks if @str ends with @substr; an empty @substr always matches.
+ */
+static inline bool str_ends_with(const char *str, const char *substr)
+{
+ size_t len = strlen(str);
+ size_t sublen = strlen(substr);
+
+ if (sublen > len)
+ return false;
+
+ return !strcmp(str + len - sublen, substr);
+}
+
extern char * __must_check skip_spaces(const char *);
extern char *strim(char *);
diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile
index 143c2d2c2ba6..8118e22844f1 100644
--- a/tools/include/nolibc/Makefile
+++ b/tools/include/nolibc/Makefile
@@ -23,7 +23,7 @@ else
Q=@
endif
-arch_file := arch-$(ARCH).h
+arch_files := arch.h $(wildcard arch-*.h)
all_files := \
compiler.h \
crt.h \
@@ -33,6 +33,7 @@ all_files := \
errno.h \
fcntl.h \
getopt.h \
+ inttypes.h \
limits.h \
math.h \
nolibc.h \
@@ -56,12 +57,14 @@ all_files := \
sys/random.h \
sys/reboot.h \
sys/resource.h \
+ sys/select.h \
sys/stat.h \
sys/syscall.h \
sys/sysmacros.h \
sys/time.h \
sys/timerfd.h \
sys/types.h \
+ sys/uio.h \
sys/utsname.h \
sys/wait.h \
time.h \
@@ -79,7 +82,7 @@ help:
@echo "Supported targets under nolibc:"
@echo " all call \"headers\""
@echo " clean clean the sysroot"
- @echo " headers prepare a sysroot in tools/include/nolibc/sysroot"
+ @echo " headers prepare a multi-arch sysroot in \$${OUTPUT}sysroot"
@echo " headers_standalone like \"headers\", and also install kernel headers"
@echo " help this help"
@echo ""
@@ -90,18 +93,11 @@ help:
@echo " OUTPUT = $(OUTPUT)"
@echo ""
+# installs headers for all archs at once.
headers:
- $(Q)mkdir -p $(OUTPUT)sysroot
- $(Q)mkdir -p $(OUTPUT)sysroot/include
- $(Q)cp --parents $(all_files) $(OUTPUT)sysroot/include/
- $(Q)if [ "$(ARCH)" = "i386" -o "$(ARCH)" = "x86_64" ]; then \
- cat arch-x86.h; \
- elif [ -e "$(arch_file)" ]; then \
- cat $(arch_file); \
- else \
- echo "Fatal: architecture $(ARCH) not yet supported by nolibc." >&2; \
- exit 1; \
- fi > $(OUTPUT)sysroot/include/arch.h
+ $(Q)mkdir -p "$(OUTPUT)sysroot"
+ $(Q)mkdir -p "$(OUTPUT)sysroot/include"
+ $(Q)cp --parents $(arch_files) $(all_files) "$(OUTPUT)sysroot/include/"
headers_standalone: headers
$(Q)$(MAKE) -C $(srctree) headers
diff --git a/tools/include/nolibc/arch-arm.h b/tools/include/nolibc/arch-arm.h
index 1f66e7e5a444..251c42579028 100644
--- a/tools/include/nolibc/arch-arm.h
+++ b/tools/include/nolibc/arch-arm.h
@@ -184,6 +184,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -193,5 +194,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_ARM_H */
diff --git a/tools/include/nolibc/arch-arm64.h b/tools/include/nolibc/arch-arm64.h
index 02a3f74c8ec8..080a55a7144e 100644
--- a/tools/include/nolibc/arch-arm64.h
+++ b/tools/include/nolibc/arch-arm64.h
@@ -141,6 +141,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -150,4 +151,5 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_ARM64_H */
diff --git a/tools/include/nolibc/arch-loongarch.h b/tools/include/nolibc/arch-loongarch.h
index 5511705303ea..c894176c3f89 100644
--- a/tools/include/nolibc/arch-loongarch.h
+++ b/tools/include/nolibc/arch-loongarch.h
@@ -142,6 +142,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -151,5 +152,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_LOONGARCH_H */
diff --git a/tools/include/nolibc/arch-m68k.h b/tools/include/nolibc/arch-m68k.h
index 6dac1845f298..2a4fbada5e79 100644
--- a/tools/include/nolibc/arch-m68k.h
+++ b/tools/include/nolibc/arch-m68k.h
@@ -128,6 +128,7 @@
_num; \
})
+#ifndef NOLIBC_NO_RUNTIME
void _start(void);
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -137,5 +138,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_M68K_H */
diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h
index 0cbac63b249a..a72506ceec6b 100644
--- a/tools/include/nolibc/arch-mips.h
+++ b/tools/include/nolibc/arch-mips.h
@@ -245,6 +245,7 @@
#endif /* _ABIO32 */
+#ifndef NOLIBC_NO_RUNTIME
/* startup code, note that it's called __start on MIPS */
void __start(void);
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void)
@@ -266,5 +267,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_MIPS_H */
diff --git a/tools/include/nolibc/arch-powerpc.h b/tools/include/nolibc/arch-powerpc.h
index 204564bbcd32..e0c7e0b81f7c 100644
--- a/tools/include/nolibc/arch-powerpc.h
+++ b/tools/include/nolibc/arch-powerpc.h
@@ -183,6 +183,7 @@
#endif
#endif /* !__powerpc64__ */
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -215,5 +216,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
#endif
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_POWERPC_H */
diff --git a/tools/include/nolibc/arch-riscv.h b/tools/include/nolibc/arch-riscv.h
index 885383a86c38..1c00cacf57e1 100644
--- a/tools/include/nolibc/arch-riscv.h
+++ b/tools/include/nolibc/arch-riscv.h
@@ -139,6 +139,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -152,5 +153,6 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_RISCV_H */
diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h
index df4c3cc713ac..74125a254ce3 100644
--- a/tools/include/nolibc/arch-s390.h
+++ b/tools/include/nolibc/arch-s390.h
@@ -139,22 +139,19 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
__asm__ volatile (
-#ifdef __s390x__
"lgr %r2, %r15\n" /* save stack pointer to %r2, as arg1 of _start_c */
"aghi %r15, -160\n" /* allocate new stackframe */
-#else
- "lr %r2, %r15\n"
- "ahi %r15, -96\n"
-#endif
"xc 0(8,%r15), 0(%r15)\n" /* clear backchain */
"brasl %r14, _start_c\n" /* transfer to c runtime */
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
struct s390_mmap_arg_struct {
unsigned long addr;
diff --git a/tools/include/nolibc/arch-sh.h b/tools/include/nolibc/arch-sh.h
index a96b8914607e..7a421197d104 100644
--- a/tools/include/nolibc/arch-sh.h
+++ b/tools/include/nolibc/arch-sh.h
@@ -140,6 +140,7 @@
_ret; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void _start_wrapper(void);
void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _start_wrapper(void)
@@ -158,5 +159,6 @@ void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _st
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_ARCH_SH_H */
diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h
index ca420d843e25..2ebb5686e105 100644
--- a/tools/include/nolibc/arch-sparc.h
+++ b/tools/include/nolibc/arch-sparc.h
@@ -152,6 +152,7 @@
_arg1; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
{
@@ -169,6 +170,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
static pid_t getpid(void);
diff --git a/tools/include/nolibc/arch-x86.h b/tools/include/nolibc/arch-x86.h
index d3efc0c3b8ad..f6c43ac5377b 100644
--- a/tools/include/nolibc/arch-x86.h
+++ b/tools/include/nolibc/arch-x86.h
@@ -157,6 +157,7 @@
_eax; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
/*
* i386 System V ABI mandates:
@@ -176,6 +177,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#else /* !defined(__x86_64__) */
@@ -323,6 +325,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
_ret; \
})
+#ifndef NOLIBC_NO_RUNTIME
/* startup code */
/*
* x86-64 System V ABI mandates:
@@ -340,6 +343,7 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s
);
__nolibc_entrypoint_epilogue();
}
+#endif /* NOLIBC_NO_RUNTIME */
#define NOLIBC_ARCH_HAS_MEMMOVE
void *memmove(void *dst, const void *src, size_t len);
@@ -351,7 +355,7 @@ void *memcpy(void *dst, const void *src, size_t len);
void *memset(void *dst, int c, size_t len);
__asm__ (
-".section .text.nolibc_memmove_memcpy\n"
+".pushsection .text.nolibc_memmove_memcpy\n"
".weak memmove\n"
".weak memcpy\n"
"memmove:\n"
@@ -371,8 +375,9 @@ __asm__ (
"rep movsb\n\t"
"cld\n\t"
"retq\n"
+".popsection\n"
-".section .text.nolibc_memset\n"
+".pushsection .text.nolibc_memset\n"
".weak memset\n"
"memset:\n"
"xchgl %eax, %esi\n\t"
@@ -381,6 +386,7 @@ __asm__ (
"rep stosb\n\t"
"popq %rax\n\t"
"retq\n"
+".popsection\n"
);
#endif /* !defined(__x86_64__) */
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index 426c89198135..a3adaf433f2c 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -3,15 +3,6 @@
* Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu>
*/
-/* Below comes the architecture-specific code. For each architecture, we have
- * the syscall declarations and the _start code definition. This is the only
- * global part. On all architectures the kernel puts everything in the stack
- * before jumping to _start just above us, without any return address (_start
- * is not a function but an entry point). So at the stack pointer we find argc.
- * Then argv[] begins, and ends at the first NULL. Then we have envp which
- * starts and ends with a NULL as well. So envp=argv+argc+1.
- */
-
#ifndef _NOLIBC_ARCH_H
#define _NOLIBC_ARCH_H
@@ -27,7 +18,7 @@
#include "arch-powerpc.h"
#elif defined(__riscv)
#include "arch-riscv.h"
-#elif defined(__s390x__) || defined(__s390__)
+#elif defined(__s390x__)
#include "arch-s390.h"
#elif defined(__loongarch__)
#include "arch-loongarch.h"
diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h
index 369cfb5a0e78..87090bbc53e0 100644
--- a/tools/include/nolibc/compiler.h
+++ b/tools/include/nolibc/compiler.h
@@ -41,8 +41,8 @@
# define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector")))
#endif /* __nolibc_has_attribute(no_stack_protector) */
-#if __nolibc_has_attribute(fallthrough)
-# define __nolibc_fallthrough do { } while (0); __attribute__((fallthrough))
+#if __nolibc_has_attribute(__fallthrough__)
+# define __nolibc_fallthrough do { } while (0); __attribute__((__fallthrough__))
#else
# define __nolibc_fallthrough do { } while (0)
#endif /* __nolibc_has_attribute(fallthrough) */
diff --git a/tools/include/nolibc/crt.h b/tools/include/nolibc/crt.h
index 961cfe777c35..d9262998dae9 100644
--- a/tools/include/nolibc/crt.h
+++ b/tools/include/nolibc/crt.h
@@ -7,6 +7,8 @@
#ifndef _NOLIBC_CRT_H
#define _NOLIBC_CRT_H
+#ifndef NOLIBC_NO_RUNTIME
+
#include "compiler.h"
char **environ __attribute__((weak));
@@ -88,4 +90,5 @@ void _start_c(long *sp)
exit(exitcode);
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_CRT_H */
diff --git a/tools/include/nolibc/dirent.h b/tools/include/nolibc/dirent.h
index 758b95c48e7a..61a122a60327 100644
--- a/tools/include/nolibc/dirent.h
+++ b/tools/include/nolibc/dirent.h
@@ -86,9 +86,9 @@ int readdir_r(DIR *dirp, struct dirent *entry, struct dirent **result)
* readdir() can only return one entry at a time.
* Make sure the non-returned ones are not skipped.
*/
- ret = lseek(fd, ldir->d_off, SEEK_SET);
- if (ret == -1)
- return errno;
+ ret = sys_lseek(fd, ldir->d_off, SEEK_SET);
+ if (ret < 0)
+ return -ret;
entry->d_ino = ldir->d_ino;
/* the destination should always be big enough */
diff --git a/tools/include/nolibc/getopt.h b/tools/include/nolibc/getopt.h
index 217abb95264b..87565e3b6a33 100644
--- a/tools/include/nolibc/getopt.h
+++ b/tools/include/nolibc/getopt.h
@@ -78,7 +78,7 @@ int getopt(int argc, char * const argv[], const char *optstring)
return '?';
}
if (optstring[i] == ':') {
- optarg = 0;
+ optarg = NULL;
if (optstring[i + 1] != ':' || __optpos) {
optarg = argv[optind++];
if (__optpos)
diff --git a/tools/include/nolibc/inttypes.h b/tools/include/nolibc/inttypes.h
new file mode 100644
index 000000000000..1977bd74bfeb
--- /dev/null
+++ b/tools/include/nolibc/inttypes.h
@@ -0,0 +1,3 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "nolibc.h"
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index c199ade200c2..272dfc961158 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -104,11 +104,13 @@
#include "sys/random.h"
#include "sys/reboot.h"
#include "sys/resource.h"
+#include "sys/select.h"
#include "sys/stat.h"
#include "sys/syscall.h"
#include "sys/sysmacros.h"
#include "sys/time.h"
#include "sys/timerfd.h"
+#include "sys/uio.h"
#include "sys/utsname.h"
#include "sys/wait.h"
#include "ctype.h"
@@ -116,6 +118,7 @@
#include "sched.h"
#include "signal.h"
#include "unistd.h"
+#include "stdbool.h"
#include "stdio.h"
#include "stdlib.h"
#include "string.h"
diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h
index 1765acb17ea0..0d053f93ea99 100644
--- a/tools/include/nolibc/poll.h
+++ b/tools/include/nolibc/poll.h
@@ -39,10 +39,8 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout)
t.tv_nsec = (timeout % 1000) * 1000000;
}
return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0);
-#elif defined(__NR_poll)
- return my_syscall3(__NR_poll, fds, nfds, timeout);
#else
- return __nolibc_enosys(__func__, fds, nfds, timeout);
+ return my_syscall3(__NR_poll, fds, nfds, timeout);
#endif
}
diff --git a/tools/include/nolibc/stackprotector.h b/tools/include/nolibc/stackprotector.h
index c71a2c257177..7123aa056cb0 100644
--- a/tools/include/nolibc/stackprotector.h
+++ b/tools/include/nolibc/stackprotector.h
@@ -9,6 +9,7 @@
#include "compiler.h"
+#ifndef NOLIBC_NO_RUNTIME
#if defined(_NOLIBC_STACKPROTECTOR)
#include "sys.h"
@@ -49,5 +50,6 @@ static __no_stack_protector void __stack_chk_init(void)
#else /* !defined(_NOLIBC_STACKPROTECTOR) */
static void __stack_chk_init(void) {}
#endif /* defined(_NOLIBC_STACKPROTECTOR) */
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_STACKPROTECTOR_H */
diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h
index ba950f0e7338..392f4dd94158 100644
--- a/tools/include/nolibc/std.h
+++ b/tools/include/nolibc/std.h
@@ -20,15 +20,15 @@
/* those are commonly provided by sys/types.h */
typedef unsigned int dev_t;
-typedef unsigned long ino_t;
+typedef uint64_t ino_t;
typedef unsigned int mode_t;
typedef signed int pid_t;
typedef unsigned int uid_t;
typedef unsigned int gid_t;
typedef unsigned long nlink_t;
-typedef signed long off_t;
+typedef int64_t off_t;
typedef signed long blksize_t;
typedef signed long blkcnt_t;
-typedef __kernel_old_time_t time_t;
+typedef __kernel_time_t time_t;
#endif /* _NOLIBC_STD_H */
diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h
index 7630234408c5..1f16dab2ac88 100644
--- a/tools/include/nolibc/stdio.h
+++ b/tools/include/nolibc/stdio.h
@@ -321,11 +321,13 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char
if (!outstr)
outstr="(null)";
}
-#ifndef NOLIBC_IGNORE_ERRNO
else if (c == 'm') {
+#ifdef NOLIBC_IGNORE_ERRNO
+ outstr = "unknown error";
+#else
outstr = strerror(errno);
- }
#endif /* NOLIBC_IGNORE_ERRNO */
+ }
else if (c == '%') {
/* queue it verbatim */
continue;
@@ -600,7 +602,11 @@ int sscanf(const char *str, const char *format, ...)
static __attribute__((unused))
void perror(const char *msg)
{
+#ifdef NOLIBC_IGNORE_ERRNO
+ fprintf(stderr, "%s%sunknown error\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "");
+#else
fprintf(stderr, "%s%serrno=%d\n", (msg && *msg) ? msg : "", (msg && *msg) ? ": " : "", errno);
+#endif
}
static __attribute__((unused))
diff --git a/tools/include/nolibc/stdlib.h b/tools/include/nolibc/stdlib.h
index 5fd99a480f82..f184e108ed0a 100644
--- a/tools/include/nolibc/stdlib.h
+++ b/tools/include/nolibc/stdlib.h
@@ -100,6 +100,7 @@ void free(void *ptr)
munmap(heap, heap->len);
}
+#ifndef NOLIBC_NO_RUNTIME
/* getenv() tries to find the environment variable named <name> in the
* environment array pointed to by global variable "environ" which must be
* declared as a char **, and must be terminated by a NULL (it is recommended
@@ -122,6 +123,7 @@ char *getenv(const char *name)
}
return NULL;
}
+#endif /* NOLIBC_NO_RUNTIME */
static __attribute__((unused))
void *malloc(size_t len)
diff --git a/tools/include/nolibc/string.h b/tools/include/nolibc/string.h
index 163a17e7dd38..4000926f44ac 100644
--- a/tools/include/nolibc/string.h
+++ b/tools/include/nolibc/string.h
@@ -93,6 +93,21 @@ void *memset(void *dst, int b, size_t len)
}
#endif /* #ifndef NOLIBC_ARCH_HAS_MEMSET */
+#ifndef NOLIBC_ARCH_HAS_MEMCHR
+/* memchr(): return the first byte equal to (char)c within s[0..len), or NULL if none */
+static __attribute__((unused))
+void *memchr(const void *s, int c, size_t len)
+{
+	char *bytes = (char *)s;
+	size_t i;
+
+	for (i = 0; i < len; i++)
+		if (bytes[i] == (char)c)
+			return &bytes[i];
+	return NULL;
+}
+#endif /* #ifndef NOLIBC_ARCH_HAS_MEMCHR */
+
static __attribute__((unused))
char *strchr(const char *s, int c)
{
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index 295e71d34aba..847af1ccbdc9 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -106,7 +106,7 @@ static __attribute__((unused))
void *sbrk(intptr_t inc)
{
/* first call to find current end */
- void *ret = sys_brk(0);
+ void *ret = sys_brk(NULL);
if (ret && sys_brk(ret + inc) == ret + inc)
return ret + inc;
@@ -118,6 +118,7 @@ void *sbrk(intptr_t inc)
/*
* int chdir(const char *path);
+ * int fchdir(int fildes);
*/
static __attribute__((unused))
@@ -132,6 +133,18 @@ int chdir(const char *path)
return __sysret(sys_chdir(path));
}
+/* sys_fchdir(): issue the raw __NR_fchdir syscall for <fildes> and return its result as is */
+static __attribute__((unused)) int sys_fchdir(int fildes)
+{
+	return my_syscall1(__NR_fchdir, fildes);
+}
+
+/* fchdir(): same call, with the raw result passed through __sysret() */
+static __attribute__((unused)) int fchdir(int fildes)
+{
+	return __sysret(sys_fchdir(fildes));
+}
+
/*
* int chmod(const char *path, mode_t mode);
@@ -142,10 +155,8 @@ int sys_chmod(const char *path, mode_t mode)
{
#if defined(__NR_fchmodat)
return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
-#elif defined(__NR_chmod)
- return my_syscall2(__NR_chmod, path, mode);
#else
- return __nolibc_enosys(__func__, path, mode);
+ return my_syscall2(__NR_chmod, path, mode);
#endif
}
@@ -165,10 +176,8 @@ int sys_chown(const char *path, uid_t owner, gid_t group)
{
#if defined(__NR_fchownat)
return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
-#elif defined(__NR_chown)
- return my_syscall3(__NR_chown, path, owner, group);
#else
- return __nolibc_enosys(__func__, path, owner, group);
+ return my_syscall3(__NR_chown, path, owner, group);
#endif
}
@@ -238,11 +247,22 @@ static __attribute__((unused))
int sys_dup2(int old, int new)
{
#if defined(__NR_dup3)
+ int ret, nr_fcntl;
+
+#ifdef __NR_fcntl64
+ nr_fcntl = __NR_fcntl64;
+#else
+ nr_fcntl = __NR_fcntl;
+#endif
+
+ if (old == new) {
+ ret = my_syscall2(nr_fcntl, old, F_GETFD);
+ return ret < 0 ? ret : old;
+ }
+
return my_syscall3(__NR_dup3, old, new, 0);
-#elif defined(__NR_dup2)
- return my_syscall2(__NR_dup2, old, new);
#else
- return __nolibc_enosys(__func__, old, new);
+ return my_syscall2(__NR_dup2, old, new);
#endif
}
@@ -327,10 +347,8 @@ pid_t sys_fork(void)
* will not use the rest with no other flag.
*/
return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
-#elif defined(__NR_fork)
- return my_syscall0(__NR_fork);
#else
- return __nolibc_enosys(__func__);
+ return my_syscall0(__NR_fork);
#endif
}
#endif
@@ -347,7 +365,7 @@ pid_t sys_vfork(void)
{
#if defined(__NR_vfork)
return my_syscall0(__NR_vfork);
-#elif defined(__NR_clone3)
+#else
/*
* clone() could be used but has different argument orders per
* architecture.
@@ -358,8 +376,6 @@ pid_t sys_vfork(void)
};
return my_syscall2(__NR_clone3, &args, sizeof(args));
-#else
- return __nolibc_enosys(__func__);
#endif
}
#endif
@@ -509,6 +525,7 @@ pid_t gettid(void)
return sys_gettid();
}
+#ifndef NOLIBC_NO_RUNTIME
static unsigned long getauxval(unsigned long key);
/*
@@ -520,7 +537,7 @@ int getpagesize(void)
{
return __sysret((int)getauxval(AT_PAGESZ) ?: -ENOENT);
}
-
+#endif /* NOLIBC_NO_RUNTIME */
/*
* uid_t getuid(void);
@@ -569,10 +586,8 @@ int sys_link(const char *old, const char *new)
{
#if defined(__NR_linkat)
return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
-#elif defined(__NR_link)
- return my_syscall2(__NR_link, old, new);
#else
- return __nolibc_enosys(__func__, old, new);
+ return my_syscall2(__NR_link, old, new);
#endif
}
@@ -590,44 +605,27 @@ int link(const char *old, const char *new)
static __attribute__((unused))
off_t sys_lseek(int fd, off_t offset, int whence)
{
-#if defined(__NR_lseek)
- return my_syscall3(__NR_lseek, fd, offset, whence);
-#else
- return __nolibc_enosys(__func__, fd, offset, whence);
-#endif
-}
-
-static __attribute__((unused))
-int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low,
- __kernel_loff_t *result, int whence)
-{
#if defined(__NR_llseek)
- return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence);
+ __kernel_loff_t loff = 0;
+ off_t result;
+ int ret;
+
+ ret = my_syscall5(__NR_llseek, fd, offset >> 32, (uint32_t)offset, &loff, whence);
+ if (ret < 0)
+ result = ret;
+ else
+ result = loff;
+
+ return result;
#else
- return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence);
+ return my_syscall3(__NR_lseek, fd, offset, whence);
#endif
}
static __attribute__((unused))
off_t lseek(int fd, off_t offset, int whence)
{
- __kernel_loff_t loff = 0;
- off_t result;
- int ret;
-
- result = sys_lseek(fd, offset, whence);
- if (result == -ENOSYS) {
- /* Only exists on 32bit where nolibc off_t is also 32bit */
- ret = sys_llseek(fd, 0, offset, &loff, whence);
- if (ret < 0)
- result = ret;
- else if (loff != (off_t)loff)
- result = -EOVERFLOW;
- else
- result = loff;
- }
-
- return __sysret(result);
+ return __sysret(sys_lseek(fd, offset, whence));
}
@@ -640,10 +638,8 @@ int sys_mkdir(const char *path, mode_t mode)
{
#if defined(__NR_mkdirat)
return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
-#elif defined(__NR_mkdir)
- return my_syscall2(__NR_mkdir, path, mode);
#else
- return __nolibc_enosys(__func__, path, mode);
+ return my_syscall2(__NR_mkdir, path, mode);
#endif
}
@@ -662,10 +658,8 @@ int sys_rmdir(const char *path)
{
#if defined(__NR_rmdir)
return my_syscall1(__NR_rmdir, path);
-#elif defined(__NR_unlinkat)
- return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR);
#else
- return __nolibc_enosys(__func__, path);
+ return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR);
#endif
}
@@ -685,10 +679,8 @@ long sys_mknod(const char *path, mode_t mode, dev_t dev)
{
#if defined(__NR_mknodat)
return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
-#elif defined(__NR_mknod)
- return my_syscall3(__NR_mknod, path, mode, dev);
#else
- return __nolibc_enosys(__func__, path, mode, dev);
+ return my_syscall3(__NR_mknod, path, mode, dev);
#endif
}
@@ -775,53 +767,6 @@ int sched_yield(void)
/*
- * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
- * fd_set *except_fds, struct timeval *timeout);
- */
-
-static __attribute__((unused))
-int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
-#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
- struct sel_arg_struct {
- unsigned long n;
- fd_set *r, *w, *e;
- struct timeval *t;
- } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
- return my_syscall1(__NR_select, &arg);
-#elif defined(__NR__newselect)
- return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
-#elif defined(__NR_select)
- return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout);
-#elif defined(__NR_pselect6)
- struct timespec t;
-
- if (timeout) {
- t.tv_sec = timeout->tv_sec;
- t.tv_nsec = timeout->tv_usec * 1000;
- }
- return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#elif defined(__NR_pselect6_time64)
- struct __kernel_timespec t;
-
- if (timeout) {
- t.tv_sec = timeout->tv_sec;
- t.tv_nsec = timeout->tv_usec * 1000;
- }
- return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#else
- return __nolibc_enosys(__func__, nfds, rfds, wfds, efds, timeout);
-#endif
-}
-
-static __attribute__((unused))
-int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
-{
- return __sysret(sys_select(nfds, rfds, wfds, efds, timeout));
-}
-
-
-/*
* int setpgid(pid_t pid, pid_t pgid);
*/
@@ -874,10 +819,8 @@ int sys_symlink(const char *old, const char *new)
{
#if defined(__NR_symlinkat)
return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
-#elif defined(__NR_symlink)
- return my_syscall2(__NR_symlink, old, new);
#else
- return __nolibc_enosys(__func__, old, new);
+ return my_syscall2(__NR_symlink, old, new);
#endif
}
@@ -931,10 +874,8 @@ int sys_unlink(const char *path)
{
#if defined(__NR_unlinkat)
return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
-#elif defined(__NR_unlink)
- return my_syscall1(__NR_unlink, path);
#else
- return __nolibc_enosys(__func__, path);
+ return my_syscall1(__NR_unlink, path);
#endif
}
diff --git a/tools/include/nolibc/sys/auxv.h b/tools/include/nolibc/sys/auxv.h
index c52463d6c18d..0e98325e7347 100644
--- a/tools/include/nolibc/sys/auxv.h
+++ b/tools/include/nolibc/sys/auxv.h
@@ -10,6 +10,8 @@
#ifndef _NOLIBC_SYS_AUXV_H
#define _NOLIBC_SYS_AUXV_H
+#ifndef NOLIBC_NO_RUNTIME
+
#include "../crt.h"
static __attribute__((unused))
@@ -38,4 +40,5 @@ unsigned long getauxval(unsigned long type)
return ret;
}
+#endif /* NOLIBC_NO_RUNTIME */
#endif /* _NOLIBC_SYS_AUXV_H */
diff --git a/tools/include/nolibc/sys/mman.h b/tools/include/nolibc/sys/mman.h
index 5228751b458c..77084ac3405a 100644
--- a/tools/include/nolibc/sys/mman.h
+++ b/tools/include/nolibc/sys/mman.h
@@ -31,11 +31,6 @@ void *sys_mmap(void *addr, size_t length, int prot, int flags, int fd,
}
#endif
-/* Note that on Linux, MAP_FAILED is -1 so we can use the generic __sysret()
- * which returns -1 upon error and still satisfy user land that checks for
- * MAP_FAILED.
- */
-
static __attribute__((unused))
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset)
{
diff --git a/tools/include/nolibc/sys/random.h b/tools/include/nolibc/sys/random.h
index 8d9749f1c845..cd5d25c571a8 100644
--- a/tools/include/nolibc/sys/random.h
+++ b/tools/include/nolibc/sys/random.h
@@ -22,13 +22,13 @@
static __attribute__((unused))
ssize_t sys_getrandom(void *buf, size_t buflen, unsigned int flags)
{
- return my_syscall3(__NR_getrandom, buf, buflen, flags);
+ return my_syscall3(__NR_getrandom, buf, buflen, flags);
}
static __attribute__((unused))
ssize_t getrandom(void *buf, size_t buflen, unsigned int flags)
{
- return __sysret(sys_getrandom(buf, buflen, flags));
+ return __sysret(sys_getrandom(buf, buflen, flags));
}
#endif /* _NOLIBC_SYS_RANDOM_H */
diff --git a/tools/include/nolibc/sys/reboot.h b/tools/include/nolibc/sys/reboot.h
index 4a1e435be669..38274c64a722 100644
--- a/tools/include/nolibc/sys/reboot.h
+++ b/tools/include/nolibc/sys/reboot.h
@@ -28,7 +28,7 @@ ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg)
static __attribute__((unused))
int reboot(int cmd)
{
- return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0));
+ return __sysret(sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, NULL));
}
#endif /* _NOLIBC_SYS_REBOOT_H */
diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h
new file mode 100644
index 000000000000..2a5619c01277
--- /dev/null
+++ b/tools/include/nolibc/sys/select.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_SELECT_H
+#define _NOLIBC_SYS_SELECT_H
+
+#include <linux/time.h>
+#include <linux/unistd.h>
+
+/* commonly an fd_set represents 256 FDs */
+#ifndef FD_SETSIZE
+#define FD_SETSIZE 256
+#endif
+
+#define FD_SETIDXMASK (8 * sizeof(unsigned long))
+#define FD_SETBITMASK (8 * sizeof(unsigned long)-1)
+
+/* for select() */
+typedef struct {
+ unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK];
+} fd_set;
+
+/* Remove <fd> from <set> (negative fds ignored). The mask is unsigned long like the fds[] words. */
+#define FD_CLR(fd, set) do {						\
+		fd_set *__set = (set);					\
+		int __fd = (fd);					\
+		if (__fd >= 0)						\
+			__set->fds[__fd / FD_SETIDXMASK] &= ~(1UL << (__fd & FD_SETBITMASK)); \
+	} while (0)
+
+/* Add <fd> to <set> (negative fds ignored). The bit is unsigned long like the fds[] words. */
+#define FD_SET(fd, set) do {						\
+		fd_set *__set = (set);					\
+		int __fd = (fd);					\
+		if (__fd >= 0)						\
+			__set->fds[__fd / FD_SETIDXMASK] |= 1UL << (__fd & FD_SETBITMASK); \
+	} while (0)
+
+/* Evaluate to 1 if <fd> is in <set>, else 0 (also 0 for negative fds). The bit is unsigned long. */
+#define FD_ISSET(fd, set) ({						\
+		fd_set *__set = (set);					\
+		int __fd = (fd), __r = 0;				\
+		if (__fd >= 0)						\
+			__r = !!(__set->fds[__fd / FD_SETIDXMASK] &	\
+				 (1UL << (__fd & FD_SETBITMASK)));	\
+		__r;							\
+	})
+
+/* Empty <set>: store 0 into each of the (FD_SETSIZE+FD_SETBITMASK)/FD_SETIDXMASK words. */
+#define FD_ZERO(set) do {						\
+		fd_set *__set = (set);					\
+		int __left = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\
+		while (__left-- > 0)					\
+			__set->fds[__left] = 0;				\
+	} while (0)
+
+/*
+ * int select(int nfds, fd_set *read_fds, fd_set *write_fds,
+ *            fd_set *except_fds, struct timeval *timeout);
+ * sys_select() uses the syscall picked by the #if chain; select() filters it via __sysret().
+ */
+static __attribute__((unused))
+int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect)
+	struct sel_arg_struct {
+		unsigned long n;
+		fd_set *r, *w, *e;
+		struct timeval *t;
+	} arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout };
+	return my_syscall1(__NR_select, &arg); /* all five arguments travel in one struct */
+#elif defined(__NR__newselect)
+	return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#elif defined(__NR_select)
+	return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout);
+#elif defined(__NR_pselect6)
+	struct timespec t; /* converted copy; *timeout itself is not written on this path */
+
+	if (timeout) {
+		t.tv_sec = timeout->tv_sec;
+		t.tv_nsec = timeout->tv_usec * 1000; /* microseconds -> nanoseconds */
+	}
+	return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#else /* nothing above matched: __NR_pselect6_time64 is used unconditionally */
+	struct __kernel_timespec t; /* same conversion as above, different struct type */
+
+	if (timeout) {
+		t.tv_sec = timeout->tv_sec;
+		t.tv_nsec = timeout->tv_usec * 1000; /* microseconds -> nanoseconds */
+	}
+	return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
+#endif
+}
+
+static __attribute__((unused))
+int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout)
+{
+	return __sysret(sys_select(nfds, rfds, wfds, efds, timeout));
+}
+
+
+#endif /* _NOLIBC_SYS_SELECT_H */
diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h
index 4375d546ba58..5dd61030c991 100644
--- a/tools/include/nolibc/sys/timerfd.h
+++ b/tools/include/nolibc/sys/timerfd.h
@@ -34,7 +34,7 @@ int sys_timerfd_gettime(int fd, struct itimerspec *curr_value)
{
#if defined(__NR_timerfd_gettime)
return my_syscall2(__NR_timerfd_gettime, fd, curr_value);
-#elif defined(__NR_timerfd_gettime64)
+#else
struct __kernel_itimerspec kcurr_value;
int ret;
@@ -42,8 +42,6 @@ int sys_timerfd_gettime(int fd, struct itimerspec *curr_value)
__nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
__nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
return ret;
-#else
- return __nolibc_enosys(__func__, fd, curr_value);
#endif
}
@@ -60,7 +58,7 @@ int sys_timerfd_settime(int fd, int flags,
{
#if defined(__NR_timerfd_settime)
return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value);
-#elif defined(__NR_timerfd_settime64)
+#else
struct __kernel_itimerspec knew_value, kold_value;
int ret;
@@ -72,8 +70,6 @@ int sys_timerfd_settime(int fd, int flags,
__nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
}
return ret;
-#else
- return __nolibc_enosys(__func__, fd, flags, new_value, old_value);
#endif
}
diff --git a/tools/include/nolibc/sys/uio.h b/tools/include/nolibc/sys/uio.h
new file mode 100644
index 000000000000..7ad42b927d2f
--- /dev/null
+++ b/tools/include/nolibc/sys/uio.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * uio for NOLIBC
+ * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
+ * Copyright (C) 2025 Intel Corporation
+ */
+
+/* make sure to include all global symbols */
+#include "../nolibc.h"
+
+#ifndef _NOLIBC_SYS_UIO_H
+#define _NOLIBC_SYS_UIO_H
+
+#include "../sys.h"
+#include <linux/uio.h>
+
+/*
+ * ssize_t readv(int fd, const struct iovec *iovec, int count);
+ * sys_readv() issues the raw __NR_readv syscall; readv() passes its result through __sysret().
+ */
+static __attribute__((unused))
+ssize_t sys_readv(int fd, const struct iovec *iovec, int count)
+{
+	return my_syscall3(__NR_readv, fd, iovec, count);
+}
+
+static __attribute__((unused))
+ssize_t readv(int fd, const struct iovec *iovec, int count)
+{
+	return __sysret(sys_readv(fd, iovec, count));
+}
+
+/*
+ * ssize_t writev(int fd, const struct iovec *iovec, int count);
+ * sys_writev() issues the raw __NR_writev syscall; writev() passes its result through __sysret().
+ */
+static __attribute__((unused))
+ssize_t sys_writev(int fd, const struct iovec *iovec, int count)
+{
+	return my_syscall3(__NR_writev, fd, iovec, count);
+}
+
+static __attribute__((unused))
+ssize_t writev(int fd, const struct iovec *iovec, int count)
+{
+	return __sysret(sys_writev(fd, iovec, count));
+}
+
+#endif /* _NOLIBC_SYS_UIO_H */
diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h
index 56ddb806da7f..9d9319ba92cb 100644
--- a/tools/include/nolibc/sys/wait.h
+++ b/tools/include/nolibc/sys/wait.h
@@ -16,28 +16,11 @@
/*
* pid_t wait(int *status);
- * pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage);
* pid_t waitpid(pid_t pid, int *status, int options);
* int waitid(idtype_t idtype, id_t id, siginfo_t *infop, int options);
*/
static __attribute__((unused))
-pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
-#ifdef __NR_wait4
- return my_syscall4(__NR_wait4, pid, status, options, rusage);
-#else
- return __nolibc_enosys(__func__, pid, status, options, rusage);
-#endif
-}
-
-static __attribute__((unused))
-pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage)
-{
- return __sysret(sys_wait4(pid, status, options, rusage));
-}
-
-static __attribute__((unused))
int sys_waitid(int which, pid_t pid, siginfo_t *infop, int options, struct rusage *rusage)
{
return my_syscall5(__NR_waitid, which, pid, infop, options, rusage);
@@ -82,23 +65,29 @@ pid_t waitpid(pid_t pid, int *status, int options)
switch (info.si_code) {
case 0:
- *status = 0;
+ if (status)
+ *status = 0;
break;
case CLD_EXITED:
- *status = (info.si_status & 0xff) << 8;
+ if (status)
+ *status = (info.si_status & 0xff) << 8;
break;
case CLD_KILLED:
- *status = info.si_status & 0x7f;
+ if (status)
+ *status = info.si_status & 0x7f;
break;
case CLD_DUMPED:
- *status = (info.si_status & 0x7f) | 0x80;
+ if (status)
+ *status = (info.si_status & 0x7f) | 0x80;
break;
case CLD_STOPPED:
case CLD_TRAPPED:
- *status = (info.si_status << 8) + 0x7f;
+ if (status)
+ *status = (info.si_status << 8) + 0x7f;
break;
case CLD_CONTINUED:
- *status = 0xffff;
+ if (status)
+ *status = 0xffff;
break;
default:
return -1;
diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h
index d02bc44d2643..48e78f8becf9 100644
--- a/tools/include/nolibc/time.h
+++ b/tools/include/nolibc/time.h
@@ -45,7 +45,7 @@ int sys_clock_getres(clockid_t clockid, struct timespec *res)
{
#if defined(__NR_clock_getres)
return my_syscall2(__NR_clock_getres, clockid, res);
-#elif defined(__NR_clock_getres_time64)
+#else
struct __kernel_timespec kres;
int ret;
@@ -53,8 +53,6 @@ int sys_clock_getres(clockid_t clockid, struct timespec *res)
if (res)
__nolibc_timespec_kernel_to_user(&kres, res);
return ret;
-#else
- return __nolibc_enosys(__func__, clockid, res);
#endif
}
@@ -69,7 +67,7 @@ int sys_clock_gettime(clockid_t clockid, struct timespec *tp)
{
#if defined(__NR_clock_gettime)
return my_syscall2(__NR_clock_gettime, clockid, tp);
-#elif defined(__NR_clock_gettime64)
+#else
struct __kernel_timespec ktp;
int ret;
@@ -77,8 +75,6 @@ int sys_clock_gettime(clockid_t clockid, struct timespec *tp)
if (tp)
__nolibc_timespec_kernel_to_user(&ktp, tp);
return ret;
-#else
- return __nolibc_enosys(__func__, clockid, tp);
#endif
}
@@ -93,13 +89,11 @@ int sys_clock_settime(clockid_t clockid, struct timespec *tp)
{
#if defined(__NR_clock_settime)
return my_syscall2(__NR_clock_settime, clockid, tp);
-#elif defined(__NR_clock_settime64)
+#else
struct __kernel_timespec ktp;
__nolibc_timespec_user_to_kernel(tp, &ktp);
return my_syscall2(__NR_clock_settime64, clockid, &ktp);
-#else
- return __nolibc_enosys(__func__, clockid, tp);
#endif
}
@@ -115,7 +109,7 @@ int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqt
{
#if defined(__NR_clock_nanosleep)
return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp);
-#elif defined(__NR_clock_nanosleep_time64)
+#else
struct __kernel_timespec krqtp, krmtp;
int ret;
@@ -124,8 +118,6 @@ int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqt
if (rmtp)
__nolibc_timespec_kernel_to_user(&krmtp, rmtp);
return ret;
-#else
- return __nolibc_enosys(__func__, clockid, flags, rqtp, rmtp);
#endif
}
@@ -133,7 +125,8 @@ static __attribute__((unused))
int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp,
struct timespec *rmtp)
{
- return __sysret(sys_clock_nanosleep(clockid, flags, rqtp, rmtp));
+ /* Directly return a positive error number */
+ return -sys_clock_nanosleep(clockid, flags, rqtp, rmtp);
}
static __inline__
@@ -145,7 +138,7 @@ double difftime(time_t time1, time_t time2)
static __inline__
int nanosleep(const struct timespec *rqtp, struct timespec *rmtp)
{
- return clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp);
+ return __sysret(sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp));
}
@@ -198,7 +191,7 @@ int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value)
{
#if defined(__NR_timer_gettime)
return my_syscall2(__NR_timer_gettime, timerid, curr_value);
-#elif defined(__NR_timer_gettime64)
+#else
struct __kernel_itimerspec kcurr_value;
int ret;
@@ -206,8 +199,6 @@ int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value)
__nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval);
__nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value);
return ret;
-#else
- return __nolibc_enosys(__func__, timerid, curr_value);
#endif
}
@@ -223,7 +214,7 @@ int sys_timer_settime(timer_t timerid, int flags,
{
#if defined(__NR_timer_settime)
return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value);
-#elif defined(__NR_timer_settime64)
+#else
struct __kernel_itimerspec knew_value, kold_value;
int ret;
@@ -235,8 +226,6 @@ int sys_timer_settime(timer_t timerid, int flags,
__nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value);
}
return ret;
-#else
- return __nolibc_enosys(__func__, timerid, flags, new_value, old_value);
#endif
}
diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h
index 16c6e9ec9451..470a5f77bc0f 100644
--- a/tools/include/nolibc/types.h
+++ b/tools/include/nolibc/types.h
@@ -70,11 +70,6 @@
#define DT_LNK 0xa
#define DT_SOCK 0xc
-/* commonly an fd_set represents 256 FDs */
-#ifndef FD_SETSIZE
-#define FD_SETSIZE 256
-#endif
-
/* PATH_MAX and MAXPATHLEN are often used and found with plenty of different
* values.
*/
@@ -115,48 +110,6 @@
#define EXIT_SUCCESS 0
#define EXIT_FAILURE 1
-#define FD_SETIDXMASK (8 * sizeof(unsigned long))
-#define FD_SETBITMASK (8 * sizeof(unsigned long)-1)
-
-/* for select() */
-typedef struct {
- unsigned long fds[(FD_SETSIZE + FD_SETBITMASK) / FD_SETIDXMASK];
-} fd_set;
-
-#define FD_CLR(fd, set) do { \
- fd_set *__set = (set); \
- int __fd = (fd); \
- if (__fd >= 0) \
- __set->fds[__fd / FD_SETIDXMASK] &= \
- ~(1U << (__fd & FD_SETBITMASK)); \
- } while (0)
-
-#define FD_SET(fd, set) do { \
- fd_set *__set = (set); \
- int __fd = (fd); \
- if (__fd >= 0) \
- __set->fds[__fd / FD_SETIDXMASK] |= \
- 1 << (__fd & FD_SETBITMASK); \
- } while (0)
-
-#define FD_ISSET(fd, set) ({ \
- fd_set *__set = (set); \
- int __fd = (fd); \
- int __r = 0; \
- if (__fd >= 0) \
- __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \
-1U << (__fd & FD_SETBITMASK)); \
- __r; \
- })
-
-#define FD_ZERO(set) do { \
- fd_set *__set = (set); \
- int __idx; \
- int __size = (FD_SETSIZE+FD_SETBITMASK) / FD_SETIDXMASK;\
- for (__idx = 0; __idx < __size; __idx++) \
- __set->fds[__idx] = 0; \
- } while (0)
-
/* for getdents64() */
struct linux_dirent64 {
uint64_t d_ino;
diff --git a/tools/include/nolibc/unistd.h b/tools/include/nolibc/unistd.h
index 25bfc7732ec7..bb5e80f3f05d 100644
--- a/tools/include/nolibc/unistd.h
+++ b/tools/include/nolibc/unistd.h
@@ -33,7 +33,7 @@
static __attribute__((unused))
int sys_faccessat(int fd, const char *path, int amode, int flag)
{
- return my_syscall4(__NR_faccessat, fd, path, amode, flag);
+ return my_syscall4(__NR_faccessat, fd, path, amode, flag);
}
static __attribute__((unused))
@@ -54,7 +54,7 @@ int msleep(unsigned int msecs)
{
struct timeval my_timeval = { msecs / 1000, (msecs % 1000) * 1000 };
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0)
return (my_timeval.tv_sec * 1000) +
(my_timeval.tv_usec / 1000) +
!!(my_timeval.tv_usec % 1000);
@@ -67,7 +67,7 @@ unsigned int sleep(unsigned int seconds)
{
struct timeval my_timeval = { seconds, 0 };
- if (sys_select(0, 0, 0, 0, &my_timeval) < 0)
+ if (sys_select(0, NULL, NULL, NULL, &my_timeval) < 0)
return my_timeval.tv_sec + !!my_timeval.tv_usec;
else
return 0;
@@ -78,7 +78,7 @@ int usleep(unsigned int usecs)
{
struct timeval my_timeval = { usecs / 1000000, usecs % 1000000 };
- return sys_select(0, 0, 0, 0, &my_timeval);
+ return sys_select(0, NULL, NULL, NULL, &my_timeval);
}
static __attribute__((unused))
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index 2892a45023af..04e0077fb4c9 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -852,8 +852,14 @@ __SYSCALL(__NR_removexattrat, sys_removexattrat)
#define __NR_open_tree_attr 467
__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
+/* fs/inode.c */
+#define __NR_file_getattr 468
+__SYSCALL(__NR_file_getattr, sys_file_getattr)
+#define __NR_file_setattr 469
+__SYSCALL(__NR_file_setattr, sys_file_setattr)
+
#undef __NR_syscalls
-#define __NR_syscalls 468
+#define __NR_syscalls 470
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index e63a71d3c607..3cd5cf15e3c9 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -597,35 +597,66 @@ struct drm_set_version {
int drm_dd_minor;
};
-/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
+/**
+ * struct drm_gem_close - Argument for &DRM_IOCTL_GEM_CLOSE ioctl.
+ * @handle: Handle of the object to be closed.
+ * @pad: Padding.
+ *
+ * Releases the handle to an mm object.
+ */
struct drm_gem_close {
- /** Handle of the object to be closed. */
__u32 handle;
__u32 pad;
};
-/* DRM_IOCTL_GEM_FLINK ioctl argument type */
+/**
+ * struct drm_gem_flink - Argument for &DRM_IOCTL_GEM_FLINK ioctl.
+ * @handle: Handle for the object being named.
+ * @name: Returned global name.
+ *
+ * Create a global name for an object, returning the name.
+ *
+ * Note that the name does not hold a reference; when the object
+ * is freed, the name goes away.
+ */
struct drm_gem_flink {
- /** Handle for the object being named */
__u32 handle;
-
- /** Returned global name */
__u32 name;
};
-/* DRM_IOCTL_GEM_OPEN ioctl argument type */
+/**
+ * struct drm_gem_open - Argument for &DRM_IOCTL_GEM_OPEN ioctl.
+ * @name: Name of object being opened.
+ * @handle: Returned handle for the object.
+ * @size: Returned size of the object.
+ *
+ * Open an object using the global name, returning a handle and the size.
+ *
+ * This handle (of course) holds a reference to the object, so the object
+ * will not go away until the handle is deleted.
+ */
struct drm_gem_open {
- /** Name of object being opened */
__u32 name;
-
- /** Returned handle for the object */
__u32 handle;
-
- /** Returned size of the object */
__u64 size;
};
/**
+ * struct drm_gem_change_handle - Argument for &DRM_IOCTL_GEM_CHANGE_HANDLE ioctl.
+ * @handle: The handle of a gem object.
+ * @new_handle: An available gem handle.
+ *
+ * This ioctl changes the handle of a GEM object to the specified one.
+ * The new handle must be unused. On success the old handle is closed
+ * and all further IOCTL should refer to the new handle only.
+ * Calls to DRM_IOCTL_PRIME_FD_TO_HANDLE will return the new handle.
+ */
+struct drm_gem_change_handle {
+ __u32 handle;
+ __u32 new_handle;
+};
+
+/**
* DRM_CAP_DUMB_BUFFER
*
* If set to 1, the driver supports creating dumb buffers via the
@@ -1309,6 +1340,14 @@ extern "C" {
*/
#define DRM_IOCTL_SET_CLIENT_NAME DRM_IOWR(0xD1, struct drm_set_client_name)
+/**
+ * DRM_IOCTL_GEM_CHANGE_HANDLE - Move an object to a different handle
+ *
+ * Some applications (notably CRIU) need objects to have specific gem handles.
+ * This ioctl changes the object at one gem handle to use a new gem handle.
+ */
+#define DRM_IOCTL_GEM_CHANGE_HANDLE DRM_IOWR(0xD2, struct drm_gem_change_handle)
+
/*
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 233de8677382..be7d8e060e10 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1026,6 +1026,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
BPF_MAP_TYPE_ARENA,
+ BPF_MAP_TYPE_INSN_ARRAY,
__MAX_BPF_MAP_TYPE
};
@@ -1430,6 +1431,9 @@ enum {
/* Do not translate kernel bpf_arena pointers to user pointers */
BPF_F_NO_USER_CONV = (1U << 18),
+
+/* Enable BPF ringbuf overwrite mode */
+ BPF_F_RB_OVERWRITE = (1U << 19),
};
/* Flags for BPF_PROG_QUERY. */
@@ -1522,6 +1526,12 @@ union bpf_attr {
* If provided, map_flags should have BPF_F_TOKEN_FD flag set.
*/
__s32 map_token_fd;
+
+ /* Hash of the program that has exclusive access to the map.
+ */
+ __aligned_u64 excl_prog_hash;
+ /* Size of the passed excl_prog_hash. */
+ __u32 excl_prog_hash_size;
};
struct { /* anonymous struct used by BPF_MAP_*_ELEM and BPF_MAP_FREEZE commands */
@@ -1605,6 +1615,16 @@ union bpf_attr {
* continuous.
*/
__u32 fd_array_cnt;
+ /* Pointer to a buffer containing the signature of the BPF
+ * program.
+ */
+ __aligned_u64 signature;
+ /* Size of the signature buffer in bytes. */
+ __u32 signature_size;
+ /* ID of the kernel keyring to be used for signature
+ * verification.
+ */
+ __s32 keyring_id;
};
struct { /* anonymous struct used by BPF_OBJ_* commands */
@@ -4875,7 +4895,7 @@ union bpf_attr {
*
* **-ENOENT** if the bpf_local_storage cannot be found.
*
- * long bpf_d_path(struct path *path, char *buf, u32 sz)
+ * long bpf_d_path(const struct path *path, char *buf, u32 sz)
* Description
* Return full path for given **struct path** object, which
* needs to be the kernel BTF *path* object. The path is
@@ -5602,7 +5622,7 @@ union bpf_attr {
* Return
* *sk* if casting is valid, or **NULL** otherwise.
*
- * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
* Description
* Get a dynptr to local memory *data*.
*
@@ -5645,7 +5665,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
+ * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5655,7 +5675,7 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
- * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
+ * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
@@ -5676,7 +5696,7 @@ union bpf_attr {
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
- * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
+ * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
* Description
* Get a pointer to the underlying dynptr data.
*
@@ -6215,6 +6235,7 @@ enum {
BPF_RB_RING_SIZE = 1,
BPF_RB_CONS_POS = 2,
BPF_RB_PROD_POS = 3,
+ BPF_RB_OVERWRITE_POS = 4,
};
/* BPF ring buffer constants */
@@ -6666,6 +6687,8 @@ struct bpf_map_info {
__u32 btf_value_type_id;
__u32 btf_vmlinux_id;
__u64 map_extra;
+ __aligned_u64 hash;
+ __u32 hash_size;
} __attribute__((aligned(8)));
struct bpf_btf_info {
@@ -7182,6 +7205,7 @@ enum {
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
SK_BPF_CB_FLAGS = 1009, /* Get or set sock ops flags in socket */
+ SK_BPF_BYPASS_PROT_MEM = 1010, /* Get or Set sk->sk_bypass_prot_mem */
};
enum {
@@ -7418,6 +7442,10 @@ struct bpf_timer {
__u64 __opaque[2];
} __attribute__((aligned(8)));
+struct bpf_task_work {
+ __u64 __opaque;
+} __attribute__((aligned(8)));
+
struct bpf_wq {
__u64 __opaque[2];
} __attribute__((aligned(8)));
@@ -7623,4 +7651,24 @@ enum bpf_kfunc_flags {
BPF_F_PAD_ZEROS = (1ULL << 0),
};
+/*
+ * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type.
+ *
+ * Before the map is used the orig_off field should point to an
+ * instruction inside the program being loaded. The other fields
+ * must be set to 0.
+ *
+ * After the program is loaded, the xlated_off will be adjusted
+ * by the verifier to point to the index of the original instruction
+ * in the xlated program. If the instruction is deleted, it will
+ * be set to (u32)-1. The jitted_off will be set to the corresponding
+ * offset in the jitted image of the program.
+ */
+struct bpf_insn_array_value {
+ __u32 orig_off;
+ __u32 xlated_off;
+ __u32 jitted_off;
+ __u32 :32;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/genetlink.h b/tools/include/uapi/linux/genetlink.h
new file mode 100644
index 000000000000..ddba3ca01e39
--- /dev/null
+++ b/tools/include/uapi/linux/genetlink.h
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_GENERIC_NETLINK_H
+#define _UAPI__LINUX_GENERIC_NETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#define GENL_NAMSIZ 16 /* length of family name */
+
+#define GENL_MIN_ID NLMSG_MIN_TYPE
+#define GENL_MAX_ID 1023
+
+struct genlmsghdr {
+ __u8 cmd;
+ __u8 version;
+ __u16 reserved;
+};
+
+#define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr))
+
+#define GENL_ADMIN_PERM 0x01
+#define GENL_CMD_CAP_DO 0x02
+#define GENL_CMD_CAP_DUMP 0x04
+#define GENL_CMD_CAP_HASPOL 0x08
+#define GENL_UNS_ADMIN_PERM 0x10
+
+/*
+ * List of reserved static generic netlink identifiers:
+ */
+#define GENL_ID_CTRL NLMSG_MIN_TYPE
+#define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1)
+#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2)
+/* must be last reserved + 1 */
+#define GENL_START_ALLOC (NLMSG_MIN_TYPE + 3)
+
+/**************************************************************************
+ * Controller
+ **************************************************************************/
+
+enum {
+ CTRL_CMD_UNSPEC,
+ CTRL_CMD_NEWFAMILY,
+ CTRL_CMD_DELFAMILY,
+ CTRL_CMD_GETFAMILY,
+ CTRL_CMD_NEWOPS,
+ CTRL_CMD_DELOPS,
+ CTRL_CMD_GETOPS,
+ CTRL_CMD_NEWMCAST_GRP,
+ CTRL_CMD_DELMCAST_GRP,
+ CTRL_CMD_GETMCAST_GRP, /* unused */
+ CTRL_CMD_GETPOLICY,
+ __CTRL_CMD_MAX,
+};
+
+#define CTRL_CMD_MAX (__CTRL_CMD_MAX - 1)
+
+enum {
+ CTRL_ATTR_UNSPEC,
+ CTRL_ATTR_FAMILY_ID,
+ CTRL_ATTR_FAMILY_NAME,
+ CTRL_ATTR_VERSION,
+ CTRL_ATTR_HDRSIZE,
+ CTRL_ATTR_MAXATTR,
+ CTRL_ATTR_OPS,
+ CTRL_ATTR_MCAST_GROUPS,
+ CTRL_ATTR_POLICY,
+ CTRL_ATTR_OP_POLICY,
+ CTRL_ATTR_OP,
+ __CTRL_ATTR_MAX,
+};
+
+#define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1)
+
+enum {
+ CTRL_ATTR_OP_UNSPEC,
+ CTRL_ATTR_OP_ID,
+ CTRL_ATTR_OP_FLAGS,
+ __CTRL_ATTR_OP_MAX,
+};
+
+#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1)
+
+enum {
+ CTRL_ATTR_MCAST_GRP_UNSPEC,
+ CTRL_ATTR_MCAST_GRP_NAME,
+ CTRL_ATTR_MCAST_GRP_ID,
+ __CTRL_ATTR_MCAST_GRP_MAX,
+};
+
+#define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
+
+enum {
+ CTRL_ATTR_POLICY_UNSPEC,
+ CTRL_ATTR_POLICY_DO,
+ CTRL_ATTR_POLICY_DUMP,
+
+ __CTRL_ATTR_POLICY_DUMP_MAX,
+ CTRL_ATTR_POLICY_DUMP_MAX = __CTRL_ATTR_POLICY_DUMP_MAX - 1
+};
+
+#define CTRL_ATTR_POLICY_MAX (__CTRL_ATTR_POLICY_DUMP_MAX - 1)
+
+#endif /* _UAPI__LINUX_GENERIC_NETLINK_H */
diff --git a/tools/include/uapi/linux/if_addr.h b/tools/include/uapi/linux/if_addr.h
new file mode 100644
index 000000000000..aa7958b4e41d
--- /dev/null
+++ b/tools/include/uapi/linux/if_addr.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_IF_ADDR_H
+#define _UAPI__LINUX_IF_ADDR_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+struct ifaddrmsg {
+ __u8 ifa_family;
+ __u8 ifa_prefixlen; /* The prefix length */
+ __u8 ifa_flags; /* Flags */
+ __u8 ifa_scope; /* Address scope */
+ __u32 ifa_index; /* Link index */
+};
+
+/*
+ * Important comment:
+ * IFA_ADDRESS is prefix address, rather than local interface address.
+ * It makes no difference for normally configured broadcast interfaces,
+ * but for point-to-point IFA_ADDRESS is DESTINATION address,
+ * local address is supplied in IFA_LOCAL attribute.
+ *
+ * IFA_FLAGS is a u32 attribute that extends the u8 field ifa_flags.
+ * If present, the value from struct ifaddrmsg will be ignored.
+ */
+enum {
+ IFA_UNSPEC,
+ IFA_ADDRESS,
+ IFA_LOCAL,
+ IFA_LABEL,
+ IFA_BROADCAST,
+ IFA_ANYCAST,
+ IFA_CACHEINFO,
+ IFA_MULTICAST,
+ IFA_FLAGS,
+ IFA_RT_PRIORITY, /* u32, priority/metric for prefix route */
+ IFA_TARGET_NETNSID,
+ IFA_PROTO, /* u8, address protocol */
+ __IFA_MAX,
+};
+
+#define IFA_MAX (__IFA_MAX - 1)
+
+/* ifa_flags */
+#define IFA_F_SECONDARY 0x01
+#define IFA_F_TEMPORARY IFA_F_SECONDARY
+
+#define IFA_F_NODAD 0x02
+#define IFA_F_OPTIMISTIC 0x04
+#define IFA_F_DADFAILED 0x08
+#define IFA_F_HOMEADDRESS 0x10
+#define IFA_F_DEPRECATED 0x20
+#define IFA_F_TENTATIVE 0x40
+#define IFA_F_PERMANENT 0x80
+#define IFA_F_MANAGETEMPADDR 0x100
+#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
+
+struct ifa_cacheinfo {
+ __u32 ifa_prefered;
+ __u32 ifa_valid;
+ __u32 cstamp; /* created timestamp, hundredths of seconds */
+ __u32 tstamp; /* updated timestamp, hundredths of seconds */
+};
+
+/* backwards compatibility for userspace */
+#ifndef __KERNEL__
+#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg))))
+#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg))
+#endif
+
+/* ifa_proto */
+#define IFAPROT_UNSPEC 0
+#define IFAPROT_KERNEL_LO 1 /* loopback */
+#define IFAPROT_KERNEL_RA 2 /* set by kernel from router announcement */
+#define IFAPROT_KERNEL_LL 3 /* link-local set by kernel */
+
+#endif
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 7415a3863891..52f6000ab020 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -178,6 +178,7 @@ struct kvm_xen_exit {
#define KVM_EXIT_NOTIFY 37
#define KVM_EXIT_LOONGARCH_IOCSR 38
#define KVM_EXIT_MEMORY_FAULT 39
+#define KVM_EXIT_TDX 40
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -447,6 +448,31 @@ struct kvm_run {
__u64 gpa;
__u64 size;
} memory_fault;
+ /* KVM_EXIT_TDX */
+ struct {
+ __u64 flags;
+ __u64 nr;
+ union {
+ struct {
+ __u64 ret;
+ __u64 data[5];
+ } unknown;
+ struct {
+ __u64 ret;
+ __u64 gpa;
+ __u64 size;
+ } get_quote;
+ struct {
+ __u64 ret;
+ __u64 leaf;
+ __u64 r11, r12, r13, r14;
+ } get_tdvmcall_info;
+ struct {
+ __u64 ret;
+ __u64 vector;
+ } setup_event_notify;
+ };
+ } tdx;
/* Fix the size of the union. */
char padding[256];
};
@@ -935,6 +961,8 @@ struct kvm_enable_cap {
#define KVM_CAP_ARM_EL2 240
#define KVM_CAP_ARM_EL2_E2H0 241
#define KVM_CAP_RISCV_MP_STATE_RESET 242
+#define KVM_CAP_ARM_CACHEABLE_PFNMAP_SUPPORTED 243
+#define KVM_CAP_GUEST_MEMFD_FLAGS 244
struct kvm_irq_routing_irqchip {
__u32 irqchip;
@@ -1571,6 +1599,8 @@ struct kvm_memory_attributes {
#define KVM_MEMORY_ATTRIBUTE_PRIVATE (1ULL << 3)
#define KVM_CREATE_GUEST_MEMFD _IOWR(KVMIO, 0xd4, struct kvm_create_guest_memfd)
+#define GUEST_MEMFD_FLAG_MMAP (1ULL << 0)
+#define GUEST_MEMFD_FLAG_INIT_SHARED (1ULL << 1)
struct kvm_create_guest_memfd {
__u64 size;
diff --git a/tools/include/uapi/linux/neighbour.h b/tools/include/uapi/linux/neighbour.h
new file mode 100644
index 000000000000..c34a81245f87
--- /dev/null
+++ b/tools/include/uapi/linux/neighbour.h
@@ -0,0 +1,229 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NEIGHBOUR_H
+#define _UAPI__LINUX_NEIGHBOUR_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+struct ndmsg {
+ __u8 ndm_family;
+ __u8 ndm_pad1;
+ __u16 ndm_pad2;
+ __s32 ndm_ifindex;
+ __u16 ndm_state;
+ __u8 ndm_flags;
+ __u8 ndm_type;
+};
+
+enum {
+ NDA_UNSPEC,
+ NDA_DST,
+ NDA_LLADDR,
+ NDA_CACHEINFO,
+ NDA_PROBES,
+ NDA_VLAN,
+ NDA_PORT,
+ NDA_VNI,
+ NDA_IFINDEX,
+ NDA_MASTER,
+ NDA_LINK_NETNSID,
+ NDA_SRC_VNI,
+ NDA_PROTOCOL, /* Originator of entry */
+ NDA_NH_ID,
+ NDA_FDB_EXT_ATTRS,
+ NDA_FLAGS_EXT,
+ NDA_NDM_STATE_MASK,
+ NDA_NDM_FLAGS_MASK,
+ __NDA_MAX
+};
+
+#define NDA_MAX (__NDA_MAX - 1)
+
+/*
+ * Neighbor Cache Entry Flags
+ */
+
+#define NTF_USE (1 << 0)
+#define NTF_SELF (1 << 1)
+#define NTF_MASTER (1 << 2)
+#define NTF_PROXY (1 << 3) /* == ATF_PUBL */
+#define NTF_EXT_LEARNED (1 << 4)
+#define NTF_OFFLOADED (1 << 5)
+#define NTF_STICKY (1 << 6)
+#define NTF_ROUTER (1 << 7)
+/* Extended flags under NDA_FLAGS_EXT: */
+#define NTF_EXT_MANAGED (1 << 0)
+#define NTF_EXT_LOCKED (1 << 1)
+#define NTF_EXT_EXT_VALIDATED (1 << 2)
+
+/*
+ * Neighbor Cache Entry States.
+ */
+
+#define NUD_INCOMPLETE 0x01
+#define NUD_REACHABLE 0x02
+#define NUD_STALE 0x04
+#define NUD_DELAY 0x08
+#define NUD_PROBE 0x10
+#define NUD_FAILED 0x20
+
+/* Dummy states */
+#define NUD_NOARP 0x40
+#define NUD_PERMANENT 0x80
+#define NUD_NONE 0x00
+
+/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change and make no
+ * address resolution or NUD.
+ *
+ * NUD_PERMANENT also cannot be deleted by garbage collectors. This holds true
+ * for dynamic entries with NTF_EXT_LEARNED flag as well. However, upon carrier
+ * down event, NUD_PERMANENT entries are not flushed whereas NTF_EXT_LEARNED
+ * flagged entries explicitly are (which is also consistent with the routing
+ * subsystem).
+ *
+ * When NTF_EXT_LEARNED is set for a bridge fdb entry the different cache entry
+ * states don't make sense and thus are ignored. Such entries don't age and
+ * can roam.
+ *
+ * NTF_EXT_MANAGED flagged neighbor entries are managed by the kernel on behalf
+ * of a user space control plane, and automatically refreshed so that (if
+ * possible) they remain in NUD_REACHABLE state.
+ *
+ * NTF_EXT_LOCKED flagged bridge FDB entries are entries generated by the
+ * bridge in response to a host trying to communicate via a locked bridge port
+ * with MAB enabled. Their purpose is to notify user space that a host requires
+ * authentication.
+ *
+ * NTF_EXT_EXT_VALIDATED flagged neighbor entries were externally validated by
+ * a user space control plane. The kernel will not remove or invalidate them,
+ * but it can probe them and notify user space when they become reachable.
+ */
+
+struct nda_cacheinfo {
+ __u32 ndm_confirmed;
+ __u32 ndm_used;
+ __u32 ndm_updated;
+ __u32 ndm_refcnt;
+};
+
+/*****************************************************************
+ * Neighbour tables specific messages.
+ *
+ * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the
+ * NLM_F_DUMP flag set. Every neighbour table configuration is
+ * spread over multiple messages to avoid running into message
+ * size limits on systems with many interfaces. The first message
+ * in the sequence transports all not device specific data such as
+ * statistics, configuration, and the default parameter set.
+ * This message is followed by 0..n messages carrying device
+ * specific parameter sets.
+ * Although the ordering should be sufficient, NDTA_NAME can be
+ * used to identify sequences. The initial message can be identified
+ * by checking for NDTA_CONFIG. The device specific messages do
+ * not contain this TLV but have NDTPA_IFINDEX set to the
+ * corresponding interface index.
+ *
+ * To change neighbour table attributes, send RTM_SETNEIGHTBL
+ * with NDTA_NAME set. Changeable attributes include NDTA_THRESH[1-3],
+ * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked
+ * otherwise. Device specific parameter sets can be changed by
+ * setting NDTPA_IFINDEX to the interface index of the corresponding
+ * device.
+ ****/
+
+struct ndt_stats {
+ __u64 ndts_allocs;
+ __u64 ndts_destroys;
+ __u64 ndts_hash_grows;
+ __u64 ndts_res_failed;
+ __u64 ndts_lookups;
+ __u64 ndts_hits;
+ __u64 ndts_rcv_probes_mcast;
+ __u64 ndts_rcv_probes_ucast;
+ __u64 ndts_periodic_gc_runs;
+ __u64 ndts_forced_gc_runs;
+ __u64 ndts_table_fulls;
+};
+
+enum {
+ NDTPA_UNSPEC,
+ NDTPA_IFINDEX, /* u32, unchangeable */
+ NDTPA_REFCNT, /* u32, read-only */
+ NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */
+ NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */
+ NDTPA_RETRANS_TIME, /* u64, msecs */
+ NDTPA_GC_STALETIME, /* u64, msecs */
+ NDTPA_DELAY_PROBE_TIME, /* u64, msecs */
+ NDTPA_QUEUE_LEN, /* u32 */
+ NDTPA_APP_PROBES, /* u32 */
+ NDTPA_UCAST_PROBES, /* u32 */
+ NDTPA_MCAST_PROBES, /* u32 */
+ NDTPA_ANYCAST_DELAY, /* u64, msecs */
+ NDTPA_PROXY_DELAY, /* u64, msecs */
+ NDTPA_PROXY_QLEN, /* u32 */
+ NDTPA_LOCKTIME, /* u64, msecs */
+ NDTPA_QUEUE_LENBYTES, /* u32 */
+ NDTPA_MCAST_REPROBES, /* u32 */
+ NDTPA_PAD,
+ NDTPA_INTERVAL_PROBE_TIME_MS, /* u64, msecs */
+ __NDTPA_MAX
+};
+#define NDTPA_MAX (__NDTPA_MAX - 1)
+
+struct ndtmsg {
+ __u8 ndtm_family;
+ __u8 ndtm_pad1;
+ __u16 ndtm_pad2;
+};
+
+struct ndt_config {
+ __u16 ndtc_key_len;
+ __u16 ndtc_entry_size;
+ __u32 ndtc_entries;
+ __u32 ndtc_last_flush; /* delta to now in msecs */
+ __u32 ndtc_last_rand; /* delta to now in msecs */
+ __u32 ndtc_hash_rnd;
+ __u32 ndtc_hash_mask;
+ __u32 ndtc_hash_chain_gc;
+ __u32 ndtc_proxy_qlen;
+};
+
+enum {
+ NDTA_UNSPEC,
+ NDTA_NAME, /* char *, unchangeable */
+ NDTA_THRESH1, /* u32 */
+ NDTA_THRESH2, /* u32 */
+ NDTA_THRESH3, /* u32 */
+ NDTA_CONFIG, /* struct ndt_config, read-only */
+ NDTA_PARMS, /* nested TLV NDTPA_* */
+ NDTA_STATS, /* struct ndt_stats, read-only */
+ NDTA_GC_INTERVAL, /* u64, msecs */
+ NDTA_PAD,
+ __NDTA_MAX
+};
+#define NDTA_MAX (__NDTA_MAX - 1)
+
+ /* FDB activity notification bits used in NFEA_ACTIVITY_NOTIFY:
+ * - FDB_NOTIFY_BIT - notify on activity/expire for any entry
+ * - FDB_NOTIFY_INACTIVE_BIT - mark as inactive to avoid multiple notifications
+ */
+enum {
+ FDB_NOTIFY_BIT = (1 << 0),
+ FDB_NOTIFY_INACTIVE_BIT = (1 << 1)
+};
+
+/* embedded into NDA_FDB_EXT_ATTRS:
+ * [NDA_FDB_EXT_ATTRS] = {
+ * [NFEA_ACTIVITY_NOTIFY]
+ * ...
+ * }
+ */
+enum {
+ NFEA_UNSPEC,
+ NFEA_ACTIVITY_NOTIFY,
+ NFEA_DONT_REFRESH,
+ __NFEA_MAX
+};
+#define NFEA_MAX (__NFEA_MAX - 1)
+
+#endif
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 48eb49aa03d4..e0b579a1df4f 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -2,6 +2,7 @@
/* Do not edit directly, auto-generated from: */
/* Documentation/netlink/specs/netdev.yaml */
/* YNL-GEN uapi header */
+/* To regenerate run: tools/net/ynl/ynl-regen.sh */
#ifndef _UAPI_LINUX_NETDEV_H
#define _UAPI_LINUX_NETDEV_H
@@ -80,6 +81,7 @@ enum netdev_qstats_scope {
enum netdev_napi_threaded {
NETDEV_NAPI_THREADED_DISABLED,
NETDEV_NAPI_THREADED_ENABLED,
+ NETDEV_NAPI_THREADED_BUSY_POLL,
};
enum {
diff --git a/tools/include/uapi/linux/netfilter.h b/tools/include/uapi/linux/netfilter.h
new file mode 100644
index 000000000000..5a79ccb76701
--- /dev/null
+++ b/tools/include/uapi/linux/netfilter.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_NETFILTER_H
+#define _UAPI__LINUX_NETFILTER_H
+
+#include <linux/types.h>
+#include <linux/compiler.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+/* Responses from hook functions. */
+#define NF_DROP 0
+#define NF_ACCEPT 1
+#define NF_STOLEN 2
+#define NF_QUEUE 3
+#define NF_REPEAT 4
+#define NF_STOP 5 /* Deprecated, for userspace nf_queue compatibility. */
+#define NF_MAX_VERDICT NF_STOP
+
+/* we overload the higher bits for encoding auxiliary data such as the queue
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+
+/* extra verdict flags have mask 0x0000ff00 */
+#define NF_VERDICT_FLAG_QUEUE_BYPASS 0x00008000
+
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
+#define NF_VERDICT_QMASK 0xffff0000
+#define NF_VERDICT_QBITS 16
+
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
+
+#define NF_DROP_ERR(x) (((-(x)) << 16) | NF_DROP)
+
+/* only for userspace compatibility */
+#ifndef __KERNEL__
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
+#endif
+
+enum nf_inet_hooks {
+ NF_INET_PRE_ROUTING,
+ NF_INET_LOCAL_IN,
+ NF_INET_FORWARD,
+ NF_INET_LOCAL_OUT,
+ NF_INET_POST_ROUTING,
+ NF_INET_NUMHOOKS,
+ NF_INET_INGRESS = NF_INET_NUMHOOKS,
+};
+
+enum nf_dev_hooks {
+ NF_NETDEV_INGRESS,
+ NF_NETDEV_EGRESS,
+ NF_NETDEV_NUMHOOKS
+};
+
+enum {
+ NFPROTO_UNSPEC = 0,
+ NFPROTO_INET = 1,
+ NFPROTO_IPV4 = 2,
+ NFPROTO_ARP = 3,
+ NFPROTO_NETDEV = 5,
+ NFPROTO_BRIDGE = 7,
+ NFPROTO_IPV6 = 10,
+#ifndef __KERNEL__ /* no longer supported by kernel */
+ NFPROTO_DECNET = 12,
+#endif
+ NFPROTO_NUMPROTO,
+};
+
+union nf_inet_addr {
+ __u32 all[4];
+ __be32 ip;
+ __be32 ip6[4];
+ struct in_addr in;
+ struct in6_addr in6;
+};
+
+#endif /* _UAPI__LINUX_NETFILTER_H */
diff --git a/tools/include/uapi/linux/netfilter_arp.h b/tools/include/uapi/linux/netfilter_arp.h
new file mode 100644
index 000000000000..791dfc5ae907
--- /dev/null
+++ b/tools/include/uapi/linux/netfilter_arp.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-1.0+ WITH Linux-syscall-note */
+#ifndef __LINUX_ARP_NETFILTER_H
+#define __LINUX_ARP_NETFILTER_H
+
+/* ARP-specific defines for netfilter.
+ * (C)2002 Rusty Russell IBM -- This code is GPL.
+ */
+
+#include <linux/netfilter.h>
+
+/* There is no PF_ARP. */
+#define NF_ARP 0
+
+/* ARP Hooks */
+#define NF_ARP_IN 0
+#define NF_ARP_OUT 1
+#define NF_ARP_FORWARD 2
+
+#ifndef __KERNEL__
+#define NF_ARP_NUMHOOKS 3
+#endif
+
+#endif /* __LINUX_ARP_NETFILTER_H */
diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h
index 34127653fd00..a25e38d1c874 100644
--- a/tools/include/uapi/linux/nsfs.h
+++ b/tools/include/uapi/linux/nsfs.h
@@ -16,8 +16,6 @@
#define NS_GET_NSTYPE _IO(NSIO, 0x3)
/* Get owner UID (in the caller's user namespace) for a user namespace */
#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
-/* Get the id for a mount namespace */
-#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64)
/* Translate pid from target pid namespace into the caller's pid namespace. */
#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int)
/* Return thread-group leader id of pid in the callers pid namespace. */
@@ -42,4 +40,89 @@ struct mnt_ns_info {
/* Get previous namespace. */
#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info)
+/* Retrieve namespace identifiers. */
+#define NS_GET_MNTNS_ID _IOR(NSIO, 5, __u64)
+#define NS_GET_ID _IOR(NSIO, 13, __u64)
+
+enum init_ns_ino {
+ IPC_NS_INIT_INO = 0xEFFFFFFFU,
+ UTS_NS_INIT_INO = 0xEFFFFFFEU,
+ USER_NS_INIT_INO = 0xEFFFFFFDU,
+ PID_NS_INIT_INO = 0xEFFFFFFCU,
+ CGROUP_NS_INIT_INO = 0xEFFFFFFBU,
+ TIME_NS_INIT_INO = 0xEFFFFFFAU,
+ NET_NS_INIT_INO = 0xEFFFFFF9U,
+ MNT_NS_INIT_INO = 0xEFFFFFF8U,
+#ifdef __KERNEL__
+ MNT_NS_ANON_INO = 0xEFFFFFF7U,
+#endif
+};
+
+struct nsfs_file_handle {
+ __u64 ns_id;
+ __u32 ns_type;
+ __u32 ns_inum;
+};
+
+#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
+#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
+
+enum init_ns_id {
+ IPC_NS_INIT_ID = 1ULL,
+ UTS_NS_INIT_ID = 2ULL,
+ USER_NS_INIT_ID = 3ULL,
+ PID_NS_INIT_ID = 4ULL,
+ CGROUP_NS_INIT_ID = 5ULL,
+ TIME_NS_INIT_ID = 6ULL,
+ NET_NS_INIT_ID = 7ULL,
+ MNT_NS_INIT_ID = 8ULL,
+#ifdef __KERNEL__
+ NS_LAST_INIT_ID = MNT_NS_INIT_ID,
+#endif
+};
+
+enum ns_type {
+ TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */
+ MNT_NS = (1ULL << 17), /* CLONE_NEWNS */
+ CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */
+ UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */
+ IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */
+ USER_NS = (1ULL << 28), /* CLONE_NEWUSER */
+ PID_NS = (1ULL << 29), /* CLONE_NEWPID */
+ NET_NS = (1ULL << 30), /* CLONE_NEWNET */
+};
+
+/**
+ * struct ns_id_req - namespace ID request structure
+ * @size: size of this structure
+ * @spare: reserved for future use
+ * @ns_type: filter mask
+ * @ns_id: last namespace id
+ * @user_ns_id: owning user namespace ID
+ *
+ * Structure for passing namespace ID and miscellaneous parameters to
+ * statns(2) and listns(2).
+ *
+ * For statns(2) @param represents the request mask.
+ * For listns(2) @ns_id represents the last listed namespace id (or zero).
+ */
+struct ns_id_req {
+ __u32 size;
+ __u32 spare;
+ __u64 ns_id;
+ struct /* listns */ {
+ __u32 ns_type;
+ __u32 spare2;
+ __u64 user_ns_id;
+ };
+};
+
+/*
+ * Special @user_ns_id value that can be passed to listns()
+ */
+#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */
+
+/* List of all ns_id_req versions. */
+#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */
+
#endif /* __LINUX_NSFS_H */
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 78a362b80027..c44a8fb3e418 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -382,6 +382,7 @@ enum perf_event_read_format {
#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */
#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */
#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */
+#define PERF_ATTR_SIZE_VER9 144 /* add: config4 */
/*
* 'struct perf_event_attr' contains various attributes that define
@@ -463,7 +464,9 @@ struct perf_event_attr {
inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */
remove_on_exec : 1, /* event is removed from task on exec */
sigtrap : 1, /* send synchronous SIGTRAP on event */
- __reserved_1 : 26;
+ defer_callchain: 1, /* request PERF_RECORD_CALLCHAIN_DEFERRED records */
+ defer_output : 1, /* output PERF_RECORD_CALLCHAIN_DEFERRED records */
+ __reserved_1 : 24;
union {
__u32 wakeup_events; /* wake up every n events */
@@ -543,6 +546,7 @@ struct perf_event_attr {
__u64 sig_data;
__u64 config3; /* extension of config2 */
+ __u64 config4; /* extension of config3 */
};
/*
@@ -1239,6 +1243,22 @@ enum perf_event_type {
*/
PERF_RECORD_AUX_OUTPUT_HW_ID = 21,
+ /*
+ * This user callchain capture was deferred until shortly before
+ * returning to user space. Previous samples would have kernel
+ * callchains only and they need to be stitched with this to make full
+ * callchains.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 cookie;
+ * u64 nr;
+ * u64 ips[nr];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_CALLCHAIN_DEFERRED = 22,
+
PERF_RECORD_MAX, /* non-ABI */
};
@@ -1269,6 +1289,7 @@ enum perf_callchain_context {
PERF_CONTEXT_HV = (__u64)-32,
PERF_CONTEXT_KERNEL = (__u64)-128,
PERF_CONTEXT_USER = (__u64)-512,
+ PERF_CONTEXT_USER_DEFERRED = (__u64)-640,
PERF_CONTEXT_GUEST = (__u64)-2048,
PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176,
diff --git a/tools/include/uapi/linux/rtnetlink.h b/tools/include/uapi/linux/rtnetlink.h
new file mode 100644
index 000000000000..dab9493c791b
--- /dev/null
+++ b/tools/include/uapi/linux/rtnetlink.h
@@ -0,0 +1,848 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__LINUX_RTNETLINK_H
+#define _UAPI__LINUX_RTNETLINK_H
+
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/if_link.h>
+#include <linux/if_addr.h>
+#include <linux/neighbour.h>
+
+/* rtnetlink families. Values up to 127 are reserved for real address
+ * families, values of 128 and above may be used arbitrarily.
+ */
+#define RTNL_FAMILY_IPMR 128
+#define RTNL_FAMILY_IP6MR 129
+#define RTNL_FAMILY_MAX 129
+
+/****
+ * Routing/neighbour discovery messages.
+ ****/
+
+/* Types of messages */
+
+enum {
+ RTM_BASE = 16,
+#define RTM_BASE RTM_BASE
+
+ RTM_NEWLINK = 16,
+#define RTM_NEWLINK RTM_NEWLINK
+ RTM_DELLINK,
+#define RTM_DELLINK RTM_DELLINK
+ RTM_GETLINK,
+#define RTM_GETLINK RTM_GETLINK
+ RTM_SETLINK,
+#define RTM_SETLINK RTM_SETLINK
+
+ RTM_NEWADDR = 20,
+#define RTM_NEWADDR RTM_NEWADDR
+ RTM_DELADDR,
+#define RTM_DELADDR RTM_DELADDR
+ RTM_GETADDR,
+#define RTM_GETADDR RTM_GETADDR
+
+ RTM_NEWROUTE = 24,
+#define RTM_NEWROUTE RTM_NEWROUTE
+ RTM_DELROUTE,
+#define RTM_DELROUTE RTM_DELROUTE
+ RTM_GETROUTE,
+#define RTM_GETROUTE RTM_GETROUTE
+
+ RTM_NEWNEIGH = 28,
+#define RTM_NEWNEIGH RTM_NEWNEIGH
+ RTM_DELNEIGH,
+#define RTM_DELNEIGH RTM_DELNEIGH
+ RTM_GETNEIGH,
+#define RTM_GETNEIGH RTM_GETNEIGH
+
+ RTM_NEWRULE = 32,
+#define RTM_NEWRULE RTM_NEWRULE
+ RTM_DELRULE,
+#define RTM_DELRULE RTM_DELRULE
+ RTM_GETRULE,
+#define RTM_GETRULE RTM_GETRULE
+
+ RTM_NEWQDISC = 36,
+#define RTM_NEWQDISC RTM_NEWQDISC
+ RTM_DELQDISC,
+#define RTM_DELQDISC RTM_DELQDISC
+ RTM_GETQDISC,
+#define RTM_GETQDISC RTM_GETQDISC
+
+ RTM_NEWTCLASS = 40,
+#define RTM_NEWTCLASS RTM_NEWTCLASS
+ RTM_DELTCLASS,
+#define RTM_DELTCLASS RTM_DELTCLASS
+ RTM_GETTCLASS,
+#define RTM_GETTCLASS RTM_GETTCLASS
+
+ RTM_NEWTFILTER = 44,
+#define RTM_NEWTFILTER RTM_NEWTFILTER
+ RTM_DELTFILTER,
+#define RTM_DELTFILTER RTM_DELTFILTER
+ RTM_GETTFILTER,
+#define RTM_GETTFILTER RTM_GETTFILTER
+
+ RTM_NEWACTION = 48,
+#define RTM_NEWACTION RTM_NEWACTION
+ RTM_DELACTION,
+#define RTM_DELACTION RTM_DELACTION
+ RTM_GETACTION,
+#define RTM_GETACTION RTM_GETACTION
+
+ RTM_NEWPREFIX = 52,
+#define RTM_NEWPREFIX RTM_NEWPREFIX
+
+ RTM_NEWMULTICAST = 56,
+#define RTM_NEWMULTICAST RTM_NEWMULTICAST
+ RTM_DELMULTICAST,
+#define RTM_DELMULTICAST RTM_DELMULTICAST
+ RTM_GETMULTICAST,
+#define RTM_GETMULTICAST RTM_GETMULTICAST
+
+ RTM_NEWANYCAST = 60,
+#define RTM_NEWANYCAST RTM_NEWANYCAST
+ RTM_DELANYCAST,
+#define RTM_DELANYCAST RTM_DELANYCAST
+ RTM_GETANYCAST,
+#define RTM_GETANYCAST RTM_GETANYCAST
+
+ RTM_NEWNEIGHTBL = 64,
+#define RTM_NEWNEIGHTBL RTM_NEWNEIGHTBL
+ RTM_GETNEIGHTBL = 66,
+#define RTM_GETNEIGHTBL RTM_GETNEIGHTBL
+ RTM_SETNEIGHTBL,
+#define RTM_SETNEIGHTBL RTM_SETNEIGHTBL
+
+ RTM_NEWNDUSEROPT = 68,
+#define RTM_NEWNDUSEROPT RTM_NEWNDUSEROPT
+
+ RTM_NEWADDRLABEL = 72,
+#define RTM_NEWADDRLABEL RTM_NEWADDRLABEL
+ RTM_DELADDRLABEL,
+#define RTM_DELADDRLABEL RTM_DELADDRLABEL
+ RTM_GETADDRLABEL,
+#define RTM_GETADDRLABEL RTM_GETADDRLABEL
+
+ RTM_GETDCB = 78,
+#define RTM_GETDCB RTM_GETDCB
+ RTM_SETDCB,
+#define RTM_SETDCB RTM_SETDCB
+
+ RTM_NEWNETCONF = 80,
+#define RTM_NEWNETCONF RTM_NEWNETCONF
+ RTM_DELNETCONF,
+#define RTM_DELNETCONF RTM_DELNETCONF
+ RTM_GETNETCONF = 82,
+#define RTM_GETNETCONF RTM_GETNETCONF
+
+ RTM_NEWMDB = 84,
+#define RTM_NEWMDB RTM_NEWMDB
+ RTM_DELMDB = 85,
+#define RTM_DELMDB RTM_DELMDB
+ RTM_GETMDB = 86,
+#define RTM_GETMDB RTM_GETMDB
+
+ RTM_NEWNSID = 88,
+#define RTM_NEWNSID RTM_NEWNSID
+ RTM_DELNSID = 89,
+#define RTM_DELNSID RTM_DELNSID
+ RTM_GETNSID = 90,
+#define RTM_GETNSID RTM_GETNSID
+
+ RTM_NEWSTATS = 92,
+#define RTM_NEWSTATS RTM_NEWSTATS
+ RTM_GETSTATS = 94,
+#define RTM_GETSTATS RTM_GETSTATS
+ RTM_SETSTATS,
+#define RTM_SETSTATS RTM_SETSTATS
+
+ RTM_NEWCACHEREPORT = 96,
+#define RTM_NEWCACHEREPORT RTM_NEWCACHEREPORT
+
+ RTM_NEWCHAIN = 100,
+#define RTM_NEWCHAIN RTM_NEWCHAIN
+ RTM_DELCHAIN,
+#define RTM_DELCHAIN RTM_DELCHAIN
+ RTM_GETCHAIN,
+#define RTM_GETCHAIN RTM_GETCHAIN
+
+ RTM_NEWNEXTHOP = 104,
+#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP
+ RTM_DELNEXTHOP,
+#define RTM_DELNEXTHOP RTM_DELNEXTHOP
+ RTM_GETNEXTHOP,
+#define RTM_GETNEXTHOP RTM_GETNEXTHOP
+
+ RTM_NEWLINKPROP = 108,
+#define RTM_NEWLINKPROP RTM_NEWLINKPROP
+ RTM_DELLINKPROP,
+#define RTM_DELLINKPROP RTM_DELLINKPROP
+ RTM_GETLINKPROP,
+#define RTM_GETLINKPROP RTM_GETLINKPROP
+
+ RTM_NEWVLAN = 112,
+#define RTM_NEWVLAN RTM_NEWVLAN
+ RTM_DELVLAN,
+#define RTM_DELVLAN RTM_DELVLAN
+ RTM_GETVLAN,
+#define RTM_GETVLAN RTM_GETVLAN
+
+ RTM_NEWNEXTHOPBUCKET = 116,
+#define RTM_NEWNEXTHOPBUCKET RTM_NEWNEXTHOPBUCKET
+ RTM_DELNEXTHOPBUCKET,
+#define RTM_DELNEXTHOPBUCKET RTM_DELNEXTHOPBUCKET
+ RTM_GETNEXTHOPBUCKET,
+#define RTM_GETNEXTHOPBUCKET RTM_GETNEXTHOPBUCKET
+
+ RTM_NEWTUNNEL = 120,
+#define RTM_NEWTUNNEL RTM_NEWTUNNEL
+ RTM_DELTUNNEL,
+#define RTM_DELTUNNEL RTM_DELTUNNEL
+ RTM_GETTUNNEL,
+#define RTM_GETTUNNEL RTM_GETTUNNEL
+
+ __RTM_MAX,
+#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
+};
+
+#define RTM_NR_MSGTYPES (RTM_MAX + 1 - RTM_BASE)
+#define RTM_NR_FAMILIES (RTM_NR_MSGTYPES >> 2)
+#define RTM_FAM(cmd) (((cmd) - RTM_BASE) >> 2)
+
+/*
+ Generic structure for encapsulation of optional route information.
+ It is reminiscent of sockaddr, but with sa_family replaced
+ with attribute type.
+ */
+
+struct rtattr {
+ unsigned short rta_len;
+ unsigned short rta_type;
+};
+
+/* Macros to handle rtattributes */
+
+#define RTA_ALIGNTO 4U
+#define RTA_ALIGN(len) ( ((len)+RTA_ALIGNTO-1) & ~(RTA_ALIGNTO-1) )
+#define RTA_OK(rta,len) ((len) >= (int)sizeof(struct rtattr) && \
+ (rta)->rta_len >= sizeof(struct rtattr) && \
+ (rta)->rta_len <= (len))
+#define RTA_NEXT(rta,attrlen) ((attrlen) -= RTA_ALIGN((rta)->rta_len), \
+ (struct rtattr*)(((char*)(rta)) + RTA_ALIGN((rta)->rta_len)))
+#define RTA_LENGTH(len) (RTA_ALIGN(sizeof(struct rtattr)) + (len))
+#define RTA_SPACE(len) RTA_ALIGN(RTA_LENGTH(len))
+#define RTA_DATA(rta) ((void*)(((char*)(rta)) + RTA_LENGTH(0)))
+#define RTA_PAYLOAD(rta) ((int)((rta)->rta_len) - RTA_LENGTH(0))
+
+
+
+
+/******************************************************************************
+ * Definitions used in routing table administration.
+ ****/
+
+struct rtmsg {
+ unsigned char rtm_family;
+ unsigned char rtm_dst_len;
+ unsigned char rtm_src_len;
+ unsigned char rtm_tos;
+
+ unsigned char rtm_table; /* Routing table id */
+ unsigned char rtm_protocol; /* Routing protocol; see below */
+ unsigned char rtm_scope; /* See below */
+ unsigned char rtm_type; /* See below */
+
+ unsigned rtm_flags;
+};
+
+/* rtm_type */
+
+enum {
+ RTN_UNSPEC,
+ RTN_UNICAST, /* Gateway or direct route */
+ RTN_LOCAL, /* Accept locally */
+ RTN_BROADCAST, /* Accept locally as broadcast,
+ send as broadcast */
+ RTN_ANYCAST, /* Accept locally as broadcast,
+ but send as unicast */
+ RTN_MULTICAST, /* Multicast route */
+ RTN_BLACKHOLE, /* Drop */
+ RTN_UNREACHABLE, /* Destination is unreachable */
+ RTN_PROHIBIT, /* Administratively prohibited */
+ RTN_THROW, /* Not in this table */
+ RTN_NAT, /* Translate this address */
+ RTN_XRESOLVE, /* Use external resolver */
+ __RTN_MAX
+};
+
+#define RTN_MAX (__RTN_MAX - 1)
+
+
+/* rtm_protocol */
+
+#define RTPROT_UNSPEC 0
+#define RTPROT_REDIRECT 1 /* Route installed by ICMP redirects;
+ not used by current IPv4 */
+#define RTPROT_KERNEL 2 /* Route installed by kernel */
+#define RTPROT_BOOT 3 /* Route installed during boot */
+#define RTPROT_STATIC 4 /* Route installed by administrator */
+
+/* Values of protocol >= RTPROT_STATIC are not interpreted by kernel;
+ they are just passed from user and back as is.
+ It will be used by hypothetical multiple routing daemons.
+ Note that protocol values should be standardized in order to
+ avoid conflicts.
+ */
+
+#define RTPROT_GATED 8 /* Apparently, GateD */
+#define RTPROT_RA 9 /* RDISC/ND router advertisements */
+#define RTPROT_MRT 10 /* Merit MRT */
+#define RTPROT_ZEBRA 11 /* Zebra */
+#define RTPROT_BIRD 12 /* BIRD */
+#define RTPROT_DNROUTED 13 /* DECnet routing daemon */
+#define RTPROT_XORP 14 /* XORP */
+#define RTPROT_NTK 15 /* Netsukuku */
+#define RTPROT_DHCP 16 /* DHCP client */
+#define RTPROT_MROUTED 17 /* Multicast daemon */
+#define RTPROT_KEEPALIVED 18 /* Keepalived daemon */
+#define RTPROT_BABEL 42 /* Babel daemon */
+#define RTPROT_OVN 84 /* OVN daemon */
+#define RTPROT_OPENR 99 /* Open Routing (Open/R) Routes */
+#define RTPROT_BGP 186 /* BGP Routes */
+#define RTPROT_ISIS 187 /* ISIS Routes */
+#define RTPROT_OSPF 188 /* OSPF Routes */
+#define RTPROT_RIP 189 /* RIP Routes */
+#define RTPROT_EIGRP 192 /* EIGRP Routes */
+
+/* rtm_scope
+
+ Really it is not scope, but sort of distance to the destination.
+ NOWHERE is reserved for non-existent destinations, HOST is our
+ local addresses, LINK is for destinations located on a directly
+ attached link and UNIVERSE is everywhere in the Universe.
+
+ Intermediate values are also possible, e.g. interior routes
+ could be assigned a value between UNIVERSE and LINK.
+*/
+
+enum rt_scope_t {
+ RT_SCOPE_UNIVERSE=0,
+/* User defined values */
+ RT_SCOPE_SITE=200,
+ RT_SCOPE_LINK=253,
+ RT_SCOPE_HOST=254,
+ RT_SCOPE_NOWHERE=255
+};
+
+/* rtm_flags */
+
+#define RTM_F_NOTIFY 0x100 /* Notify user of route change */
+#define RTM_F_CLONED 0x200 /* This route is cloned */
+#define RTM_F_EQUALIZE 0x400 /* Multipath equalizer: NI */
+#define RTM_F_PREFIX 0x800 /* Prefix addresses */
+#define RTM_F_LOOKUP_TABLE 0x1000 /* set rtm_table to FIB lookup result */
+#define RTM_F_FIB_MATCH 0x2000 /* return full fib lookup match */
+#define RTM_F_OFFLOAD 0x4000 /* route is offloaded */
+#define RTM_F_TRAP 0x8000 /* route is trapping packets */
+#define RTM_F_OFFLOAD_FAILED 0x20000000 /* route offload failed, this value
+ * is chosen to avoid conflicts with
+ * other flags defined in
+ * include/uapi/linux/ipv6_route.h
+ */
+
+/* Reserved table identifiers */
+
+enum rt_class_t {
+ RT_TABLE_UNSPEC=0,
+/* User defined values */
+ RT_TABLE_COMPAT=252,
+ RT_TABLE_DEFAULT=253,
+ RT_TABLE_MAIN=254,
+ RT_TABLE_LOCAL=255,
+ RT_TABLE_MAX=0xFFFFFFFF
+};
+
+
+/* Routing message attributes */
+
+enum rtattr_type_t {
+ RTA_UNSPEC,
+ RTA_DST,
+ RTA_SRC,
+ RTA_IIF,
+ RTA_OIF,
+ RTA_GATEWAY,
+ RTA_PRIORITY,
+ RTA_PREFSRC,
+ RTA_METRICS,
+ RTA_MULTIPATH,
+ RTA_PROTOINFO, /* no longer used */
+ RTA_FLOW,
+ RTA_CACHEINFO,
+ RTA_SESSION, /* no longer used */
+ RTA_MP_ALGO, /* no longer used */
+ RTA_TABLE,
+ RTA_MARK,
+ RTA_MFC_STATS,
+ RTA_VIA,
+ RTA_NEWDST,
+ RTA_PREF,
+ RTA_ENCAP_TYPE,
+ RTA_ENCAP,
+ RTA_EXPIRES,
+ RTA_PAD,
+ RTA_UID,
+ RTA_TTL_PROPAGATE,
+ RTA_IP_PROTO,
+ RTA_SPORT,
+ RTA_DPORT,
+ RTA_NH_ID,
+ RTA_FLOWLABEL,
+ __RTA_MAX
+};
+
+#define RTA_MAX (__RTA_MAX - 1)
+
+#define RTM_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct rtmsg))))
+#define RTM_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct rtmsg))
+
+/* RTA_MULTIPATH --- array of struct rtnexthop.
+ *
+ * "struct rtnexthop" describes all necessary nexthop information,
+ * i.e. parameters of path to a destination via this nexthop.
+ *
+ * At the moment it is impossible to set different prefsrc, mtu, window
+ * and rtt for different paths from multipath.
+ */
+
+struct rtnexthop {
+ unsigned short rtnh_len;
+ unsigned char rtnh_flags;
+ unsigned char rtnh_hops;
+ int rtnh_ifindex;
+};
+
+/* rtnh_flags */
+
+#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
+#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
+#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_OFFLOAD 8 /* Nexthop is offloaded */
+#define RTNH_F_LINKDOWN 16 /* carrier-down on nexthop */
+#define RTNH_F_UNRESOLVED 32 /* The entry is unresolved (ipmr) */
+#define RTNH_F_TRAP 64 /* Nexthop is trapping packets */
+
+#define RTNH_COMPARE_MASK (RTNH_F_DEAD | RTNH_F_LINKDOWN | \
+ RTNH_F_OFFLOAD | RTNH_F_TRAP)
+
+/* Macros to handle nexthops */
+
+#define RTNH_ALIGNTO 4
+#define RTNH_ALIGN(len) ( ((len)+RTNH_ALIGNTO-1) & ~(RTNH_ALIGNTO-1) )
+#define RTNH_OK(rtnh,len) ((rtnh)->rtnh_len >= sizeof(struct rtnexthop) && \
+ ((int)(rtnh)->rtnh_len) <= (len))
+#define RTNH_NEXT(rtnh) ((struct rtnexthop*)(((char*)(rtnh)) + RTNH_ALIGN((rtnh)->rtnh_len)))
+#define RTNH_LENGTH(len) (RTNH_ALIGN(sizeof(struct rtnexthop)) + (len))
+#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len))
+#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
+
+/* RTA_VIA */
+struct rtvia {
+ __kernel_sa_family_t rtvia_family;
+ __u8 rtvia_addr[];
+};
+
+/* RTA_CACHEINFO */
+
+struct rta_cacheinfo {
+ __u32 rta_clntref;
+ __u32 rta_lastuse;
+ __s32 rta_expires;
+ __u32 rta_error;
+ __u32 rta_used;
+
+#define RTNETLINK_HAVE_PEERINFO 1
+ __u32 rta_id;
+ __u32 rta_ts;
+ __u32 rta_tsage;
+};
+
+/* RTA_METRICS --- array of struct rtattr with types of RTAX_* */
+
+enum {
+ RTAX_UNSPEC,
+#define RTAX_UNSPEC RTAX_UNSPEC
+ RTAX_LOCK,
+#define RTAX_LOCK RTAX_LOCK
+ RTAX_MTU,
+#define RTAX_MTU RTAX_MTU
+ RTAX_WINDOW,
+#define RTAX_WINDOW RTAX_WINDOW
+ RTAX_RTT,
+#define RTAX_RTT RTAX_RTT
+ RTAX_RTTVAR,
+#define RTAX_RTTVAR RTAX_RTTVAR
+ RTAX_SSTHRESH,
+#define RTAX_SSTHRESH RTAX_SSTHRESH
+ RTAX_CWND,
+#define RTAX_CWND RTAX_CWND
+ RTAX_ADVMSS,
+#define RTAX_ADVMSS RTAX_ADVMSS
+ RTAX_REORDERING,
+#define RTAX_REORDERING RTAX_REORDERING
+ RTAX_HOPLIMIT,
+#define RTAX_HOPLIMIT RTAX_HOPLIMIT
+ RTAX_INITCWND,
+#define RTAX_INITCWND RTAX_INITCWND
+ RTAX_FEATURES,
+#define RTAX_FEATURES RTAX_FEATURES
+ RTAX_RTO_MIN,
+#define RTAX_RTO_MIN RTAX_RTO_MIN
+ RTAX_INITRWND,
+#define RTAX_INITRWND RTAX_INITRWND
+ RTAX_QUICKACK,
+#define RTAX_QUICKACK RTAX_QUICKACK
+ RTAX_CC_ALGO,
+#define RTAX_CC_ALGO RTAX_CC_ALGO
+ RTAX_FASTOPEN_NO_COOKIE,
+#define RTAX_FASTOPEN_NO_COOKIE RTAX_FASTOPEN_NO_COOKIE
+ __RTAX_MAX
+};
+
+#define RTAX_MAX (__RTAX_MAX - 1)
+
+#define RTAX_FEATURE_ECN (1 << 0)
+#define RTAX_FEATURE_SACK (1 << 1) /* unused */
+#define RTAX_FEATURE_TIMESTAMP (1 << 2) /* unused */
+#define RTAX_FEATURE_ALLFRAG (1 << 3) /* unused */
+#define RTAX_FEATURE_TCP_USEC_TS (1 << 4)
+
+#define RTAX_FEATURE_MASK (RTAX_FEATURE_ECN | \
+ RTAX_FEATURE_SACK | \
+ RTAX_FEATURE_TIMESTAMP | \
+ RTAX_FEATURE_ALLFRAG | \
+ RTAX_FEATURE_TCP_USEC_TS)
+
+struct rta_session {
+ __u8 proto;
+ __u8 pad1;
+ __u16 pad2;
+
+ union {
+ struct {
+ __u16 sport;
+ __u16 dport;
+ } ports;
+
+ struct {
+ __u8 type;
+ __u8 code;
+ __u16 ident;
+ } icmpt;
+
+ __u32 spi;
+ } u;
+};
+
+struct rta_mfc_stats {
+ __u64 mfcs_packets;
+ __u64 mfcs_bytes;
+ __u64 mfcs_wrong_if;
+};
+
+/****
+ * General form of address family dependent message.
+ ****/
+
+struct rtgenmsg {
+ unsigned char rtgen_family;
+};
+
+/*****************************************************************
+ * Link layer specific messages.
+ ****/
+
+/* struct ifinfomsg
+ * passes link level specific information, not dependent
+ * on network protocol.
+ */
+
+struct ifinfomsg {
+ unsigned char ifi_family;
+ unsigned char __ifi_pad;
+ unsigned short ifi_type; /* ARPHRD_* */
+ int ifi_index; /* Link index */
+ unsigned ifi_flags; /* IFF_* flags */
+ unsigned ifi_change; /* IFF_* change mask */
+};
+
+/********************************************************************
+ * prefix information
+ ****/
+
+struct prefixmsg {
+ unsigned char prefix_family;
+ unsigned char prefix_pad1;
+ unsigned short prefix_pad2;
+ int prefix_ifindex;
+ unsigned char prefix_type;
+ unsigned char prefix_len;
+ unsigned char prefix_flags;
+ unsigned char prefix_pad3;
+};
+
+enum
+{
+ PREFIX_UNSPEC,
+ PREFIX_ADDRESS,
+ PREFIX_CACHEINFO,
+ __PREFIX_MAX
+};
+
+#define PREFIX_MAX (__PREFIX_MAX - 1)
+
+struct prefix_cacheinfo {
+ __u32 preferred_time;
+ __u32 valid_time;
+};
+
+
+/*****************************************************************
+ * Traffic control messages.
+ ****/
+
+struct tcmsg {
+ unsigned char tcm_family;
+ unsigned char tcm__pad1;
+ unsigned short tcm__pad2;
+ int tcm_ifindex;
+ __u32 tcm_handle;
+ __u32 tcm_parent;
+/* tcm_block_index is used instead of tcm_parent
+ * in case tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK
+ */
+#define tcm_block_index tcm_parent
+ __u32 tcm_info;
+};
+
+/* For manipulation of filters in shared block, tcm_ifindex is set to
+ * TCM_IFINDEX_MAGIC_BLOCK, and tcm_parent is aliased to tcm_block_index
+ * which is the block index.
+ */
+#define TCM_IFINDEX_MAGIC_BLOCK (0xFFFFFFFFU)
+
+enum {
+ TCA_UNSPEC,
+ TCA_KIND,
+ TCA_OPTIONS,
+ TCA_STATS,
+ TCA_XSTATS,
+ TCA_RATE,
+ TCA_FCNT,
+ TCA_STATS2,
+ TCA_STAB,
+ TCA_PAD,
+ TCA_DUMP_INVISIBLE,
+ TCA_CHAIN,
+ TCA_HW_OFFLOAD,
+ TCA_INGRESS_BLOCK,
+ TCA_EGRESS_BLOCK,
+ TCA_DUMP_FLAGS,
+ TCA_EXT_WARN_MSG,
+ __TCA_MAX
+};
+
+#define TCA_MAX (__TCA_MAX - 1)
+
+#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic
+ * data necessary to identify the objects
+ * (handle, cookie, etc.) and stats.
+ */
+
+#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
+#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
+
+/********************************************************************
+ * Neighbor Discovery userland options
+ ****/
+
+struct nduseroptmsg {
+ unsigned char nduseropt_family;
+ unsigned char nduseropt_pad1;
+ unsigned short nduseropt_opts_len; /* Total length of options */
+ int nduseropt_ifindex;
+ __u8 nduseropt_icmp_type;
+ __u8 nduseropt_icmp_code;
+ unsigned short nduseropt_pad2;
+ unsigned int nduseropt_pad3;
+ /* Followed by one or more ND options */
+};
+
+enum {
+ NDUSEROPT_UNSPEC,
+ NDUSEROPT_SRCADDR,
+ __NDUSEROPT_MAX
+};
+
+#define NDUSEROPT_MAX (__NDUSEROPT_MAX - 1)
+
+#ifndef __KERNEL__
+/* RTnetlink multicast groups - backwards compatibility for userspace */
+#define RTMGRP_LINK 1
+#define RTMGRP_NOTIFY 2
+#define RTMGRP_NEIGH 4
+#define RTMGRP_TC 8
+
+#define RTMGRP_IPV4_IFADDR 0x10
+#define RTMGRP_IPV4_MROUTE 0x20
+#define RTMGRP_IPV4_ROUTE 0x40
+#define RTMGRP_IPV4_RULE 0x80
+
+#define RTMGRP_IPV6_IFADDR 0x100
+#define RTMGRP_IPV6_MROUTE 0x200
+#define RTMGRP_IPV6_ROUTE 0x400
+#define RTMGRP_IPV6_IFINFO 0x800
+
+#define RTMGRP_DECnet_IFADDR 0x1000
+#define RTMGRP_DECnet_ROUTE 0x4000
+
+#define RTMGRP_IPV6_PREFIX 0x20000
+#endif
+
+/* RTnetlink multicast groups */
+enum rtnetlink_groups {
+ RTNLGRP_NONE,
+#define RTNLGRP_NONE RTNLGRP_NONE
+ RTNLGRP_LINK,
+#define RTNLGRP_LINK RTNLGRP_LINK
+ RTNLGRP_NOTIFY,
+#define RTNLGRP_NOTIFY RTNLGRP_NOTIFY
+ RTNLGRP_NEIGH,
+#define RTNLGRP_NEIGH RTNLGRP_NEIGH
+ RTNLGRP_TC,
+#define RTNLGRP_TC RTNLGRP_TC
+ RTNLGRP_IPV4_IFADDR,
+#define RTNLGRP_IPV4_IFADDR RTNLGRP_IPV4_IFADDR
+ RTNLGRP_IPV4_MROUTE,
+#define RTNLGRP_IPV4_MROUTE RTNLGRP_IPV4_MROUTE
+ RTNLGRP_IPV4_ROUTE,
+#define RTNLGRP_IPV4_ROUTE RTNLGRP_IPV4_ROUTE
+ RTNLGRP_IPV4_RULE,
+#define RTNLGRP_IPV4_RULE RTNLGRP_IPV4_RULE
+ RTNLGRP_IPV6_IFADDR,
+#define RTNLGRP_IPV6_IFADDR RTNLGRP_IPV6_IFADDR
+ RTNLGRP_IPV6_MROUTE,
+#define RTNLGRP_IPV6_MROUTE RTNLGRP_IPV6_MROUTE
+ RTNLGRP_IPV6_ROUTE,
+#define RTNLGRP_IPV6_ROUTE RTNLGRP_IPV6_ROUTE
+ RTNLGRP_IPV6_IFINFO,
+#define RTNLGRP_IPV6_IFINFO RTNLGRP_IPV6_IFINFO
+ RTNLGRP_DECnet_IFADDR,
+#define RTNLGRP_DECnet_IFADDR RTNLGRP_DECnet_IFADDR
+ RTNLGRP_NOP2,
+ RTNLGRP_DECnet_ROUTE,
+#define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE
+ RTNLGRP_DECnet_RULE,
+#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE
+ RTNLGRP_NOP4,
+ RTNLGRP_IPV6_PREFIX,
+#define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX
+ RTNLGRP_IPV6_RULE,
+#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE
+ RTNLGRP_ND_USEROPT,
+#define RTNLGRP_ND_USEROPT RTNLGRP_ND_USEROPT
+ RTNLGRP_PHONET_IFADDR,
+#define RTNLGRP_PHONET_IFADDR RTNLGRP_PHONET_IFADDR
+ RTNLGRP_PHONET_ROUTE,
+#define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE
+ RTNLGRP_DCB,
+#define RTNLGRP_DCB RTNLGRP_DCB
+ RTNLGRP_IPV4_NETCONF,
+#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF
+ RTNLGRP_IPV6_NETCONF,
+#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF
+ RTNLGRP_MDB,
+#define RTNLGRP_MDB RTNLGRP_MDB
+ RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE
+ RTNLGRP_NSID,
+#define RTNLGRP_NSID RTNLGRP_NSID
+ RTNLGRP_MPLS_NETCONF,
+#define RTNLGRP_MPLS_NETCONF RTNLGRP_MPLS_NETCONF
+ RTNLGRP_IPV4_MROUTE_R,
+#define RTNLGRP_IPV4_MROUTE_R RTNLGRP_IPV4_MROUTE_R
+ RTNLGRP_IPV6_MROUTE_R,
+#define RTNLGRP_IPV6_MROUTE_R RTNLGRP_IPV6_MROUTE_R
+ RTNLGRP_NEXTHOP,
+#define RTNLGRP_NEXTHOP RTNLGRP_NEXTHOP
+ RTNLGRP_BRVLAN,
+#define RTNLGRP_BRVLAN RTNLGRP_BRVLAN
+ RTNLGRP_MCTP_IFADDR,
+#define RTNLGRP_MCTP_IFADDR RTNLGRP_MCTP_IFADDR
+ RTNLGRP_TUNNEL,
+#define RTNLGRP_TUNNEL RTNLGRP_TUNNEL
+ RTNLGRP_STATS,
+#define RTNLGRP_STATS RTNLGRP_STATS
+ RTNLGRP_IPV4_MCADDR,
+#define RTNLGRP_IPV4_MCADDR RTNLGRP_IPV4_MCADDR
+ RTNLGRP_IPV6_MCADDR,
+#define RTNLGRP_IPV6_MCADDR RTNLGRP_IPV6_MCADDR
+ RTNLGRP_IPV6_ACADDR,
+#define RTNLGRP_IPV6_ACADDR RTNLGRP_IPV6_ACADDR
+ __RTNLGRP_MAX
+};
+#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
+
+/* TC action piece */
+struct tcamsg {
+ unsigned char tca_family;
+ unsigned char tca__pad1;
+ unsigned short tca__pad2;
+};
+
+enum {
+ TCA_ROOT_UNSPEC,
+ TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+#define TCAA_MAX TCA_ROOT_TAB
+ TCA_ROOT_FLAGS,
+ TCA_ROOT_COUNT,
+ TCA_ROOT_TIME_DELTA, /* in msecs */
+ TCA_ROOT_EXT_WARN_MSG,
+ __TCA_ROOT_MAX,
+#define TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
+#define TA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
+#define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_ACT_FLAG_LARGE_DUMP_ON user->kernel to request more than
+ * TCA_ACT_MAX_PRIO actions in a dump. All dump responses will contain the
+ * number of actions being dumped, stored for the user app's consumption
+ * in TCA_ROOT_COUNT.
+ *
+ * TCA_ACT_FLAG_TERSE_DUMP user->kernel to request terse (brief) dump that only
+ * includes essential action info (kind, index, etc.)
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON (1 << 0)
+#define TCA_ACT_FLAG_LARGE_DUMP_ON TCA_FLAG_LARGE_DUMP_ON
+#define TCA_ACT_FLAG_TERSE_DUMP (1 << 1)
+
+/* New extended info filters for IFLA_EXT_MASK */
+#define RTEXT_FILTER_VF (1 << 0)
+#define RTEXT_FILTER_BRVLAN (1 << 1)
+#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2)
+#define RTEXT_FILTER_SKIP_STATS (1 << 3)
+#define RTEXT_FILTER_MRP (1 << 4)
+#define RTEXT_FILTER_CFM_CONFIG (1 << 5)
+#define RTEXT_FILTER_CFM_STATUS (1 << 6)
+#define RTEXT_FILTER_MST (1 << 7)
+
+/* End of information exported to user level */
+
+
+
+#endif /* _UAPI__LINUX_RTNETLINK_H */
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index e2cd558ca0b4..c80204bb72a2 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,4 +1,4 @@
-libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
+libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_utils.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \
btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \
usdt.o zip.o elf.o features.o btf_iter.o btf_relocate.o
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index ab40dbf9f020..b66f5fbfbbb2 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -154,7 +154,7 @@ int bump_rlimit_memlock(void)
memlock_bumped = true;
- /* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */
+ /* zero memlock_rlim disables auto-bumping RLIMIT_MEMLOCK */
if (memlock_rlim == 0)
return 0;
@@ -172,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type,
__u32 max_entries,
const struct bpf_map_create_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd);
+ const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
union bpf_attr attr;
int fd;
@@ -203,6 +203,8 @@ int bpf_map_create(enum bpf_map_type map_type,
attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0);
attr.map_token_fd = OPTS_GET(opts, token_fd, 0);
+ attr.excl_prog_hash = ptr_to_u64(OPTS_GET(opts, excl_prog_hash, NULL));
+ attr.excl_prog_hash_size = OPTS_GET(opts, excl_prog_hash_size, 0);
fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);
return libbpf_err_errno(fd);
@@ -238,7 +240,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
const struct bpf_insn *insns, size_t insn_cnt,
struct bpf_prog_load_opts *opts)
{
- const size_t attr_sz = offsetofend(union bpf_attr, fd_array_cnt);
+ const size_t attr_sz = offsetofend(union bpf_attr, keyring_id);
void *finfo = NULL, *linfo = NULL;
const char *func_info, *line_info;
__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 7252150e7ad3..e983a3e40d61 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -54,9 +54,12 @@ struct bpf_map_create_opts {
__s32 value_type_btf_obj_fd;
__u32 token_fd;
+
+ const void *excl_prog_hash;
+ __u32 excl_prog_hash_size;
size_t :0;
};
-#define bpf_map_create_opts__last_field token_fd
+#define bpf_map_create_opts__last_field excl_prog_hash_size
LIBBPF_API int bpf_map_create(enum bpf_map_type map_type,
const char *map_name,
diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h
index 6ff963a491d9..49af4260b8e6 100644
--- a/tools/lib/bpf/bpf_gen_internal.h
+++ b/tools/lib/bpf/bpf_gen_internal.h
@@ -4,6 +4,7 @@
#define __BPF_GEN_INTERNAL_H
#include "bpf.h"
+#include "libbpf_internal.h"
struct ksym_relo_desc {
const char *name;
@@ -50,6 +51,7 @@ struct bpf_gen {
__u32 nr_ksyms;
int fd_array;
int nr_fd_array;
+ int hash_insn_offset[SHA256_DWORD_SIZE];
};
void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps);
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 80c028540656..d4e4e388e625 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -315,20 +315,20 @@ enum libbpf_tristate {
___param, sizeof(___param)); \
})
-extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args,
- __u32 len__sz, void *aux__prog) __weak __ksym;
-
-#define bpf_stream_printk(stream_id, fmt, args...) \
-({ \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[___bpf_narg(args)]; \
- \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- ___bpf_fill(___param, args); \
- _Pragma("GCC diagnostic pop") \
- \
- bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param), NULL);\
+extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args,
+ __u32 len__sz, void *aux__prog) __weak __ksym;
+
+#define bpf_stream_printk(stream_id, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_stream_vprintk_impl(stream_id, ___fmt, ___param, sizeof(___param), NULL); \
})
/* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index a8f6cd4841b0..dbe32a5d02cd 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -311,7 +311,7 @@ struct pt_regs___arm64 {
#define __PT_RET_REG regs[31]
#define __PT_FP_REG __unsupported__
#define __PT_RC_REG gpr[3]
-#define __PT_SP_REG sp
+#define __PT_SP_REG gpr[1]
#define __PT_IP_REG nip
#elif defined(bpf_target_sparc)
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 37682908cb0f..84a4b0abc8be 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -23,7 +23,6 @@
#include "libbpf_internal.h"
#include "hashmap.h"
#include "strset.h"
-#include "str_error.h"
#define BTF_MAX_NR_TYPES 0x7fffffffU
#define BTF_MAX_STR_OFFSET 0x7fffffffU
@@ -1062,7 +1061,7 @@ static struct btf *btf_new(const void *data, __u32 size, struct btf *base_btf, b
if (base_btf) {
btf->base_btf = base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len;
+ btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
}
if (is_mmap) {
@@ -3902,6 +3901,20 @@ err_out:
return err;
}
+/*
+ * Calculate type signature hash of TYPEDEF, ignoring referenced type IDs,
+ * as referenced type IDs equivalence is established separately during type
+ * graph equivalence check algorithm.
+ */
+static long btf_hash_typedef(struct btf_type *t)
+{
+ long h;
+
+ h = hash_combine(0, t->name_off);
+ h = hash_combine(h, t->info);
+ return h;
+}
+
static long btf_hash_common(struct btf_type *t)
{
long h;
@@ -3919,6 +3932,13 @@ static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2)
t1->size == t2->size;
}
+/* Check structural compatibility of two TYPEDEF. */
+static bool btf_equal_typedef(struct btf_type *t1, struct btf_type *t2)
+{
+ return t1->name_off == t2->name_off &&
+ t1->info == t2->info;
+}
+
/* Calculate type signature hash of INT or TAG. */
static long btf_hash_int_decl_tag(struct btf_type *t)
{
@@ -4845,13 +4865,30 @@ static void btf_dedup_merge_hypot_map(struct btf_dedup *d)
}
}
+static inline long btf_hash_by_kind(struct btf_type *t, __u16 kind)
+{
+ if (kind == BTF_KIND_TYPEDEF)
+ return btf_hash_typedef(t);
+ else
+ return btf_hash_struct(t);
+}
+
+static inline bool btf_equal_by_kind(struct btf_type *t1, struct btf_type *t2, __u16 kind)
+{
+ if (kind == BTF_KIND_TYPEDEF)
+ return btf_equal_typedef(t1, t2);
+ else
+ return btf_shallow_equal_struct(t1, t2);
+}
+
/*
- * Deduplicate struct/union types.
+ * Deduplicate struct/union and typedef types.
*
* For each struct/union type its type signature hash is calculated, taking
* into account type's name, size, number, order and names of fields, but
* ignoring type ID's referenced from fields, because they might not be deduped
- * completely until after reference types deduplication phase. This type hash
+ * completely until after reference types deduplication phase. For each typedef
+ * type, the hash is computed based on the type's name and info. This type hash
* is used to iterate over all potential canonical types, sharing same hash.
* For each canonical candidate we check whether type graphs that they form
* (through referenced types in fields and so on) are equivalent using algorithm
@@ -4883,18 +4920,20 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
t = btf_type_by_id(d->btf, type_id);
kind = btf_kind(t);
- if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION)
+ if (kind != BTF_KIND_STRUCT &&
+ kind != BTF_KIND_UNION &&
+ kind != BTF_KIND_TYPEDEF)
return 0;
- h = btf_hash_struct(t);
+ h = btf_hash_by_kind(t, kind);
for_each_dedup_cand(d, hash_entry, h) {
__u32 cand_id = hash_entry->value;
int eq;
/*
* Even though btf_dedup_is_equiv() checks for
- * btf_shallow_equal_struct() internally when checking two
- * structs (unions) for equivalence, we need to guard here
+ * btf_equal_by_kind() internally when checking two
+ * structs (unions) or typedefs for equivalence, we need to guard here
* from picking matching FWD type as a dedup candidate.
* This can happen due to hash collision. In such case just
* relying on btf_dedup_is_equiv() would lead to potentially
@@ -4902,7 +4941,7 @@ static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id)
* FWD and compatible STRUCT/UNION are considered equivalent.
*/
cand_type = btf_type_by_id(d->btf, cand_id);
- if (!btf_shallow_equal_struct(t, cand_type))
+ if (!btf_equal_by_kind(t, cand_type, kind))
continue;
btf_dedup_clear_hypot_map(d);
@@ -4940,18 +4979,18 @@ static int btf_dedup_struct_types(struct btf_dedup *d)
/*
* Deduplicate reference type.
*
- * Once all primitive and struct/union types got deduplicated, we can easily
+ * Once all primitive, struct/union and typedef types got deduplicated, we can easily
* deduplicate all other (reference) BTF types. This is done in two steps:
*
* 1. Resolve all referenced type IDs into their canonical type IDs. This
- * resolution can be done either immediately for primitive or struct/union types
- * (because they were deduped in previous two phases) or recursively for
+ * resolution can be done either immediately for primitive, struct/union, and typedef
+ * types (because they were deduped in previous two phases) or recursively for
* reference types. Recursion will always terminate at either primitive or
- * struct/union type, at which point we can "unwind" chain of reference types
- * one by one. There is no danger of encountering cycles because in C type
- * system the only way to form type cycle is through struct/union, so any chain
- * of reference types, even those taking part in a type cycle, will inevitably
- * reach struct/union at some point.
+ * struct/union or typedef type, at which point we can "unwind" the chain of reference
+ * types one by one. There is no danger of encountering cycles in C, as the only way to
+ * form a type cycle is through struct or union types. Go can form such cycles through
+ * typedef. Thus, any chain of reference types, even those taking part in a type cycle,
+ * will inevitably reach a struct/union or typedef type at some point.
*
* 2. Once all referenced type IDs are resolved into canonical ones, BTF type
* becomes "stable", in the sense that no further deduplication will cause
@@ -4983,7 +5022,6 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)
case BTF_KIND_VOLATILE:
case BTF_KIND_RESTRICT:
case BTF_KIND_PTR:
- case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
case BTF_KIND_TYPE_TAG:
ref_type_id = btf_dedup_ref_type(d, t->type);
@@ -5819,7 +5857,7 @@ void btf_set_base_btf(struct btf *btf, const struct btf *base_btf)
{
btf->base_btf = (struct btf *)base_btf;
btf->start_id = btf__type_cnt(base_btf);
- btf->start_str_off = base_btf->hdr->str_len;
+ btf->start_str_off = base_btf->hdr->str_len + base_btf->start_str_off;
}
int btf__relocate(struct btf *btf, const struct btf *base_btf)
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index ccfd905f03df..cc01494d6210 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -94,6 +94,7 @@ LIBBPF_API struct btf *btf__new_empty(void);
* @brief **btf__new_empty_split()** creates an unpopulated BTF object from an
* ELF BTF section except with a base BTF on top of which split BTF should be
* based
+ * @param base_btf base BTF object
* @return new BTF object instance which has to be eventually freed with
* **btf__free()**
*
@@ -115,6 +116,10 @@ LIBBPF_API struct btf *btf__new_empty_split(struct btf *base_btf);
* When that split BTF is loaded against a (possibly changed) base, this
* distilled base BTF will help update references to that (possibly changed)
* base BTF.
+ * @param src_btf source split BTF object
+ * @param new_base_btf pointer to where the new base BTF object pointer will be stored
+ * @param new_split_btf pointer to where the new split BTF object pointer will be stored
+ * @return 0 on success; negative error code, otherwise
*
* Both the new split and its associated new base BTF must be freed by
* the caller.
@@ -264,6 +269,9 @@ LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts);
* to base BTF kinds, and verify those references are compatible with
* *base_btf*; if they are, *btf* is adjusted such that is re-parented to
* *base_btf* and type ids and strings are adjusted to accommodate this.
+ * @param btf split BTF object to relocate
+ * @param base_btf base BTF object
+ * @return 0 on success; negative error code, otherwise
*
* If successful, 0 is returned and **btf** now has **base_btf** as its
* base.
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index f09f25eccf3c..6388392f49a0 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -21,7 +21,6 @@
#include "hashmap.h"
#include "libbpf.h"
#include "libbpf_internal.h"
-#include "str_error.h"
static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t";
static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1;
diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c
index 823f83ad819c..295dbda24580 100644
--- a/tools/lib/bpf/elf.c
+++ b/tools/lib/bpf/elf.c
@@ -9,7 +9,6 @@
#include <linux/kernel.h>
#include "libbpf_internal.h"
-#include "str_error.h"
/* A SHT_GNU_versym section holds 16-bit words. This bit is set if
* the symbol is hidden and can only be seen when referenced using an
diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c
index 760657f5224c..b842b83e2480 100644
--- a/tools/lib/bpf/features.c
+++ b/tools/lib/bpf/features.c
@@ -6,7 +6,6 @@
#include "libbpf.h"
#include "libbpf_common.h"
#include "libbpf_internal.h"
-#include "str_error.h"
static inline __u64 ptr_to_u64(const void *ptr)
{
diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c
index 113ae4abd345..cd5c2543f54d 100644
--- a/tools/lib/bpf/gen_loader.c
+++ b/tools/lib/bpf/gen_loader.c
@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <errno.h>
+#include <asm/byteorder.h>
#include <linux/filter.h>
#include <sys/param.h>
#include "btf.h"
@@ -13,8 +14,6 @@
#include "hashmap.h"
#include "bpf_gen_internal.h"
#include "skel_internal.h"
-#include <asm/byteorder.h>
-#include "str_error.h"
#define MAX_USED_MAPS 64
#define MAX_USED_PROGS 32
@@ -110,6 +109,7 @@ static void emit2(struct bpf_gen *gen, struct bpf_insn insn1, struct bpf_insn in
static int add_data(struct bpf_gen *gen, const void *data, __u32 size);
static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off);
+static void emit_signature_match(struct bpf_gen *gen);
void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps)
{
@@ -152,6 +152,8 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps
/* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
emit(gen, BPF_EXIT_INSN());
+ if (OPTS_GET(gen->opts, gen_hash, false))
+ emit_signature_match(gen);
}
static int add_data(struct bpf_gen *gen, const void *data, __u32 size)
@@ -368,6 +370,8 @@ static void emit_sys_close_blob(struct bpf_gen *gen, int blob_off)
__emit_sys_close(gen);
}
+static void compute_sha_update_offsets(struct bpf_gen *gen);
+
int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
{
int i;
@@ -394,6 +398,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)
blob_fd_array_off(gen, i));
emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0));
emit(gen, BPF_EXIT_INSN());
+ if (OPTS_GET(gen->opts, gen_hash, false))
+ compute_sha_update_offsets(gen);
+
pr_debug("gen: finish %s\n", errstr(gen->error));
if (!gen->error) {
struct gen_loader_opts *opts = gen->opts;
@@ -446,6 +453,22 @@ void bpf_gen__free(struct bpf_gen *gen)
_val; \
})
+static void compute_sha_update_offsets(struct bpf_gen *gen)
+{
+ __u64 sha[SHA256_DWORD_SIZE];
+ __u64 sha_dw;
+ int i;
+
+ libbpf_sha256(gen->data_start, gen->data_cur - gen->data_start, (__u8 *)sha);
+ for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+ struct bpf_insn *insn =
+ (struct bpf_insn *)(gen->insn_start + gen->hash_insn_offset[i]);
+ sha_dw = tgt_endian(sha[i]);
+ insn[0].imm = (__u32)sha_dw;
+ insn[1].imm = sha_dw >> 32;
+ }
+}
+
void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,
__u32 btf_raw_size)
{
@@ -557,6 +580,29 @@ void bpf_gen__map_create(struct bpf_gen *gen,
emit_sys_close_stack(gen, stack_off(inner_map_fd));
}
+static void emit_signature_match(struct bpf_gen *gen)
+{
+ __s64 off;
+ int i;
+
+ for (i = 0; i < SHA256_DWORD_SIZE; i++) {
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX,
+ 0, 0, 0, 0));
+ emit(gen, BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, i * sizeof(__u64)));
+ gen->hash_insn_offset[i] = gen->insn_cur - gen->insn_start;
+ emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_3, 0, 0, 0, 0, 0));
+
+ off = -(gen->insn_cur - gen->insn_start - gen->cleanup_label) / 8 - 1;
+ if (is_simm16(off)) {
+ emit(gen, BPF_MOV64_IMM(BPF_REG_7, -EINVAL));
+ emit(gen, BPF_JMP_REG(BPF_JNE, BPF_REG_2, BPF_REG_3, off));
+ } else {
+ gen->error = -ERANGE;
+ emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
+ }
+ }
+}
+
void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *attach_name,
enum bpf_attach_type type)
{
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8f5a81b672e1..3dc8a8078815 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -50,7 +50,6 @@
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
-#include "str_error.h"
#include "libbpf_internal.h"
#include "hashmap.h"
#include "bpf_gen_internal.h"
@@ -191,6 +190,7 @@ static const char * const map_type_name[] = {
[BPF_MAP_TYPE_USER_RINGBUF] = "user_ringbuf",
[BPF_MAP_TYPE_CGRP_STORAGE] = "cgrp_storage",
[BPF_MAP_TYPE_ARENA] = "arena",
+ [BPF_MAP_TYPE_INSN_ARRAY] = "insn_array",
};
static const char * const prog_type_name[] = {
@@ -318,8 +318,6 @@ static void pr_perm_msg(int err)
buf);
}
-#define STRERR_BUFSIZE 128
-
/* Copied from tools/perf/util/util.h */
#ifndef zfree
# define zfree(ptr) ({ free(*ptr); *ptr = NULL; })
@@ -372,6 +370,7 @@ enum reloc_type {
RELO_EXTERN_CALL,
RELO_SUBPROG_ADDR,
RELO_CORE,
+ RELO_INSN_ARRAY,
};
struct reloc_desc {
@@ -382,7 +381,16 @@ struct reloc_desc {
struct {
int map_idx;
int sym_off;
- int ext_idx;
+ /*
+ * The following two fields can be unionized, as the
+ * ext_idx field is used for extern symbols, and the
+ * sym_size is used for jump tables, which are never
+ * extern
+ */
+ union {
+ int ext_idx;
+ int sym_size;
+ };
};
};
};
@@ -424,6 +432,11 @@ struct bpf_sec_def {
libbpf_prog_attach_fn_t prog_attach_fn;
};
+struct bpf_light_subprog {
+ __u32 sec_insn_off;
+ __u32 sub_insn_off;
+};
+
/*
* bpf_prog should be a better name but it has been used in
* linux/filter.h.
@@ -496,6 +509,10 @@ struct bpf_program {
__u32 line_info_rec_size;
__u32 line_info_cnt;
__u32 prog_flags;
+ __u8 hash[SHA256_DIGEST_LENGTH];
+
+ struct bpf_light_subprog *subprogs;
+ __u32 subprog_cnt;
};
struct bpf_struct_ops {
@@ -575,6 +592,7 @@ struct bpf_map {
bool autocreate;
bool autoattach;
__u64 map_extra;
+ struct bpf_program *excl_prog;
};
enum extern_type {
@@ -668,6 +686,7 @@ struct elf_state {
int symbols_shndx;
bool has_st_ops;
int arena_data_shndx;
+ int jumptables_data_shndx;
};
struct usdt_manager;
@@ -739,6 +758,16 @@ struct bpf_object {
void *arena_data;
size_t arena_data_sz;
+ void *jumptables_data;
+ size_t jumptables_data_sz;
+
+ struct {
+ struct bpf_program *prog;
+ int sym_off;
+ int fd;
+ } *jumptable_maps;
+ size_t jumptable_map_cnt;
+
struct kern_feature_cache *feat_cache;
char *token_path;
int token_fd;
@@ -765,6 +794,7 @@ void bpf_program__unload(struct bpf_program *prog)
zfree(&prog->func_info);
zfree(&prog->line_info);
+ zfree(&prog->subprogs);
}
static void bpf_program__exit(struct bpf_program *prog)
@@ -1013,35 +1043,33 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
const struct btf_member *kern_data_member;
struct btf *btf = NULL;
__s32 kern_vtype_id, kern_type_id;
- char tname[256];
+ char tname[192], stname[256];
__u32 i;
snprintf(tname, sizeof(tname), "%.*s",
(int)bpf_core_essential_name_len(tname_raw), tname_raw);
- kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT,
- &btf, mod_btf);
- if (kern_type_id < 0) {
- pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n",
- tname);
- return kern_type_id;
- }
- kern_type = btf__type_by_id(btf, kern_type_id);
+ snprintf(stname, sizeof(stname), "%s%s", STRUCT_OPS_VALUE_PREFIX, tname);
- /* Find the corresponding "map_value" type that will be used
- * in map_update(BPF_MAP_TYPE_STRUCT_OPS). For example,
- * find "struct bpf_struct_ops_tcp_congestion_ops" from the
- * btf_vmlinux.
+ /* First look for the corresponding "map_value" type that will be used
+ * in map_update(BPF_MAP_TYPE_STRUCT_OPS); this lookup also determines
+ * the btf and the mod_btf.
+ * For example, find "struct bpf_struct_ops_tcp_congestion_ops".
*/
- kern_vtype_id = find_btf_by_prefix_kind(btf, STRUCT_OPS_VALUE_PREFIX,
- tname, BTF_KIND_STRUCT);
+ kern_vtype_id = find_ksym_btf_id(obj, stname, BTF_KIND_STRUCT, &btf, mod_btf);
if (kern_vtype_id < 0) {
- pr_warn("struct_ops init_kern: struct %s%s is not found in kernel BTF\n",
- STRUCT_OPS_VALUE_PREFIX, tname);
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", stname);
return kern_vtype_id;
}
kern_vtype = btf__type_by_id(btf, kern_vtype_id);
+ kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ if (kern_type_id < 0) {
+ pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname);
+ return kern_type_id;
+ }
+ kern_type = btf__type_by_id(btf, kern_type_id);
+
/* Find "struct tcp_congestion_ops" from
* struct bpf_struct_ops_tcp_congestion_ops {
* [ ... ]
@@ -1054,8 +1082,8 @@ find_struct_ops_kern_types(struct bpf_object *obj, const char *tname_raw,
break;
}
if (i == btf_vlen(kern_vtype)) {
- pr_warn("struct_ops init_kern: struct %s data is not found in struct %s%s\n",
- tname, STRUCT_OPS_VALUE_PREFIX, tname);
+ pr_warn("struct_ops init_kern: struct %s data is not found in struct %s\n",
+ tname, stname);
return -EINVAL;
}
@@ -2999,7 +3027,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
scn = elf_sec_by_idx(obj, obj->efile.btf_maps_shndx);
data = elf_sec_data(obj, scn);
- if (!scn || !data) {
+ if (!data) {
pr_warn("elf: failed to get %s map definitions for %s\n",
MAPS_ELF_SEC, obj->path);
return -EINVAL;
@@ -3945,6 +3973,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
} else if (strcmp(name, ARENA_SEC) == 0) {
obj->efile.arena_data = data;
obj->efile.arena_data_shndx = idx;
+ } else if (strcmp(name, JUMPTABLES_SEC) == 0) {
+ obj->jumptables_data = malloc(data->d_size);
+ if (!obj->jumptables_data)
+ return -ENOMEM;
+ memcpy(obj->jumptables_data, data->d_buf, data->d_size);
+ obj->jumptables_data_sz = data->d_size;
+ obj->efile.jumptables_data_shndx = idx;
} else {
pr_info("elf: skipping unrecognized data section(%d) %s\n",
idx, name);
@@ -4485,6 +4520,44 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)
}
}
+static int bpf_prog_compute_hash(struct bpf_program *prog)
+{
+ struct bpf_insn *purged;
+ int i, err = 0;
+
+ purged = calloc(prog->insns_cnt, BPF_INSN_SZ);
+ if (!purged)
+ return -ENOMEM;
+
+ /* If relocations have been done, the map_fd needs to be
+ * discarded for the digest calculation.
+ */
+ for (i = 0; i < prog->insns_cnt; i++) {
+ purged[i] = prog->insns[i];
+ if (purged[i].code == (BPF_LD | BPF_IMM | BPF_DW) &&
+ (purged[i].src_reg == BPF_PSEUDO_MAP_FD ||
+ purged[i].src_reg == BPF_PSEUDO_MAP_VALUE)) {
+ purged[i].imm = 0;
+ i++;
+ if (i >= prog->insns_cnt ||
+ prog->insns[i].code != 0 ||
+ prog->insns[i].dst_reg != 0 ||
+ prog->insns[i].src_reg != 0 ||
+ prog->insns[i].off != 0) {
+ err = -EINVAL;
+ goto out;
+ }
+ purged[i] = prog->insns[i];
+ purged[i].imm = 0;
+ }
+ }
+ libbpf_sha256(purged, prog->insns_cnt * sizeof(struct bpf_insn),
+ prog->hash);
+out:
+ free(purged);
+ return err;
+}
+
static int bpf_program__record_reloc(struct bpf_program *prog,
struct reloc_desc *reloc_desc,
__u32 insn_idx, const char *sym_name,
@@ -4599,6 +4672,16 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
return 0;
}
+ /* jump table data relocation */
+ if (shdr_idx == obj->efile.jumptables_data_shndx) {
+ reloc_desc->type = RELO_INSN_ARRAY;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->map_idx = -1;
+ reloc_desc->sym_off = sym->st_value;
+ reloc_desc->sym_size = sym->st_size;
+ return 0;
+ }
+
/* generic map reference relocation */
if (type == LIBBPF_MAP_UNSPEC) {
if (!bpf_object__shndx_is_maps(obj, shdr_idx)) {
@@ -5093,6 +5176,16 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
return false;
}
+ /*
+ * bpf_get_map_info_by_fd() for DEVMAP will always return flags with
+ * BPF_F_RDONLY_PROG set, but it generally is not set at map creation time.
+ * Thus, ignore the BPF_F_RDONLY_PROG flag in the flags returned from
+ * bpf_get_map_info_by_fd() when checking for compatibility with an
+ * existing DEVMAP.
+ */
+ if (map->def.type == BPF_MAP_TYPE_DEVMAP || map->def.type == BPF_MAP_TYPE_DEVMAP_HASH)
+ map_info.map_flags &= ~BPF_F_RDONLY_PROG;
+
return (map_info.type == map->def.type &&
map_info.key_size == map->def.key_size &&
map_info.value_size == map->def.value_size &&
@@ -5224,6 +5317,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b
create_attr.token_fd = obj->token_fd;
if (obj->token_fd)
create_attr.map_flags |= BPF_F_TOKEN_FD;
+ if (map->excl_prog) {
+ err = bpf_prog_compute_hash(map->excl_prog);
+ if (err)
+ return err;
+
+ create_attr.excl_prog_hash = map->excl_prog->hash;
+ create_attr.excl_prog_hash_size = SHA256_DIGEST_LENGTH;
+ }
if (bpf_map__is_struct_ops(map)) {
create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id;
@@ -6091,6 +6192,157 @@ static void poison_kfunc_call(struct bpf_program *prog, int relo_idx,
insn->imm = POISON_CALL_KFUNC_BASE + ext_idx;
}
+static int find_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off)
+{
+ size_t i;
+
+ for (i = 0; i < obj->jumptable_map_cnt; i++) {
+ /*
+ * It might happen that the same offset is used for two different
+ * programs (as jump tables can be the same). However, for
+ * different programs different maps should be created.
+ */
+ if (obj->jumptable_maps[i].sym_off == sym_off &&
+ obj->jumptable_maps[i].prog == prog)
+ return obj->jumptable_maps[i].fd;
+ }
+
+ return -ENOENT;
+}
+
+static int add_jt_map(struct bpf_object *obj, struct bpf_program *prog, int sym_off, int map_fd)
+{
+ size_t cnt = obj->jumptable_map_cnt;
+ size_t size = sizeof(obj->jumptable_maps[0]);
+ void *tmp;
+
+ tmp = libbpf_reallocarray(obj->jumptable_maps, cnt + 1, size);
+ if (!tmp)
+ return -ENOMEM;
+
+ obj->jumptable_maps = tmp;
+ obj->jumptable_maps[cnt].prog = prog;
+ obj->jumptable_maps[cnt].sym_off = sym_off;
+ obj->jumptable_maps[cnt].fd = map_fd;
+ obj->jumptable_map_cnt++;
+
+ return 0;
+}
+
+static int find_subprog_idx(struct bpf_program *prog, int insn_idx)
+{
+ int i;
+
+ for (i = prog->subprog_cnt - 1; i >= 0; i--) {
+ if (insn_idx >= prog->subprogs[i].sub_insn_off)
+ return i;
+ }
+
+ return -1;
+}
+
+static int create_jt_map(struct bpf_object *obj, struct bpf_program *prog, struct reloc_desc *relo)
+{
+ const __u32 jt_entry_size = 8;
+ int sym_off = relo->sym_off;
+ int jt_size = relo->sym_size;
+ __u32 max_entries = jt_size / jt_entry_size;
+ __u32 value_size = sizeof(struct bpf_insn_array_value);
+ struct bpf_insn_array_value val = {};
+ int subprog_idx;
+ int map_fd, err;
+ __u64 insn_off;
+ __u64 *jt;
+ __u32 i;
+
+ map_fd = find_jt_map(obj, prog, sym_off);
+ if (map_fd >= 0)
+ return map_fd;
+
+ if (sym_off % jt_entry_size) {
+ pr_warn("map '.jumptables': jumptable start %d should be multiple of %u\n",
+ sym_off, jt_entry_size);
+ return -EINVAL;
+ }
+
+ if (jt_size % jt_entry_size) {
+ pr_warn("map '.jumptables': jumptable size %d should be multiple of %u\n",
+ jt_size, jt_entry_size);
+ return -EINVAL;
+ }
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_INSN_ARRAY, ".jumptables",
+ 4, value_size, max_entries, NULL);
+ if (map_fd < 0)
+ return map_fd;
+
+ if (!obj->jumptables_data) {
+ pr_warn("map '.jumptables': ELF file is missing jump table data\n");
+ err = -EINVAL;
+ goto err_close;
+ }
+ if (sym_off + jt_size > obj->jumptables_data_sz) {
+ pr_warn("map '.jumptables': jumptables_data size is %zd, trying to access %d\n",
+ obj->jumptables_data_sz, sym_off + jt_size);
+ err = -EINVAL;
+ goto err_close;
+ }
+
+ subprog_idx = -1; /* main program */
+ if (relo->insn_idx < 0 || relo->insn_idx >= prog->insns_cnt) {
+ pr_warn("map '.jumptables': invalid instruction index %d\n", relo->insn_idx);
+ err = -EINVAL;
+ goto err_close;
+ }
+ if (prog->subprogs)
+ subprog_idx = find_subprog_idx(prog, relo->insn_idx);
+
+ jt = (__u64 *)(obj->jumptables_data + sym_off);
+ for (i = 0; i < max_entries; i++) {
+ /*
+ * The offset should be made to be relative to the beginning of
+ * the main function, not the subfunction.
+ */
+ insn_off = jt[i]/sizeof(struct bpf_insn);
+ if (subprog_idx >= 0) {
+ insn_off -= prog->subprogs[subprog_idx].sec_insn_off;
+ insn_off += prog->subprogs[subprog_idx].sub_insn_off;
+ } else {
+ insn_off -= prog->sec_insn_off;
+ }
+
+ /*
+ * LLVM-generated jump tables contain u64 records; however,
+ * the resulting instruction offsets must fit in u32.
+ */
+ if (insn_off > UINT32_MAX) {
+ pr_warn("map '.jumptables': invalid jump table value 0x%llx at offset %d\n",
+ (long long)jt[i], sym_off + i * jt_entry_size);
+ err = -EINVAL;
+ goto err_close;
+ }
+
+ val.orig_off = insn_off;
+ err = bpf_map_update_elem(map_fd, &i, &val, 0);
+ if (err)
+ goto err_close;
+ }
+
+ err = bpf_map_freeze(map_fd);
+ if (err)
+ goto err_close;
+
+ err = add_jt_map(obj, prog, sym_off, map_fd);
+ if (err)
+ goto err_close;
+
+ return map_fd;
+
+err_close:
+ close(map_fd);
+ return err;
+}
+
/* Relocate data references within program code:
* - map references;
* - global variable references;
@@ -6182,6 +6434,20 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
case RELO_CORE:
/* will be handled by bpf_program_record_relos() */
break;
+ case RELO_INSN_ARRAY: {
+ int map_fd;
+
+ map_fd = create_jt_map(obj, prog, relo);
+ if (map_fd < 0) {
+ pr_warn("prog '%s': relo #%d: can't create jump table: sym_off %u\n",
+ prog->name, i, relo->sym_off);
+ return map_fd;
+ }
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn->imm = map_fd;
+ insn->off = 0;
+ }
+ break;
default:
pr_warn("prog '%s': relo #%d: bad relo type %d\n",
prog->name, i, relo->type);
@@ -6379,36 +6645,62 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra
return 0;
}
+static int save_subprog_offsets(struct bpf_program *main_prog, struct bpf_program *subprog)
+{
+ size_t size = sizeof(main_prog->subprogs[0]);
+ int cnt = main_prog->subprog_cnt;
+ void *tmp;
+
+ tmp = libbpf_reallocarray(main_prog->subprogs, cnt + 1, size);
+ if (!tmp)
+ return -ENOMEM;
+
+ main_prog->subprogs = tmp;
+ main_prog->subprogs[cnt].sec_insn_off = subprog->sec_insn_off;
+ main_prog->subprogs[cnt].sub_insn_off = subprog->sub_insn_off;
+ main_prog->subprog_cnt++;
+
+ return 0;
+}
+
static int
bpf_object__append_subprog_code(struct bpf_object *obj, struct bpf_program *main_prog,
struct bpf_program *subprog)
{
- struct bpf_insn *insns;
- size_t new_cnt;
- int err;
+ struct bpf_insn *insns;
+ size_t new_cnt;
+ int err;
+
+ subprog->sub_insn_off = main_prog->insns_cnt;
- subprog->sub_insn_off = main_prog->insns_cnt;
+ new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
+ insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
+ if (!insns) {
+ pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
+ return -ENOMEM;
+ }
+ main_prog->insns = insns;
+ main_prog->insns_cnt = new_cnt;
- new_cnt = main_prog->insns_cnt + subprog->insns_cnt;
- insns = libbpf_reallocarray(main_prog->insns, new_cnt, sizeof(*insns));
- if (!insns) {
- pr_warn("prog '%s': failed to realloc prog code\n", main_prog->name);
- return -ENOMEM;
- }
- main_prog->insns = insns;
- main_prog->insns_cnt = new_cnt;
+ memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
+ subprog->insns_cnt * sizeof(*insns));
- memcpy(main_prog->insns + subprog->sub_insn_off, subprog->insns,
- subprog->insns_cnt * sizeof(*insns));
+ pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
+ main_prog->name, subprog->insns_cnt, subprog->name);
- pr_debug("prog '%s': added %zu insns from sub-prog '%s'\n",
- main_prog->name, subprog->insns_cnt, subprog->name);
+ /* The subprog insns are now appended. Append its relos too. */
+ err = append_subprog_relos(main_prog, subprog);
+ if (err)
+ return err;
- /* The subprog insns are now appended. Append its relos too. */
- err = append_subprog_relos(main_prog, subprog);
- if (err)
- return err;
- return 0;
+ err = save_subprog_offsets(main_prog, subprog);
+ if (err) {
+ pr_warn("prog '%s': failed to add subprog offsets: %s\n",
+ main_prog->name, errstr(err));
+ return err;
+ }
+
+ return 0;
}
static int
@@ -9175,6 +9467,13 @@ void bpf_object__close(struct bpf_object *obj)
zfree(&obj->arena_data);
+ zfree(&obj->jumptables_data);
+ obj->jumptables_data_sz = 0;
+
+ for (i = 0; i < obj->jumptable_map_cnt; i++)
+ close(obj->jumptable_maps[i].fd);
+ zfree(&obj->jumptable_maps);
+
free(obj);
}
@@ -10514,6 +10813,27 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
return 0;
}
+int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog)
+{
+ if (map_is_created(map)) {
+ pr_warn("exclusive programs must be set before map creation\n");
+ return libbpf_err(-EINVAL);
+ }
+
+ if (map->obj != prog->obj) {
+ pr_warn("excl_prog and map must be from the same bpf object\n");
+ return libbpf_err(-EINVAL);
+ }
+
+ map->excl_prog = prog;
+ return 0;
+}
+
+struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map)
+{
+ return map->excl_prog;
+}
+
static struct bpf_map *
__bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)
{
@@ -11251,8 +11571,6 @@ static const char *arch_specific_syscall_pfx(void)
return "ia32";
#elif defined(__s390x__)
return "s390x";
-#elif defined(__s390__)
- return "s390";
#elif defined(__arm__)
return "arm";
#elif defined(__aarch64__)
@@ -12039,8 +12357,6 @@ static const char *arch_specific_lib_paths(void)
return "/lib/i386-linux-gnu";
#elif defined(__s390x__)
return "/lib/s390x-linux-gnu";
-#elif defined(__s390__)
- return "/lib/s390-linux-gnu";
#elif defined(__arm__) && defined(__SOFTFP__)
return "/lib/arm-linux-gnueabi";
#elif defined(__arm__) && !defined(__SOFTFP__)
@@ -13784,8 +14100,8 @@ int bpf_program__set_attach_target(struct bpf_program *prog,
return libbpf_err(-EINVAL);
if (attach_prog_fd && !attach_func_name) {
- /* remember attach_prog_fd and let bpf_program__load() find
- * BTF ID during the program load
+ /* Store attach_prog_fd. The BTF ID will be resolved later during
+ * the normal object/program load phase.
*/
prog->attach_prog_fd = attach_prog_fd;
return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 455a957cb702..65e68e964b89 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -24,8 +24,25 @@
extern "C" {
#endif
+/**
+ * @brief **libbpf_major_version()** provides the major version of libbpf.
+ * @return An integer, the major version number
+ */
LIBBPF_API __u32 libbpf_major_version(void);
+
+/**
+ * @brief **libbpf_minor_version()** provides the minor version of libbpf.
+ * @return An integer, the minor version number
+ */
LIBBPF_API __u32 libbpf_minor_version(void);
+
+/**
+ * @brief **libbpf_version_string()** provides the version of libbpf in a
+ * human-readable form, e.g., "v1.7".
+ * @return Pointer to a static string containing the version
+ *
+ * The format is *not* a part of a stable API and may change in the future.
+ */
LIBBPF_API const char *libbpf_version_string(void);
enum libbpf_errno {
@@ -49,6 +66,14 @@ enum libbpf_errno {
__LIBBPF_ERRNO__END,
};
+/**
+ * @brief **libbpf_strerror()** converts the provided error code into a
+ * human-readable string.
+ * @param err The error code to convert
+ * @param buf Pointer to a buffer where the error message will be stored
+ * @param size The number of bytes in the buffer
+ * @return 0, on success; negative error code, otherwise
+ */
LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size);
/**
@@ -252,7 +277,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,
* @return 0, on success; negative error code, otherwise, error code is
* stored in errno
*/
-int bpf_object__prepare(struct bpf_object *obj);
+LIBBPF_API int bpf_object__prepare(struct bpf_object *obj);
/**
* @brief **bpf_object__load()** loads BPF object into kernel.
@@ -423,7 +448,7 @@ LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path);
/**
* @brief **bpf_program__unpin()** unpins the BPF program from a file
- * in the BPFFS specified by a path. This decrements the programs
+ * in the BPFFS specified by a path. This decrements program's in-kernel
* reference count.
*
* The file pinning the BPF program can also be unlinked by a different
@@ -456,14 +481,12 @@ LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
/**
* @brief **bpf_link__unpin()** unpins the BPF link from a file
- * in the BPFFS specified by a path. This decrements the links
- * reference count.
+ * in the BPFFS. This decrements link's in-kernel reference count.
*
* The file pinning the BPF link can also be unlinked by a different
* process in which case this function will return an error.
*
- * @param prog BPF program to unpin
- * @param path file path to the pin in a BPF file system
+ * @param link BPF link to unpin
* @return 0, on success; negative error code, otherwise
*/
LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
@@ -970,8 +993,13 @@ LIBBPF_API __u32 bpf_program__line_info_cnt(const struct bpf_program *prog);
* - fentry/fexit/fmod_ret;
* - lsm;
* - freplace.
- * @param prog BPF program to set the attach type for
- * @param type attach type to set the BPF map to have
+ * @param prog BPF program to configure; must not be loaded yet.
+ * @param attach_prog_fd FD of target BPF program (for freplace/extension).
+ * If >0 and func name omitted, defers BTF ID resolution.
+ * @param attach_func_name Target function name. Used either with
+ * attach_prog_fd to find destination BTF type ID in that BPF program, or
+ * alone (no attach_prog_fd) to resolve kernel (vmlinux/module) BTF ID.
+ * Must be provided if attach_prog_fd is 0.
* @return error code; or 0 if no error occurred.
*/
LIBBPF_API int
@@ -1073,6 +1101,7 @@ LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
/**
* @brief **bpf_map__set_value_size()** sets map value size.
* @param map the BPF map instance
+ * @param size the new value size
* @return 0, on success; negative error, otherwise
*
* There is a special case for maps with associated memory-mapped regions, like
@@ -1177,7 +1206,7 @@ LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
* per-CPU values value size has to be aligned up to closest 8 bytes for
* alignment reasons, so expected size is: `round_up(value_size, 8)
* * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
* **bpf_map__lookup_elem()** is high-level equivalent of
@@ -1201,7 +1230,7 @@ LIBBPF_API int bpf_map__lookup_elem(const struct bpf_map *map,
* per-CPU values value size has to be aligned up to closest 8 bytes for
* alignment reasons, so expected size is: `round_up(value_size, 8)
* * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
* **bpf_map__update_elem()** is high-level equivalent of
@@ -1217,7 +1246,7 @@ LIBBPF_API int bpf_map__update_elem(const struct bpf_map *map,
* @param map BPF map to delete element from
* @param key pointer to memory containing bytes of the key
* @param key_sz size in bytes of key data, needs to match BPF map definition's **key_size**
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
* **bpf_map__delete_elem()** is high-level equivalent of
@@ -1240,7 +1269,7 @@ LIBBPF_API int bpf_map__delete_elem(const struct bpf_map *map,
* per-CPU values value size has to be aligned up to closest 8 bytes for
* alignment reasons, so expected size is: `round_up(value_size, 8)
* * libbpf_num_possible_cpus()`.
- * @flags extra flags passed to kernel for this operation
+ * @param flags extra flags passed to kernel for this operation
* @return 0, on success; negative error, otherwise
*
* **bpf_map__lookup_and_delete_elem()** is high-level equivalent of
@@ -1266,6 +1295,28 @@ LIBBPF_API int bpf_map__lookup_and_delete_elem(const struct bpf_map *map,
*/
LIBBPF_API int bpf_map__get_next_key(const struct bpf_map *map,
const void *cur_key, void *next_key, size_t key_sz);
+/**
+ * @brief **bpf_map__set_exclusive_program()** sets a map to be exclusive to the
+ * specified program. This must be called *before* the map is created.
+ *
+ * @param map BPF map to make exclusive.
+ * @param prog BPF program to be the exclusive user of the map. Must belong
+ * to the same bpf_object as the map.
+ * @return 0 on success; a negative error code otherwise.
+ *
+ * This function must be called after the BPF object is opened but before
+ * it is loaded. Once the object is loaded, only the specified program
+ * will be able to access the map's contents.
+ */
+LIBBPF_API int bpf_map__set_exclusive_program(struct bpf_map *map, struct bpf_program *prog);
+
+/**
+ * @brief **bpf_map__exclusive_program()** returns the exclusive program
+ * that is registered with the map (if any).
+ * @param map BPF map to which the exclusive program is registered.
+ * @return the registered exclusive program.
+ */
+LIBBPF_API struct bpf_program *bpf_map__exclusive_program(struct bpf_map *map);
struct bpf_xdp_set_link_opts {
size_t sz;
@@ -1590,6 +1641,7 @@ struct perf_buffer_opts {
* @param sample_cb function called on each received data record
* @param lost_cb function called when record loss has occurred
* @param ctx user-provided extra context passed into *sample_cb* and *lost_cb*
+ * @param opts optional parameters for the perf buffer, can be NULL
* @return a new instance of struct perf_buffer on success, NULL on error with
* *errno* containing an error code
*/
@@ -1810,9 +1862,10 @@ struct gen_loader_opts {
const char *insns;
__u32 data_sz;
__u32 insns_sz;
+ bool gen_hash;
};
-#define gen_loader_opts__last_field insns_sz
+#define gen_loader_opts__last_field gen_hash
LIBBPF_API int bpf_object__gen_loader(struct bpf_object *obj,
struct gen_loader_opts *opts);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index d7bd463e7017..8ed8749907d4 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -448,4 +448,7 @@ LIBBPF_1.6.0 {
} LIBBPF_1.5.0;
LIBBPF_1.7.0 {
+ global:
+ bpf_map__set_exclusive_program;
+ bpf_map__exclusive_program;
} LIBBPF_1.6.0;
diff --git a/tools/lib/bpf/libbpf_errno.c b/tools/lib/bpf/libbpf_errno.c
deleted file mode 100644
index 6b180172ec6b..000000000000
--- a/tools/lib/bpf/libbpf_errno.c
+++ /dev/null
@@ -1,75 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-/*
- * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
- * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
- * Copyright (C) 2015 Huawei Inc.
- * Copyright (C) 2017 Nicira, Inc.
- */
-
-#undef _GNU_SOURCE
-#include <stdio.h>
-#include <string.h>
-
-#include "libbpf.h"
-#include "libbpf_internal.h"
-
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
-#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START)
-#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c)
-#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
-
-static const char *libbpf_strerror_table[NR_ERRNO] = {
- [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf",
- [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid",
- [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost",
- [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch",
- [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf",
- [ERRCODE_OFFSET(RELOC)] = "Relocation failed",
- [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading",
- [ERRCODE_OFFSET(PROG2BIG)] = "Program too big",
- [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version",
- [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type",
- [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message",
- [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence",
- [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing",
-};
-
-int libbpf_strerror(int err, char *buf, size_t size)
-{
- int ret;
-
- if (!buf || !size)
- return libbpf_err(-EINVAL);
-
- err = err > 0 ? err : -err;
-
- if (err < __LIBBPF_ERRNO__START) {
- ret = strerror_r(err, buf, size);
- buf[size - 1] = '\0';
- return libbpf_err_errno(ret);
- }
-
- if (err < __LIBBPF_ERRNO__END) {
- const char *msg;
-
- msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
- ret = snprintf(buf, size, "%s", msg);
- buf[size - 1] = '\0';
- /* The length of the buf and msg is positive.
- * A negative number may be returned only when the
- * size exceeds INT_MAX. Not likely to appear.
- */
- if (ret >= size)
- return libbpf_err(-ERANGE);
- return 0;
- }
-
- ret = snprintf(buf, size, "Unknown libbpf error %d", err);
- buf[size - 1] = '\0';
- if (ret >= size)
- return libbpf_err(-ERANGE);
- return libbpf_err(-ENOENT);
-}
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 477a3b3389a0..fc59b21b51b5 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -74,6 +74,8 @@
#define ELF64_ST_VISIBILITY(o) ((o) & 0x03)
#endif
+#define JUMPTABLES_SEC ".jumptables"
+
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
@@ -172,6 +174,16 @@ do { \
#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)
#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__)
+/**
+ * @brief **libbpf_errstr()** returns string corresponding to numeric errno
+ * @param err numeric errno; either sign is accepted (positive is negated)
+ * @return pointer to string representation of the errno; for codes without a
+ * symbolic name it points to a per-thread buffer reused by the next call.
+ */
+const char *libbpf_errstr(int err);
+
+#define errstr(err) libbpf_errstr(err)
+
#ifndef __has_builtin
#define __has_builtin(x) 0
#endif
@@ -712,6 +724,11 @@ static inline bool is_pow_of_2(size_t x)
return x && (x & (x - 1)) == 0;
}
+static inline __u32 ror32(__u32 v, int bits)
+{
+ return (v >> (bits & 31)) | (v << (-bits & 31)); /* masked: never shift by 32 */
+}
+
#define PROG_LOAD_ATTEMPTS 5
int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts);
@@ -736,4 +753,8 @@ int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern,
int probe_fd(int fd);
+#define SHA256_DIGEST_LENGTH 32
+#define SHA256_DWORD_SIZE (SHA256_DIGEST_LENGTH / sizeof(__u64))
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH]);
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 9dfbe7750f56..bccf4bb747e1 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -364,6 +364,10 @@ static int probe_map_create(enum bpf_map_type map_type)
case BPF_MAP_TYPE_SOCKHASH:
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
break;
+ case BPF_MAP_TYPE_INSN_ARRAY:
+ key_size = sizeof(__u32);
+ value_size = sizeof(struct bpf_insn_array_value);
+ break;
case BPF_MAP_TYPE_UNSPEC:
default:
return -EOPNOTSUPP;
diff --git a/tools/lib/bpf/libbpf_utils.c b/tools/lib/bpf/libbpf_utils.c
new file mode 100644
index 000000000000..ac3beae54cf6
--- /dev/null
+++ b/tools/lib/bpf/libbpf_utils.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Copyright (C) 2013-2015 Alexei Starovoitov <ast@kernel.org>
+ * Copyright (C) 2015 Wang Nan <wangnan0@huawei.com>
+ * Copyright (C) 2015 Huawei Inc.
+ * Copyright (C) 2017 Nicira, Inc.
+ */
+
+#undef _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <linux/kernel.h>
+
+#include "libbpf.h"
+#include "libbpf_internal.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+#define ERRNO_OFFSET(e) ((e) - __LIBBPF_ERRNO__START)
+#define ERRCODE_OFFSET(c) ERRNO_OFFSET(LIBBPF_ERRNO__##c)
+#define NR_ERRNO (__LIBBPF_ERRNO__END - __LIBBPF_ERRNO__START)
+
+static const char *libbpf_strerror_table[NR_ERRNO] = {
+ [ERRCODE_OFFSET(LIBELF)] = "Something wrong in libelf",
+ [ERRCODE_OFFSET(FORMAT)] = "BPF object format invalid",
+ [ERRCODE_OFFSET(KVERSION)] = "'version' section incorrect or lost",
+ [ERRCODE_OFFSET(ENDIAN)] = "Endian mismatch",
+ [ERRCODE_OFFSET(INTERNAL)] = "Internal error in libbpf",
+ [ERRCODE_OFFSET(RELOC)] = "Relocation failed",
+ [ERRCODE_OFFSET(VERIFY)] = "Kernel verifier blocks program loading",
+ [ERRCODE_OFFSET(PROG2BIG)] = "Program too big",
+ [ERRCODE_OFFSET(KVER)] = "Incorrect kernel version",
+ [ERRCODE_OFFSET(PROGTYPE)] = "Kernel doesn't support this program type",
+ [ERRCODE_OFFSET(WRNGPID)] = "Wrong pid in netlink message",
+ [ERRCODE_OFFSET(INVSEQ)] = "Invalid netlink sequence",
+ [ERRCODE_OFFSET(NLPARSE)] = "Incorrect netlink message parsing",
+};
+
+int libbpf_strerror(int err, char *buf, size_t size)
+{
+ int ret;
+
+ if (!buf || !size)
+ return libbpf_err(-EINVAL);
+
+ err = err > 0 ? err : -err;
+
+ if (err < __LIBBPF_ERRNO__START) {
+ ret = strerror_r(err, buf, size);
+ buf[size - 1] = '\0';
+ return libbpf_err_errno(ret);
+ }
+
+ if (err < __LIBBPF_ERRNO__END) {
+ const char *msg;
+
+ msg = libbpf_strerror_table[ERRNO_OFFSET(err)];
+ ret = snprintf(buf, size, "%s", msg);
+ buf[size - 1] = '\0';
+ /* The length of the buf and msg is positive.
+ * A negative number may be returned only when the
+ * size exceeds INT_MAX. Not likely to appear.
+ */
+ if (ret >= size)
+ return libbpf_err(-ERANGE);
+ return 0;
+ }
+
+ ret = snprintf(buf, size, "Unknown libbpf error %d", err);
+ buf[size - 1] = '\0';
+ if (ret >= size)
+ return libbpf_err(-ERANGE);
+ return libbpf_err(-ENOENT);
+}
+
+const char *libbpf_errstr(int err)
+{
+ static __thread char buf[12];
+
+ if (err > 0)
+ err = -err;
+
+ switch (err) {
+ case -E2BIG: return "-E2BIG";
+ case -EACCES: return "-EACCES";
+ case -EADDRINUSE: return "-EADDRINUSE";
+ case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL";
+ case -EAGAIN: return "-EAGAIN";
+ case -EALREADY: return "-EALREADY";
+ case -EBADF: return "-EBADF";
+ case -EBADFD: return "-EBADFD";
+ case -EBUSY: return "-EBUSY";
+ case -ECANCELED: return "-ECANCELED";
+ case -ECHILD: return "-ECHILD";
+ case -EDEADLK: return "-EDEADLK";
+ case -EDOM: return "-EDOM";
+ case -EEXIST: return "-EEXIST";
+ case -EFAULT: return "-EFAULT";
+ case -EFBIG: return "-EFBIG";
+ case -EILSEQ: return "-EILSEQ";
+ case -EINPROGRESS: return "-EINPROGRESS";
+ case -EINTR: return "-EINTR";
+ case -EINVAL: return "-EINVAL";
+ case -EIO: return "-EIO";
+ case -EISDIR: return "-EISDIR";
+ case -ELOOP: return "-ELOOP";
+ case -EMFILE: return "-EMFILE";
+ case -EMLINK: return "-EMLINK";
+ case -EMSGSIZE: return "-EMSGSIZE";
+ case -ENAMETOOLONG: return "-ENAMETOOLONG";
+ case -ENFILE: return "-ENFILE";
+ case -ENODATA: return "-ENODATA";
+ case -ENODEV: return "-ENODEV";
+ case -ENOENT: return "-ENOENT";
+ case -ENOEXEC: return "-ENOEXEC";
+ case -ENOLINK: return "-ENOLINK";
+ case -ENOMEM: return "-ENOMEM";
+ case -ENOSPC: return "-ENOSPC";
+ case -ENOTBLK: return "-ENOTBLK";
+ case -ENOTDIR: return "-ENOTDIR";
+ case -ENOTSUPP: return "-ENOTSUPP";
+ case -ENOTTY: return "-ENOTTY";
+ case -ENXIO: return "-ENXIO";
+ case -EOPNOTSUPP: return "-EOPNOTSUPP";
+ case -EOVERFLOW: return "-EOVERFLOW";
+ case -EPERM: return "-EPERM";
+ case -EPIPE: return "-EPIPE";
+ case -EPROTO: return "-EPROTO";
+ case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT";
+ case -ERANGE: return "-ERANGE";
+ case -EROFS: return "-EROFS";
+ case -ESPIPE: return "-ESPIPE";
+ case -ESRCH: return "-ESRCH";
+ case -ETXTBSY: return "-ETXTBSY";
+ case -EUCLEAN: return "-EUCLEAN";
+ case -EXDEV: return "-EXDEV";
+ default:
+ snprintf(buf, sizeof(buf), "%d", err);
+ return buf;
+ }
+}
+
+static inline __u32 get_unaligned_be32(const void *p)
+{
+ __be32 val;
+
+ memcpy(&val, p, sizeof(val));
+ return be32_to_cpu(val);
+}
+
+static inline void put_unaligned_be32(__u32 val, void *p)
+{
+ __be32 be_val = cpu_to_be32(val);
+
+ memcpy(p, &be_val, sizeof(be_val));
+}
+
+#define SHA256_BLOCK_LENGTH 64
+#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
+#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define Sigma_0(x) (ror32((x), 2) ^ ror32((x), 13) ^ ror32((x), 22))
+#define Sigma_1(x) (ror32((x), 6) ^ ror32((x), 11) ^ ror32((x), 25))
+#define sigma_0(x) (ror32((x), 7) ^ ror32((x), 18) ^ ((x) >> 3))
+#define sigma_1(x) (ror32((x), 17) ^ ror32((x), 19) ^ ((x) >> 10))
+
+static const __u32 sha256_K[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1,
+ 0x923f82a4, 0xab1c5ed5, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, 0xe49b69c1, 0xefbe4786,
+ 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147,
+ 0x06ca6351, 0x14292967, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, 0xa2bfe8a1, 0xa81a664b,
+ 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a,
+ 0x5b9cca4f, 0x682e6ff3, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
+};
+
+#define SHA256_ROUND(i, a, b, c, d, e, f, g, h) \
+ { \
+ __u32 tmp = h + Sigma_1(e) + Ch(e, f, g) + sha256_K[i] + w[i]; \
+ d += tmp; \
+ h = tmp + Sigma_0(a) + Maj(a, b, c); \
+ }
+
+static void sha256_blocks(__u32 state[8], const __u8 *data, size_t nblocks)
+{
+ while (nblocks--) {
+ __u32 a = state[0];
+ __u32 b = state[1];
+ __u32 c = state[2];
+ __u32 d = state[3];
+ __u32 e = state[4];
+ __u32 f = state[5];
+ __u32 g = state[6];
+ __u32 h = state[7];
+ __u32 w[64];
+ int i;
+
+ for (i = 0; i < 16; i++)
+ w[i] = get_unaligned_be32(&data[4 * i]);
+ for (; i < ARRAY_SIZE(w); i++)
+ w[i] = sigma_1(w[i - 2]) + w[i - 7] +
+ sigma_0(w[i - 15]) + w[i - 16];
+ for (i = 0; i < ARRAY_SIZE(w); i += 8) {
+ SHA256_ROUND(i + 0, a, b, c, d, e, f, g, h);
+ SHA256_ROUND(i + 1, h, a, b, c, d, e, f, g);
+ SHA256_ROUND(i + 2, g, h, a, b, c, d, e, f);
+ SHA256_ROUND(i + 3, f, g, h, a, b, c, d, e);
+ SHA256_ROUND(i + 4, e, f, g, h, a, b, c, d);
+ SHA256_ROUND(i + 5, d, e, f, g, h, a, b, c);
+ SHA256_ROUND(i + 6, c, d, e, f, g, h, a, b);
+ SHA256_ROUND(i + 7, b, c, d, e, f, g, h, a);
+ }
+ state[0] += a;
+ state[1] += b;
+ state[2] += c;
+ state[3] += d;
+ state[4] += e;
+ state[5] += f;
+ state[6] += g;
+ state[7] += h;
+ data += SHA256_BLOCK_LENGTH;
+ }
+}
+
+void libbpf_sha256(const void *data, size_t len, __u8 out[SHA256_DIGEST_LENGTH])
+{
+ __u32 state[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
+ const __be64 bitcount = cpu_to_be64((__u64)len * 8);
+ __u8 final_data[2 * SHA256_BLOCK_LENGTH] = { 0 };
+ size_t final_len = len % SHA256_BLOCK_LENGTH;
+ int i;
+
+ sha256_blocks(state, data, len / SHA256_BLOCK_LENGTH);
+ /* Copy the tail via a byte pointer: arithmetic on void * is a GNU extension. */
+ memcpy(final_data, (const __u8 *)data + len - final_len, final_len);
+ final_data[final_len] = 0x80;
+ final_len = roundup(final_len + 9, SHA256_BLOCK_LENGTH);
+ memcpy(&final_data[final_len - 8], &bitcount, 8);
+ /* Padded tail: leftover bytes, 0x80, zero fill, then the big-endian bit count. */
+ sha256_blocks(state, final_data, final_len / SHA256_BLOCK_LENGTH);
+ /* Serialize the eight state words big-endian to form the digest. */
+ for (i = 0; i < ARRAY_SIZE(state); i++)
+ put_unaligned_be32(state[i], &out[4 * i]);
+}
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
index a469e5d4fee7..f4403e3cf994 100644
--- a/tools/lib/bpf/linker.c
+++ b/tools/lib/bpf/linker.c
@@ -25,7 +25,6 @@
#include "btf.h"
#include "libbpf_internal.h"
#include "strset.h"
-#include "str_error.h"
#define BTF_EXTERN_SEC ".extern"
@@ -2026,6 +2025,9 @@ static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
return 0;
}
+
+ if (strcmp(src_sec->sec_name, JUMPTABLES_SEC) == 0)
+ goto add_sym;
}
if (sym_bind == STB_LOCAL)
diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c
index 2b83c98a1137..6eea5edba58a 100644
--- a/tools/lib/bpf/relo_core.c
+++ b/tools/lib/bpf/relo_core.c
@@ -64,7 +64,6 @@ enum libbpf_print_level {
#include "libbpf.h"
#include "bpf.h"
#include "btf.h"
-#include "str_error.h"
#include "libbpf_internal.h"
#endif
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 9702b70da444..00ec4837a06d 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -21,7 +21,6 @@
#include "libbpf.h"
#include "libbpf_internal.h"
#include "bpf.h"
-#include "str_error.h"
struct ring {
ring_buffer_sample_fn sample_cb;
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 4d5fa079b5d6..6a8f5c7a02eb 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -13,10 +13,15 @@
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/mman.h>
+#include <linux/keyctl.h>
#include <stdlib.h>
#include "bpf.h"
#endif
+#ifndef SHA256_DIGEST_LENGTH
+#define SHA256_DIGEST_LENGTH 32
+#endif
+
#ifndef __NR_bpf
# if defined(__mips__) && defined(_ABIO32)
# define __NR_bpf 4355
@@ -64,6 +69,11 @@ struct bpf_load_and_run_opts {
__u32 data_sz;
__u32 insns_sz;
const char *errstr;
+ void *signature;
+ __u32 signature_sz;
+ __s32 keyring_id;
+ void *excl_prog_hash;
+ __u32 excl_prog_hash_sz;
};
long kern_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
@@ -220,14 +230,19 @@ static inline int skel_map_create(enum bpf_map_type map_type,
const char *map_name,
__u32 key_size,
__u32 value_size,
- __u32 max_entries)
+ __u32 max_entries,
+ const void *excl_prog_hash,
+ __u32 excl_prog_hash_sz)
{
- const size_t attr_sz = offsetofend(union bpf_attr, map_extra);
+ const size_t attr_sz = offsetofend(union bpf_attr, excl_prog_hash_size);
union bpf_attr attr;
memset(&attr, 0, attr_sz);
attr.map_type = map_type;
+ attr.excl_prog_hash = (unsigned long) excl_prog_hash;
+ attr.excl_prog_hash_size = excl_prog_hash_sz;
+
strncpy(attr.map_name, map_name, sizeof(attr.map_name));
attr.key_size = key_size;
attr.value_size = value_size;
@@ -300,6 +315,35 @@ static inline int skel_link_create(int prog_fd, int target_fd,
return skel_sys_bpf(BPF_LINK_CREATE, &attr, attr_sz);
}
+static inline int skel_obj_get_info_by_fd(int fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, info);
+ __u8 sha[SHA256_DIGEST_LENGTH];
+ struct bpf_map_info info;
+ __u32 info_len = sizeof(info);
+ union bpf_attr attr;
+
+ memset(&info, 0, sizeof(info));
+ info.hash = (unsigned long) &sha; /* local scratch buffer; digest is not returned */
+ info.hash_size = SHA256_DIGEST_LENGTH;
+
+ memset(&attr, 0, attr_sz);
+ attr.info.bpf_fd = fd;
+ attr.info.info = (unsigned long) &info; /* unsigned: no sign extension on ILP32 */
+ attr.info.info_len = info_len;
+ return skel_sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
+}
+
+static inline int skel_map_freeze(int fd)
+{
+ const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
+ union bpf_attr attr;
+
+ memset(&attr, 0, attr_sz);
+ attr.map_fd = fd;
+
+ return skel_sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
+}
#ifdef __KERNEL__
#define set_err
#else
@@ -308,12 +352,13 @@ static inline int skel_link_create(int prog_fd, int target_fd,
static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
{
- const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+ const size_t prog_load_attr_sz = offsetofend(union bpf_attr, keyring_id);
const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
int map_fd = -1, prog_fd = -1, key = 0, err;
union bpf_attr attr;
- err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1);
+ err = map_fd = skel_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1,
+ opts->excl_prog_hash, opts->excl_prog_hash_sz);
if (map_fd < 0) {
opts->errstr = "failed to create loader map";
set_err;
@@ -327,11 +372,34 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
goto out;
}
+#ifndef __KERNEL__
+ err = skel_map_freeze(map_fd);
+ if (err < 0) {
+ opts->errstr = "failed to freeze map";
+ set_err;
+ goto out;
+ }
+ err = skel_obj_get_info_by_fd(map_fd);
+ if (err < 0) {
+ opts->errstr = "failed to fetch obj info";
+ set_err;
+ goto out;
+ }
+#endif
+
memset(&attr, 0, prog_load_attr_sz);
attr.prog_type = BPF_PROG_TYPE_SYSCALL;
attr.insns = (long) opts->insns;
attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
attr.license = (long) "Dual BSD/GPL";
+#ifndef __KERNEL__
+ attr.signature = (long) opts->signature;
+ attr.signature_size = opts->signature_sz;
+#else
+ if (opts->signature || opts->signature_sz)
+ pr_warn("signatures are not supported from bpf_preload\n");
+#endif
+ attr.keyring_id = opts->keyring_id;
memcpy(attr.prog_name, "__loader.prog", sizeof("__loader.prog"));
attr.fd_array = (long) &map_fd;
attr.log_level = opts->ctx->log_level;
diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c
deleted file mode 100644
index 9a541762f54c..000000000000
--- a/tools/lib/bpf/str_error.c
+++ /dev/null
@@ -1,104 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-#undef _GNU_SOURCE
-#include <string.h>
-#include <stdio.h>
-#include <errno.h>
-#include "str_error.h"
-
-#ifndef ENOTSUPP
-#define ENOTSUPP 524
-#endif
-
-/* make sure libbpf doesn't use kernel-only integer typedefs */
-#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-
-/*
- * Wrapper to allow for building in non-GNU systems such as Alpine Linux's musl
- * libc, while checking strerror_r() return to avoid having to check this in
- * all places calling it.
- */
-char *libbpf_strerror_r(int err, char *dst, int len)
-{
- int ret = strerror_r(err < 0 ? -err : err, dst, len);
- /* on glibc <2.13, ret == -1 and errno is set, if strerror_r() can't
- * handle the error, on glibc >=2.13 *positive* (errno-like) error
- * code is returned directly
- */
- if (ret == -1)
- ret = errno;
- if (ret) {
- if (ret == EINVAL)
- /* strerror_r() doesn't recognize this specific error */
- snprintf(dst, len, "unknown error (%d)", err < 0 ? err : -err);
- else
- snprintf(dst, len, "ERROR: strerror_r(%d)=%d", err, ret);
- }
- return dst;
-}
-
-const char *libbpf_errstr(int err)
-{
- static __thread char buf[12];
-
- if (err > 0)
- err = -err;
-
- switch (err) {
- case -E2BIG: return "-E2BIG";
- case -EACCES: return "-EACCES";
- case -EADDRINUSE: return "-EADDRINUSE";
- case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL";
- case -EAGAIN: return "-EAGAIN";
- case -EALREADY: return "-EALREADY";
- case -EBADF: return "-EBADF";
- case -EBADFD: return "-EBADFD";
- case -EBUSY: return "-EBUSY";
- case -ECANCELED: return "-ECANCELED";
- case -ECHILD: return "-ECHILD";
- case -EDEADLK: return "-EDEADLK";
- case -EDOM: return "-EDOM";
- case -EEXIST: return "-EEXIST";
- case -EFAULT: return "-EFAULT";
- case -EFBIG: return "-EFBIG";
- case -EILSEQ: return "-EILSEQ";
- case -EINPROGRESS: return "-EINPROGRESS";
- case -EINTR: return "-EINTR";
- case -EINVAL: return "-EINVAL";
- case -EIO: return "-EIO";
- case -EISDIR: return "-EISDIR";
- case -ELOOP: return "-ELOOP";
- case -EMFILE: return "-EMFILE";
- case -EMLINK: return "-EMLINK";
- case -EMSGSIZE: return "-EMSGSIZE";
- case -ENAMETOOLONG: return "-ENAMETOOLONG";
- case -ENFILE: return "-ENFILE";
- case -ENODATA: return "-ENODATA";
- case -ENODEV: return "-ENODEV";
- case -ENOENT: return "-ENOENT";
- case -ENOEXEC: return "-ENOEXEC";
- case -ENOLINK: return "-ENOLINK";
- case -ENOMEM: return "-ENOMEM";
- case -ENOSPC: return "-ENOSPC";
- case -ENOTBLK: return "-ENOTBLK";
- case -ENOTDIR: return "-ENOTDIR";
- case -ENOTSUPP: return "-ENOTSUPP";
- case -ENOTTY: return "-ENOTTY";
- case -ENXIO: return "-ENXIO";
- case -EOPNOTSUPP: return "-EOPNOTSUPP";
- case -EOVERFLOW: return "-EOVERFLOW";
- case -EPERM: return "-EPERM";
- case -EPIPE: return "-EPIPE";
- case -EPROTO: return "-EPROTO";
- case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT";
- case -ERANGE: return "-ERANGE";
- case -EROFS: return "-EROFS";
- case -ESPIPE: return "-ESPIPE";
- case -ESRCH: return "-ESRCH";
- case -ETXTBSY: return "-ETXTBSY";
- case -EUCLEAN: return "-EUCLEAN";
- case -EXDEV: return "-EXDEV";
- default:
- snprintf(buf, sizeof(buf), "%d", err);
- return buf;
- }
-}
diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h
deleted file mode 100644
index 53e7fbffc13e..000000000000
--- a/tools/lib/bpf/str_error.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __LIBBPF_STR_ERROR_H
-#define __LIBBPF_STR_ERROR_H
-
-#define STRERR_BUFSIZE 128
-
-char *libbpf_strerror_r(int err, char *dst, int len);
-
-/**
- * @brief **libbpf_errstr()** returns string corresponding to numeric errno
- * @param err negative numeric errno
- * @return pointer to string representation of the errno, that is invalidated
- * upon the next call.
- */
-const char *libbpf_errstr(int err);
-
-#define errstr(err) libbpf_errstr(err)
-
-#endif /* __LIBBPF_STR_ERROR_H */
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index 2a7865c8e3fe..43deb05a5197 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -34,13 +34,32 @@ enum __bpf_usdt_arg_type {
BPF_USDT_ARG_CONST,
BPF_USDT_ARG_REG,
BPF_USDT_ARG_REG_DEREF,
+ BPF_USDT_ARG_SIB,
};
+/*
+ * This struct layout is designed specifically to be backwards/forward
+ * compatible between libbpf versions for ARG_CONST, ARG_REG, and
+ * ARG_REG_DEREF modes. ARG_SIB requires libbpf v1.7+.
+ */
struct __bpf_usdt_arg_spec {
/* u64 scalar interpreted depending on arg_type, see below */
__u64 val_off;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* arg location case, see bpf_usdt_arg() for details */
- enum __bpf_usdt_arg_type arg_type;
+ enum __bpf_usdt_arg_type arg_type: 8;
+ /* index register offset within struct pt_regs */
+ __u16 idx_reg_off: 12;
+ /* scale factor for index register (1, 2, 4, or 8) */
+ __u16 scale_bitshift: 4;
+ /* reserved for future use, keeps reg_off offset stable */
+ __u8 __reserved: 8;
+#else
+ __u8 __reserved: 8;
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ enum __bpf_usdt_arg_type arg_type: 8;
+#endif
/* offset of referenced register within struct pt_regs */
short reg_off;
/* whether arg should be interpreted as signed value */
@@ -149,7 +168,7 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
{
struct __bpf_usdt_spec *spec;
struct __bpf_usdt_arg_spec *arg_spec;
- unsigned long val;
+ unsigned long val, idx;
int err, spec_id;
*res = 0;
@@ -204,6 +223,27 @@ int bpf_usdt_arg(struct pt_regs *ctx, __u64 arg_num, long *res)
val >>= arg_spec->arg_bitshift;
#endif
break;
+ case BPF_USDT_ARG_SIB:
+ /* Arg is in memory addressed by SIB (Scale-Index-Base) mode
+ * (e.g., "-1@-96(%rbp,%rax,8)" in USDT arg spec). We first
+ * fetch the base register contents and the index register
+ * contents from pt_regs. Then we calculate the final address
+ * as base + (index * scale) + offset, and do a user-space
+ * probe read to fetch the argument value.
+ */
+ err = bpf_probe_read_kernel(&val, sizeof(val), (void *)ctx + arg_spec->reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_kernel(&idx, sizeof(idx), (void *)ctx + arg_spec->idx_reg_off);
+ if (err)
+ return err;
+ err = bpf_probe_read_user(&val, sizeof(val), (void *)(val + (idx << arg_spec->scale_bitshift) + arg_spec->val_off));
+ if (err)
+ return err;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ val >>= arg_spec->arg_bitshift;
+#endif
+ break;
default:
return -EINVAL;
}
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index 3373b9d45ac4..d1524f6f54ae 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -20,7 +20,6 @@
#include "libbpf_common.h"
#include "libbpf_internal.h"
#include "hashmap.h"
-#include "str_error.h"
/* libbpf's USDT support consists of BPF-side state/code and user-space
* state/code working together in concert. BPF-side parts are defined in
@@ -200,12 +199,23 @@ enum usdt_arg_type {
USDT_ARG_CONST,
USDT_ARG_REG,
USDT_ARG_REG_DEREF,
+ USDT_ARG_SIB,
};
/* should match exactly struct __bpf_usdt_arg_spec from usdt.bpf.h */
struct usdt_arg_spec {
__u64 val_off;
- enum usdt_arg_type arg_type;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ enum usdt_arg_type arg_type: 8;
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ __u8 __reserved: 8; /* keep reg_off offset stable */
+#else
+ __u8 __reserved: 8; /* keep reg_off offset stable */
+ __u16 idx_reg_off: 12;
+ __u16 scale_bitshift: 4;
+ enum usdt_arg_type arg_type: 8;
+#endif
short reg_off;
bool arg_signed;
char arg_bitshift;
@@ -570,9 +580,8 @@ static struct elf_seg *find_vma_seg(struct elf_seg *segs, size_t seg_cnt, long o
return NULL;
}
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
- const char *data, size_t name_off, size_t desc_off,
- struct usdt_note *usdt_note);
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off,
+ size_t desc_off, struct usdt_note *usdt_note);
static int parse_usdt_spec(struct usdt_spec *spec, const struct usdt_note *note, __u64 usdt_cookie);
@@ -626,7 +635,7 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char *
struct elf_seg *seg = NULL;
void *tmp;
- err = parse_usdt_note(elf, path, &nhdr, data->d_buf, name_off, desc_off, &note);
+ err = parse_usdt_note(&nhdr, data->d_buf, name_off, desc_off, &note);
if (err)
goto err_out;
@@ -1132,8 +1141,7 @@ err_out:
/* Parse out USDT ELF note from '.note.stapsdt' section.
* Logic inspired by perf's code.
*/
-static int parse_usdt_note(Elf *elf, const char *path, GElf_Nhdr *nhdr,
- const char *data, size_t name_off, size_t desc_off,
+static int parse_usdt_note(GElf_Nhdr *nhdr, const char *data, size_t name_off, size_t desc_off,
struct usdt_note *note)
{
const char *provider, *name, *args;
@@ -1283,11 +1291,51 @@ static int calc_pt_regs_off(const char *reg_name)
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
- char reg_name[16];
- int len, reg_off;
- long off;
+ char reg_name[16] = {0}, idx_reg_name[16] = {0};
+ int len, reg_off, idx_reg_off, scale = 1;
+ long off = 0;
+
+ if (sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^,] , %d ) %n",
+ arg_sz, &off, reg_name, idx_reg_name, &scale, &len) == 5 ||
+ sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^,] , %d ) %n",
+ arg_sz, reg_name, idx_reg_name, &scale, &len) == 4 ||
+ sscanf(arg_str, " %d @ %ld ( %%%15[^,] , %%%15[^)] ) %n",
+ arg_sz, &off, reg_name, idx_reg_name, &len) == 4 ||
+ sscanf(arg_str, " %d @ ( %%%15[^,] , %%%15[^)] ) %n",
+ arg_sz, reg_name, idx_reg_name, &len) == 3
+ ) {
+ /*
+ * Scale Index Base case:
+ * 1@-96(%rbp,%rax,8)
+ * 1@(%rbp,%rax,8)
+ * 1@-96(%rbp,%rax)
+ * 1@(%rbp,%rax)
+ */
+ arg->arg_type = USDT_ARG_SIB;
+ arg->val_off = off;
- if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n", arg_sz, &off, reg_name, &len) == 3) {
+ reg_off = calc_pt_regs_off(reg_name);
+ if (reg_off < 0)
+ return reg_off;
+ arg->reg_off = reg_off;
+
+ idx_reg_off = calc_pt_regs_off(idx_reg_name);
+ if (idx_reg_off < 0)
+ return idx_reg_off;
+ arg->idx_reg_off = idx_reg_off;
+
+ /* validate scale factor and set fields directly */
+ switch (scale) {
+ case 1: arg->scale_bitshift = 0; break;
+ case 2: arg->scale_bitshift = 1; break;
+ case 4: arg->scale_bitshift = 2; break;
+ case 8: arg->scale_bitshift = 3; break;
+ default:
+ pr_warn("usdt: invalid SIB scale %d, expected 1, 2, 4, 8\n", scale);
+ return -EINVAL;
+ }
+ } else if (sscanf(arg_str, " %d @ %ld ( %%%15[^)] ) %n",
+ arg_sz, &off, reg_name, &len) == 3) {
/* Memory dereference case, e.g., -4@-20(%rbp) */
arg->arg_type = USDT_ARG_REG_DEREF;
arg->val_off = off;
@@ -1306,6 +1354,7 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
} else if (sscanf(arg_str, " %d @ %%%15s %n", arg_sz, reg_name, &len) == 2) {
/* Register read case, e.g., -4@%eax */
arg->arg_type = USDT_ARG_REG;
+ /* register read has no memory offset */
arg->val_off = 0;
reg_off = calc_pt_regs_off(reg_name);
@@ -1327,8 +1376,6 @@ static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec
#elif defined(__s390x__)
-/* Do not support __s390__ for now, since user_pt_regs is broken with -m31. */
-
static int parse_usdt_arg(const char *arg_str, int arg_num, struct usdt_arg_spec *arg, int *arg_sz)
{
unsigned int reg;
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index b20a5280f2b3..4160e7d2e120 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -368,10 +368,12 @@ struct perf_cpu perf_cpu_map__max(const struct perf_cpu_map *map)
.cpu = -1
};
- // cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well.
- return __perf_cpu_map__nr(map) > 0
- ? __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1)
- : result;
+ if (!map)
+ return result;
+
+ // The CPUs are always sorted and nr is always > 0 as 0 length map is
+ // encoded as NULL.
+ return __perf_cpu_map__cpu(map, __perf_cpu_map__nr(map) - 1);
}
/** Is 'b' a subset of 'a'. */
@@ -453,21 +455,33 @@ int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other)
struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
struct perf_cpu_map *other)
{
- struct perf_cpu *tmp_cpus;
- int tmp_len;
int i, j, k;
- struct perf_cpu_map *merged = NULL;
+ struct perf_cpu_map *merged;
if (perf_cpu_map__is_subset(other, orig))
return perf_cpu_map__get(orig);
if (perf_cpu_map__is_subset(orig, other))
return perf_cpu_map__get(other);
- tmp_len = max(__perf_cpu_map__nr(orig), __perf_cpu_map__nr(other));
- tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
- if (!tmp_cpus)
+ i = j = k = 0;
+ while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
+ i++;
+ else if (__perf_cpu_map__cpu(orig, i).cpu > __perf_cpu_map__cpu(other, j).cpu)
+ j++;
+ else { /* CPUs match. */
+ i++;
+ j++;
+ k++;
+ }
+ }
+ if (k == 0) /* Maps are completely disjoint. */
return NULL;
+ merged = perf_cpu_map__alloc(k);
+ if (!merged)
+ return NULL;
+ /* Entries are added to merged in sorted order, so no need to sort again. */
i = j = k = 0;
while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
if (__perf_cpu_map__cpu(orig, i).cpu < __perf_cpu_map__cpu(other, j).cpu)
@@ -476,11 +490,8 @@ struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
j++;
else {
j++;
- tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+ RC_CHK_ACCESS(merged)->map[k++] = __perf_cpu_map__cpu(orig, i++);
}
}
- if (k)
- merged = cpu_map__trim_new(k, tmp_cpus);
- free(tmp_cpus);
return merged;
}
diff --git a/tools/lib/perf/include/perf/core.h b/tools/lib/perf/include/perf/core.h
index a3f6d68edad7..06cc132d88cf 100644
--- a/tools/lib/perf/include/perf/core.h
+++ b/tools/lib/perf/include/perf/core.h
@@ -5,7 +5,7 @@
#include <stdarg.h>
#ifndef LIBPERF_API
-#define LIBPERF_API __attribute__((visibility("default")))
+#define LIBPERF_API extern __attribute__((visibility("default")))
#endif
enum libperf_print_level {
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 6608f1e3701b..43a8cb04994f 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -151,6 +151,18 @@ struct perf_record_switch {
__u32 next_prev_tid;
};
+struct perf_record_callchain_deferred {
+ struct perf_event_header header;
+ /*
+ * This is to match kernel and (deferred) user stacks together.
+ * The kernel part will be in the sample callchain array after
+ * the PERF_CONTEXT_USER_DEFERRED entry.
+ */
+ __u64 cookie;
+ __u64 nr;
+ __u64 ips[];
+};
+
struct perf_record_header_attr {
struct perf_event_header header;
struct perf_event_attr attr;
@@ -291,6 +303,7 @@ struct perf_record_header_event_type {
struct perf_record_header_tracing_data {
struct perf_event_header header;
__u32 size;
+ __u32 pad;
};
#define PERF_RECORD_MISC_BUILD_ID_SIZE (1 << 15)
@@ -522,6 +535,7 @@ union perf_event {
struct perf_record_read read;
struct perf_record_throttle throttle;
struct perf_record_sample sample;
+ struct perf_record_callchain_deferred callchain_deferred;
struct perf_record_bpf_event bpf;
struct perf_record_ksymbol ksymbol;
struct perf_record_text_poke_event text_poke;
diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c
index c1a51d925e0e..ec124eb0ec0a 100644
--- a/tools/lib/perf/mmap.c
+++ b/tools/lib/perf/mmap.c
@@ -508,7 +508,7 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count
idx = READ_ONCE(pc->index);
cnt = READ_ONCE(pc->offset);
if (pc->cap_user_rdpmc && idx) {
- s64 evcnt = read_perf_counter(idx - 1);
+ u64 evcnt = read_perf_counter(idx - 1);
u16 width = READ_ONCE(pc->pmc_width);
evcnt <<= 64 - width;
diff --git a/tools/lib/python/__init__.py b/tools/lib/python/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/__init__.py
diff --git a/tools/lib/python/abi/__init__.py b/tools/lib/python/abi/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/abi/__init__.py
diff --git a/tools/lib/python/abi/abi_parser.py b/tools/lib/python/abi/abi_parser.py
new file mode 100644
index 000000000000..9b8db70067ef
--- /dev/null
+++ b/tools/lib/python/abi/abi_parser.py
@@ -0,0 +1,628 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0903,R0911,R0912,R0913,R0914,R0915,R0917,C0302
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+from argparse import Namespace
+import logging
+import os
+import re
+
+from pprint import pformat
+from random import randrange, seed
+
+# Import Python modules
+
+from abi.helpers import AbiDebug, ABI_DIR
+
+
+class AbiParser:
+ """Main class to parse ABI files"""
+
+ TAGS = r"(what|where|date|kernelversion|contact|description|users)"
+ XREF = r"(?:^|\s|\()(\/(?:sys|config|proc|dev|kvd)\/[^,.:;\)\s]+)(?:[,.:;\)\s]|\Z)"
+
+ def __init__(self, directory, logger=None,
+ enable_lineno=False, show_warnings=True, debug=0):
+ """Stores arguments for the class and initialize class vars"""
+
+ self.directory = directory
+ self.enable_lineno = enable_lineno
+ self.show_warnings = show_warnings
+ self.debug = debug
+
+ if not logger:
+ self.log = logging.getLogger("get_abi")
+ else:
+ self.log = logger
+
+ self.data = {}
+ self.what_symbols = {}
+ self.file_refs = {}
+ self.what_refs = {}
+
+ # Ignore files that contain such suffixes
+ self.ignore_suffixes = (".rej", ".org", ".orig", ".bak", "~")
+
+ # Regular expressions used on parser
+ self.re_abi_dir = re.compile(r"(.*)" + ABI_DIR)
+ self.re_tag = re.compile(r"(\S+)(:\s*)(.*)", re.I)
+ self.re_valid = re.compile(self.TAGS)
+ self.re_start_spc = re.compile(r"(\s*)(\S.*)")
+ self.re_whitespace = re.compile(r"^\s+")
+
+ # Regular expressions used on print
+ self.re_what = re.compile(r"(\/?(?:[\w\-]+\/?){1,2})")
+ self.re_escape = re.compile(r"([\.\x01-\x08\x0e-\x1f\x21-\x2f\x3a-\x40\x7b-\xff])")
+ self.re_unprintable = re.compile(r"([\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\xff]+)")
+ self.re_title_mark = re.compile(r"\n[\-\*\=\^\~]+\n")
+ self.re_doc = re.compile(r"Documentation/(?!devicetree)(\S+)\.rst")
+ self.re_abi = re.compile(r"(Documentation/ABI/)([\w\/\-]+)")
+ self.re_xref_node = re.compile(self.XREF)
+
+ def warn(self, fdata, msg, extra=None):
+ """Displays a parse error if warning is enabled"""
+
+ if not self.show_warnings:
+ return
+
+ msg = f"{fdata.fname}:{fdata.ln}: {msg}"
+ if extra:
+ msg += "\n\t\t" + extra
+
+ self.log.warning(msg)
+
+ def add_symbol(self, what, fname, ln=None, xref=None):
+ """Create a reference table describing where each 'what' is located"""
+
+ if what not in self.what_symbols:
+ self.what_symbols[what] = {"file": {}}
+
+ if fname not in self.what_symbols[what]["file"]:
+ self.what_symbols[what]["file"][fname] = []
+
+ if ln and ln not in self.what_symbols[what]["file"][fname]:
+ self.what_symbols[what]["file"][fname].append(ln)
+
+ if xref:
+ self.what_symbols[what]["xref"] = xref
+
+ def _parse_line(self, fdata, line):
+ """Parse a single line of an ABI file"""
+
+ new_what = False
+ new_tag = False
+ content = None
+
+ match = self.re_tag.match(line)
+ if match:
+ new = match.group(1).lower()
+ sep = match.group(2)
+ content = match.group(3)
+
+ match = self.re_valid.search(new)
+ if match:
+ new_tag = match.group(1)
+ else:
+ if fdata.tag == "description":
+ # New "tag" is actually part of description.
+ # Don't consider it a tag
+ new_tag = False
+ elif fdata.tag != "":
+ self.warn(fdata, f"tag '{fdata.tag}' is invalid", line)
+
+ if new_tag:
+ # "where" is Invalid, but was a common mistake. Warn if found
+ if new_tag == "where":
+ self.warn(fdata, "tag 'Where' is invalid. Should be 'What:' instead")
+ new_tag = "what"
+
+ if new_tag == "what":
+ fdata.space = None
+
+ if content not in self.what_symbols:
+ self.add_symbol(what=content, fname=fdata.fname, ln=fdata.ln)
+
+ if fdata.tag == "what":
+ fdata.what.append(content.strip("\n"))
+ else:
+ if fdata.key:
+ if "description" not in self.data.get(fdata.key, {}):
+ self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+ for w in fdata.what:
+ self.add_symbol(what=w, fname=fdata.fname,
+ ln=fdata.what_ln, xref=fdata.key)
+
+ fdata.label = content
+ new_what = True
+
+ key = "abi_" + content.lower()
+ fdata.key = self.re_unprintable.sub("_", key).strip("_")
+
+ # Avoid duplicated keys but using a defined seed, to make
+ # the namespace identical if there aren't changes at the
+ # ABI symbols
+ seed(42)
+
+ while fdata.key in self.data:
+ char = randrange(0, 51) + ord("A")
+ if char > ord("Z"):
+ char += ord("a") - ord("Z") - 1
+
+ fdata.key += chr(char)
+
+ if fdata.key and fdata.key not in self.data:
+ self.data[fdata.key] = {
+ "what": [content],
+ "file": [fdata.file_ref],
+ "path": fdata.ftype,
+ "line_no": fdata.ln,
+ }
+
+ fdata.what = self.data[fdata.key]["what"]
+
+ self.what_refs[content] = fdata.key
+ fdata.tag = new_tag
+ fdata.what_ln = fdata.ln
+
+ if fdata.nametag["what"]:
+ t = (content, fdata.key)
+ if t not in fdata.nametag["symbols"]:
+ fdata.nametag["symbols"].append(t)
+
+ return
+
+ if fdata.tag and new_tag:
+ fdata.tag = new_tag
+
+ if new_what:
+ fdata.label = ""
+
+ if "description" in self.data[fdata.key]:
+ self.data[fdata.key]["description"] += "\n\n"
+
+ if fdata.file_ref not in self.data[fdata.key]["file"]:
+ self.data[fdata.key]["file"].append(fdata.file_ref)
+
+ if self.debug == AbiDebug.WHAT_PARSING:
+ self.log.debug("what: %s", fdata.what)
+
+ if not fdata.what:
+ self.warn(fdata, "'What:' should come first:", line)
+ return
+
+ if new_tag == "description":
+ fdata.space = None
+
+ if content:
+ sep = sep.replace(":", " ")
+
+ c = " " * len(new_tag) + sep + content
+ c = c.expandtabs()
+
+ match = self.re_start_spc.match(c)
+ if match:
+ # Preserve initial spaces for the first line
+ fdata.space = match.group(1)
+ content = match.group(2) + "\n"
+
+ self.data[fdata.key][fdata.tag] = content
+
+ return
+
+ # Store any contents before tags at the database
+ if not fdata.tag and "what" in fdata.nametag:
+ fdata.nametag["description"] += line
+ return
+
+ if fdata.tag == "description":
+ content = line.expandtabs()
+
+ if self.re_whitespace.sub("", content) == "":
+ self.data[fdata.key][fdata.tag] += "\n"
+ return
+
+ if fdata.space is None:
+ match = self.re_start_spc.match(content)
+ if match:
+ # Preserve initial spaces for the first line
+ fdata.space = match.group(1)
+
+ content = match.group(2) + "\n"
+ else:
+ if content.startswith(fdata.space):
+ content = content[len(fdata.space):]
+
+ else:
+ fdata.space = ""
+
+ if fdata.tag == "what":
+ w = content.strip("\n")
+ if w:
+ self.data[fdata.key][fdata.tag].append(w)
+ else:
+ self.data[fdata.key][fdata.tag] += content
+ return
+
+ content = line.strip()
+ if fdata.tag:
+ if fdata.tag == "what":
+ w = content.strip("\n")
+ if w:
+ self.data[fdata.key][fdata.tag].append(w)
+ else:
+ self.data[fdata.key][fdata.tag] += "\n" + content.rstrip("\n")
+ return
+
+ # Everything else is error
+ if content:
+ self.warn(fdata, "Unexpected content", line)
+
+ def parse_readme(self, nametag, fname):
+ """Parse ABI README file"""
+
+ nametag["what"] = ["Introduction"]
+ nametag["path"] = "README"
+ with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
+ for line in fp:
+ match = self.re_tag.match(line)
+ if match:
+ new = match.group(1).lower()
+
+ match = self.re_valid.search(new)
+ if match:
+ nametag["description"] += "\n:" + line
+ continue
+
+ nametag["description"] += line
+
+ def parse_file(self, fname, path, basename):
+ """Parse a single ABI file and record its contents at self.data"""
+
+ ref = f"abi_file_{path}_{basename}"
+ ref = self.re_unprintable.sub("_", ref).strip("_")
+
+ # Store per-file state into a fresh Namespace instance (not the class
+ # itself). It is used by the per-line parser and by the warning function.
+ fdata = Namespace()
+
+ fdata.fname = fname
+ fdata.name = basename
+
+ pos = fname.find(ABI_DIR)
+ if pos > 0:
+ f = fname[pos:]
+ else:
+ f = fname
+
+ fdata.file_ref = (f, ref)
+ self.file_refs[f] = ref
+
+ fdata.ln = 0
+ fdata.what_ln = 0
+ fdata.tag = ""
+ fdata.label = ""
+ fdata.what = []
+ fdata.key = None
+ fdata.xrefs = None
+ fdata.space = None
+ fdata.ftype = path.split("/")[0]
+
+ fdata.nametag = {}
+ fdata.nametag["what"] = [f"ABI file {path}/{basename}"]
+ fdata.nametag["type"] = "File"
+ fdata.nametag["path"] = fdata.ftype
+ fdata.nametag["file"] = [fdata.file_ref]
+ fdata.nametag["line_no"] = 1
+ fdata.nametag["description"] = ""
+ fdata.nametag["symbols"] = []
+
+ self.data[ref] = fdata.nametag
+
+ if self.debug & AbiDebug.WHAT_OPEN:
+ self.log.debug("Opening file %s", fname)
+
+ if basename == "README":
+ self.parse_readme(fdata.nametag, fname)
+ return
+
+ with open(fname, "r", encoding="utf8", errors="backslashreplace") as fp:
+ for line in fp:
+ fdata.ln += 1
+
+ self._parse_line(fdata, line)
+
+ if "description" in fdata.nametag:
+ fdata.nametag["description"] = fdata.nametag["description"].lstrip("\n")
+
+ if fdata.key:
+ if "description" not in self.data.get(fdata.key, {}):
+ self.warn(fdata, f"{fdata.key} doesn't have a description")
+
+ for w in fdata.what:
+ self.add_symbol(what=w, fname=fname, xref=fdata.key)
+
+ def _parse_abi(self, root=None):
+ """Internal function to parse documentation ABI recursively"""
+
+ if not root:
+ root = self.directory
+
+ with os.scandir(root) as obj:
+ for entry in obj:
+ name = os.path.join(root, entry.name)
+
+ if entry.is_dir():
+ self._parse_abi(name)
+ continue
+
+ if not entry.is_file():
+ continue
+
+ basename = os.path.basename(name)
+
+ if basename.startswith("."):
+ continue
+
+ if basename.endswith(self.ignore_suffixes):
+ continue
+
+ path = self.re_abi_dir.sub("", os.path.dirname(name))
+
+ self.parse_file(name, path, basename)
+
+ def parse_abi(self, root=None):
+ """Parse documentation ABI"""
+
+ self._parse_abi(root)
+
+ if self.debug & AbiDebug.DUMP_ABI_STRUCTS:
+ self.log.debug(pformat(self.data))
+
+ def desc_txt(self, desc):
+ """Print description as found inside ABI files"""
+
+ desc = desc.strip(" \t\n")
+
+ return desc + "\n\n"
+
+ def xref(self, fname):
+ """
+ Converts a Documentation/ABI + basename into a ReST cross-reference
+ """
+
+ xref = self.file_refs.get(fname)
+ if not xref:
+ return None
+ else:
+ return xref
+
+ def desc_rst(self, desc):
+ """Enrich ReST output by creating cross-references"""
+
+ # Remove title markups from the description
+ # Having titles inside ABI files will only work if extra
+ # care would be taken in order to strictly follow the same
+ # level order for each markup.
+ desc = self.re_title_mark.sub("\n\n", "\n" + desc)
+ desc = desc.rstrip(" \t\n").lstrip("\n")
+
+ # Python's regex performance for non-compiled expressions is a lot
+ # worse than Perl's, as Perl automatically caches them at their
+ # first usage. Here, we'll need to do the same, as otherwise the
+ # performance penalty would be high
+
+ new_desc = ""
+ for d in desc.split("\n"):
+ if d == "":
+ new_desc += "\n"
+ continue
+
+ # Use cross-references for doc files where needed
+ d = self.re_doc.sub(r":doc:`/\1`", d)
+
+ # Use cross-references for ABI generated docs where needed
+ matches = self.re_abi.findall(d)
+ for m in matches:
+ abi = m[0] + m[1]
+
+ xref = self.file_refs.get(abi)
+ if not xref:
+ # This may happen if ABI is on a separate directory,
+ # like parsing ABI testing and symbol is at stable.
+ # The proper solution is to move this part of the code
+ # for it to be inside sphinx/kernel_abi.py
+ self.log.info("Didn't find ABI reference for '%s'", abi)
+ else:
+ new = self.re_escape.sub(r"\\\1", m[1])
+ d = re.sub(fr"\b{abi}\b", f":ref:`{new} <{xref}>`", d)
+
+ # Seek for cross reference symbols like /sys/...
+ # Need to be careful to avoid doing it on a code block
+ if d[0] not in [" ", "\t"]:
+ matches = self.re_xref_node.findall(d)
+ for m in matches:
+ # Finding ABI here is more complex due to wildcards
+ xref = self.what_refs.get(m)
+ if xref:
+ new = self.re_escape.sub(r"\\\1", m)
+ d = re.sub(fr"\b{m}\b", f":ref:`{new} <{xref}>`", d)
+
+ new_desc += d + "\n"
+
+ return new_desc + "\n\n"
+
+ def doc(self, output_in_txt=False, show_symbols=True, show_file=True,
+ filter_path=None):
+ """Print ABI at stdout"""
+
+ part = None
+ for key, v in sorted(self.data.items(),
+ key=lambda x: (x[1].get("type", ""),
+ x[1].get("what"))):
+
+ wtype = v.get("type", "Symbol")
+ file_ref = v.get("file")
+ names = v.get("what", [""])
+
+ if wtype == "File":
+ if not show_file:
+ continue
+ else:
+ if not show_symbols:
+ continue
+
+ if filter_path:
+ if v.get("path") != filter_path:
+ continue
+
+ msg = ""
+
+ if wtype != "File":
+ cur_part = names[0]
+ if cur_part.find("/") >= 0:
+ match = self.re_what.match(cur_part)
+ if match:
+ symbol = match.group(1).rstrip("/")
+ cur_part = "Symbols under " + symbol
+
+ if cur_part and cur_part != part:
+ part = cur_part
+ msg += part + "\n"+ "-" * len(part) +"\n\n"
+
+ msg += f".. _{key}:\n\n"
+
+ max_len = 0
+ for i in range(0, len(names)): # pylint: disable=C0200
+ names[i] = "**" + self.re_escape.sub(r"\\\1", names[i]) + "**"
+
+ max_len = max(max_len, len(names[i]))
+
+ msg += "+-" + "-" * max_len + "-+\n"
+ for name in names:
+ msg += f"| {name}" + " " * (max_len - len(name)) + " |\n"
+ msg += "+-" + "-" * max_len + "-+\n"
+ msg += "\n"
+
+ for ref in file_ref:
+ if wtype == "File":
+ msg += f".. _{ref[1]}:\n\n"
+ else:
+ base = os.path.basename(ref[0])
+ msg += f"Defined on file :ref:`{base} <{ref[1]}>`\n\n"
+
+ if wtype == "File":
+ msg += names[0] +"\n" + "-" * len(names[0]) +"\n\n"
+
+ desc = v.get("description")
+ if not desc and wtype != "File":
+ msg += f"DESCRIPTION MISSING for {names[0]}\n\n"
+
+ if desc:
+ if output_in_txt:
+ msg += self.desc_txt(desc)
+ else:
+ msg += self.desc_rst(desc)
+
+ symbols = v.get("symbols")
+ if symbols:
+ msg += "Has the following ABI:\n\n"
+
+ for w, label in symbols:
+ # Escape special chars from content
+ content = self.re_escape.sub(r"\\\1", w)
+
+ msg += f"- :ref:`{content} <{label}>`\n\n"
+
+ users = v.get("users")
+ if users and users.strip(" \t\n"):
+ users = users.strip("\n").replace('\n', '\n\t')
+ msg += f"Users:\n\t{users}\n\n"
+
+ ln = v.get("line_no", 1)
+
+ yield (msg, file_ref[0][0], ln)
+
+ def check_issues(self):
+ """Warn about 'What' symbols that are defined in more than one file"""
+
+ for what, v in self.what_symbols.items():
+ files = v.get("file")
+ if not files:
+ # Should never happen if the parser works properly
+ self.log.warning("%s doesn't have a file associated", what)
+ continue
+
+ if len(files) == 1:
+ continue
+
+ f = []
+ for fname, lines in sorted(files.items()):
+ if not lines:
+ f.append(f"{fname}")
+ elif len(lines) == 1:
+ f.append(f"{fname}:{lines[0]}")
+ else:
+ m = fname + " lines "
+ m += ", ".join(str(x) for x in lines)
+ f.append(m)
+
+ self.log.warning("%s is defined %d times: %s", what, len(f), "; ".join(f))
+
+ def search_symbols(self, expr):
+ """Print ABI symbols whose 'What' matches the case-insensitive regex expr"""
+
+ regex = re.compile(expr, re.I)
+
+ found_keys = 0
+ for t in sorted(self.data.items(), key=lambda x: x[0]):
+ v = t[1]
+
+ wtype = v.get("type", "")
+ if wtype == "File":
+ continue
+
+ for what in v.get("what", [""]):
+ if regex.search(what):
+ found_keys += 1
+
+ kernelversion = v.get("kernelversion", "").strip(" \t\n")
+ date = v.get("date", "").strip(" \t\n")
+ contact = v.get("contact", "").strip(" \t\n")
+ users = v.get("users", "").strip(" \t\n")
+ desc = v.get("description", "").strip(" \t\n")
+
+ files = []
+ for f in v.get("file", ()):
+ files.append(f[0])
+
+ what = str(found_keys) + ". " + what
+ title_tag = "-" * len(what)
+
+ print(f"\n{what}\n{title_tag}\n")
+
+ if kernelversion:
+ print(f"Kernel version:\t\t{kernelversion}")
+
+ if date:
+ print(f"Date:\t\t\t{date}")
+
+ if contact:
+ print(f"Contact:\t\t{contact}")
+
+ if users:
+ print(f"Users:\t\t\t{users}")
+
+ print("Defined on file(s):\t" + ", ".join(files))
+
+ if desc:
+ desc = desc.strip("\n")
+ print(f"\n{desc}\n")
+
+ if not found_keys:
+ print(f"Regular expression /{expr}/ not found.")
diff --git a/tools/lib/python/abi/abi_regex.py b/tools/lib/python/abi/abi_regex.py
new file mode 100644
index 000000000000..d5553206de3c
--- /dev/null
+++ b/tools/lib/python/abi/abi_regex.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+# xxpylint: disable=R0903
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Convert ABI what into regular expressions
+"""
+
+import re
+import sys
+
+from pprint import pformat
+
+from abi.abi_parser import AbiParser
+from abi.helpers import AbiDebug
+
+class AbiRegex(AbiParser):
+ """Extends AbiParser to search ABI nodes with regular expressions"""
+
+ # Escape only ASCII visible characters
+ escape_symbols = r"([\x21-\x29\x2b-\x2d\x3a-\x40\x5c\x60\x7b-\x7e])"
+ leave_others = "others"
+
+ # Tuples with regular expressions to be compiled and replacement data
+ re_whats = [
+ # Drop escape characters that might exist
+ (re.compile("\\\\"), ""),
+
+ # Temporarily escape dot characters
+ (re.compile(r"\."), "\xf6"),
+
+ # Temporarily change [0-9]+ type of patterns
+ (re.compile(r"\[0\-9\]\+"), "\xff"),
+
+ # Temporarily change [\d+-\d+] type of patterns
+ (re.compile(r"\[0\-\d+\]"), "\xff"),
+ (re.compile(r"\[0:\d+\]"), "\xff"),
+ (re.compile(r"\[(\d+)\]"), "\xf4\\\\d+\xf5"),
+
+ # Temporarily change [0-9] type of patterns (backrefs need "\\1", not "\1")
+ (re.compile(r"\[(\d)\-(\d)\]"), "\xf4\\1-\\2\xf5"),
+
+ # Handle multiple option patterns
+ (re.compile(r"[\{\<\[]([\w_]+)(?:[,|]+([\w_]+)){1,}[\}\>\]]"), r"(\1|\2)"),
+
+ # Handle wildcards
+ (re.compile(r"([^\/])\*"), "\\1\\\\w\xf7"),
+ (re.compile(r"/\*/"), "/.*/"),
+ (re.compile(r"/\xf6\xf6\xf6"), "/.*"),
+ (re.compile(r"\<[^\>]+\>"), "\\\\w\xf7"),
+ (re.compile(r"\{[^\}]+\}"), "\\\\w\xf7"),
+ (re.compile(r"\[[^\]]+\]"), "\\\\w\xf7"),
+
+ (re.compile(r"XX+"), "\\\\w\xf7"),
+ (re.compile(r"([^A-Z])[XYZ]([^A-Z])"), "\\1\\\\w\xf7\\2"),
+ (re.compile(r"([^A-Z])[XYZ]$"), "\\1\\\\w\xf7"),
+ (re.compile(r"_[AB]_"), "_\\\\w\xf7_"),
+
+ # Recover [0-9] type of patterns
+ (re.compile(r"\xf4"), "["),
+ (re.compile(r"\xf5"), "]"),
+
+ # Remove duplicated spaces
+ (re.compile(r"\s+"), r" "),
+
+ # Special case: drop comparison as in:
+ # What: foo = <something>
+ # (this happens on a few IIO definitions)
+ (re.compile(r"\s*\=.*$"), ""),
+
+ # Escape all other symbols
+ (re.compile(escape_symbols), r"\\\1"),
+ (re.compile(r"\\\\"), r"\\"),
+ (re.compile(r"\\([\[\]\(\)\|])"), r"\1"),
+ (re.compile(r"(\d+)\\(-\d+)"), r"\1\2"),
+
+ (re.compile(r"\xff"), r"\\d+"),
+
+ # Special case: IIO ABI which has a parenthesis.
+ (re.compile(r"sqrt(.*)"), r"sqrt(.*)"),
+
+ # Simplify regexes with multiple .*
+ (re.compile(r"(?:\.\*){2,}"), ""),
+
+ # Recover dot characters
+ (re.compile(r"\xf6"), "\\."),
+ # Recover plus characters
+ (re.compile(r"\xf7"), "+"),
+ ]
+ re_has_num = re.compile(r"\\d")
+
+ # Symbol name after escape_chars that are considered a devnode basename
+ re_symbol_name = re.compile(r"(\w|\\[\.\-\:])+$")
+
+ # List of popular group names to be skipped to minimize regex group size
+ # Use AbiDebug.SUBGROUP_SIZE to detect those
+ skip_names = set(["devices", "hwmon"])
+
+ def regex_append(self, what, new):
+ """
+ Get a search group for a subset of regular expressions.
+
+ As ABI may have thousands of symbols, using a for to search all
+ regular expressions is at least O(n^2). When there are wildcards,
+ the complexity increases substantially, eventually becoming exponential.
+
+ To avoid spending too much time on them, use a logic to split
+ them into groups. The smaller the group, the better, as it would
+ mean that searches will be confined to a small number of regular
+ expressions.
+
+ The conversion to a regex subset is tricky, as we need something
+ that can be easily obtained from the sysfs symbol and from the
+ regular expression. So, we need to discard nodes that have
+ wildcards.
+
+ If it can't obtain a subgroup, place the regular expression inside
+ a special group (self.leave_others).
+ """
+
+ search_group = None
+
+ for search_group in reversed(new.split("/")):
+ if not search_group or search_group in self.skip_names:
+ continue
+ if self.re_symbol_name.match(search_group):
+ break
+
+ if not search_group:
+ search_group = self.leave_others
+
+ if self.debug & AbiDebug.SUBGROUP_MAP:
+ self.log.debug("%s: mapped as %s", what, search_group)
+
+ try:
+ if search_group not in self.regex_group:
+ self.regex_group[search_group] = []
+
+ self.regex_group[search_group].append(re.compile(new))
+ if self.search_string:
+ if what.find(self.search_string) >= 0:
+ print(f"What: {what}")
+ except re.error:  # re.PatternError only exists on Python >= 3.13
+ self.log.warning("Ignoring '%s' as it produced an invalid regex:\n"
+ " '%s'", what, new)
+
+ def get_regexes(self, what):
+ """
+ Given an ABI devnode, return a list of all regular expressions that
+ may match it, based on the sub-groups created by regex_append()
+ """
+
+ re_list = []
+
+ patches = what.split("/")
+ patches.reverse()
+ patches.append(self.leave_others)
+
+ for search_group in patches:
+ if search_group in self.regex_group:
+ re_list += self.regex_group[search_group]
+
+ return re_list
+
+ def __init__(self, *args, **kwargs):
+ """
+ Override init method to consume the search_string keyword argument
+ """
+
+ self.regex_group = None
+ self.search_string = None
+ self.re_string = None
+
+ if "search_string" in kwargs:
+ self.search_string = kwargs.get("search_string")
+ del kwargs["search_string"]
+
+ if self.search_string:
+
+ try:
+ self.re_string = re.compile(self.search_string)
+ except re.error as e:  # re.PatternError only exists on Python >= 3.13
+ msg = f"{self.search_string} is not a valid regular expression"
+ raise ValueError(msg) from e
+
+ super().__init__(*args, **kwargs)
+
+ def parse_abi(self, *args, **kwargs):
+ """Parse ABI files, then convert each /sys 'What' into a grouped regex"""
+ super().parse_abi(*args, **kwargs)
+
+ self.regex_group = {}
+
+ print("Converting ABI What fields into regexes...", file=sys.stderr)
+
+ for t in sorted(self.data.items(), key=lambda x: x[0]):
+ v = t[1]
+ if v.get("type") == "File":
+ continue
+
+ v["regex"] = []
+
+ for what in v.get("what", []):
+ if not what.startswith("/sys"):
+ continue
+
+ new = what
+ for r, s in self.re_whats:
+ try:
+ new = r.sub(s, new)
+ except re.error as e:
+ # Help debugging troubles with new regexes
+ raise re.error(f"{e}\nwhile re.sub('{r.pattern}', {s}, str)") from e
+
+ v["regex"].append(new)
+
+ if self.debug & AbiDebug.REGEX:
+ self.log.debug("%-90s <== %s", new, what)
+
+ # Store regex into a subgroup to speedup searches
+ self.regex_append(what, new)
+
+ if self.debug & AbiDebug.SUBGROUP_DICT:
+ self.log.debug("%s", pformat(self.regex_group))
+
+ if self.debug & AbiDebug.SUBGROUP_SIZE:
+ biggestd_keys = sorted(self.regex_group.keys(),
+ key= lambda k: len(self.regex_group[k]),
+ reverse=True)
+
+ print("Top regex subgroups:", file=sys.stderr)
+ for k in biggestd_keys[:10]:
+ print(f"{k} has {len(self.regex_group[k])} elements", file=sys.stderr)
diff --git a/tools/lib/python/abi/helpers.py b/tools/lib/python/abi/helpers.py
new file mode 100644
index 000000000000..639b23e4ca33
--- /dev/null
+++ b/tools/lib/python/abi/helpers.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=R0903
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Helper classes for ABI parser
+"""
+
+ABI_DIR = "Documentation/ABI/"
+
+
+class AbiDebug:
+ """Debug levels"""
+
+ WHAT_PARSING = 1
+ WHAT_OPEN = 2
+ DUMP_ABI_STRUCTS = 4
+ UNDEFINED = 8
+ REGEX = 16
+ SUBGROUP_MAP = 32
+ SUBGROUP_DICT = 64
+ SUBGROUP_SIZE = 128
+ GRAPH = 256
+
+
+DEBUG_HELP = """
+1   - enable debug parsing logic
+2   - enable debug messages on file open
+4   - enable debug for ABI parse data
+8   - enable extra debug information to identify troubles with local ABI
+      symbols missing on ABI documentation (only for undefined subcommand)
+16  - enable debug for what to regex conversion
+32  - enable debug for symbol regex subgroups
+64  - enable debug for the symbol regex subgroups dictionary
+128 - enable debug for the size of the biggest regex subgroups
+256 - enable debug for sysfs graph tree variable
+"""
diff --git a/tools/lib/python/abi/system_symbols.py b/tools/lib/python/abi/system_symbols.py
new file mode 100644
index 000000000000..4a2554da217b
--- /dev/null
+++ b/tools/lib/python/abi/system_symbols.py
@@ -0,0 +1,378 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0912,R0914,R0915,R1702
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+Parse ABI documentation and produce results from it.
+"""
+
+import os
+import re
+import sys
+
+from concurrent import futures
+from datetime import datetime
+from random import shuffle
+
+from abi.helpers import AbiDebug
+
+class SystemSymbols:
+ """Stores arguments for the class and initialize class vars"""
+
+ def graph_add_file(self, path, link=None):
+ """
+ add a file path to the sysfs graph stored at self.root
+ """
+
+ if path in self.files:
+ return
+
+ name = ""
+ ref = self.root
+ for edge in path.split("/"):
+ name += edge + "/"
+ if edge not in ref:
+ ref[edge] = {"__name": [name.rstrip("/")]}
+
+ ref = ref[edge]
+
+ if link and link not in ref["__name"]:
+ ref["__name"].append(link.rstrip("/"))
+
+ self.files.add(path)
+
+ def print_graph(self, root_prefix="", root=None, level=0):
+ """Prints a reference tree graph using UTF-8 characters"""
+
+ if not root:
+ root = self.root
+ level = 0
+
+ # Prevent endless traverse
+ if level > 5:
+ return
+
+ if level > 0:
+ prefix = "├──"
+ last_prefix = "└──"
+ else:
+ prefix = ""
+ last_prefix = ""
+
+ items = list(root.items())
+
+ names = root.get("__name", [])
+ for k, edge in items:
+ if k == "__name":
+ continue
+
+ if not k:
+ k = "/"
+
+ if len(names) > 1:
+ k += " links: " + ",".join(names[1:])
+
+ if edge == items[-1][1]:
+ print(root_prefix + last_prefix + k)
+ p = root_prefix
+ if level > 0:
+ p += " "
+ self.print_graph(p, edge, level + 1)
+ else:
+ print(root_prefix + prefix + k)
+ p = root_prefix + "│ "
+ self.print_graph(p, edge, level + 1)
+
+    def _walk(self, root):
+        """
+        Walk through sysfs to get all devnodes that aren't ignored.
+
+        By default, uses /sys as sysfs mounting point. If another
+        directory is used, its prefix is replaced by /sys at the paths.
+        """
+        with os.scandir(root) as obj:
+            for entry in obj:
+                path = os.path.join(root, entry.name)
+                if self.sysfs:
+                    # count is positional: keyword needs Python >= 3.13
+                    p = path.replace(self.sysfs, "/sys", 1)
+                else:
+                    p = path
+
+                # Skip only this entry: its siblings still need a check
+                if self.re_ignore.search(p):
+                    continue
+
+                # Handle link first to avoid directory recursion
+                if entry.is_symlink():
+                    real = os.path.realpath(path)
+                    if not self.sysfs:
+                        self.aliases[path] = real
+                    else:
+                        real = real.replace(self.sysfs, "/sys", 1)
+
+                    # Add the link target to the graph, unless it is ignored
+                    if not self.re_ignore.search(real):
+                        self.graph_add_file(real, p)
+
+                elif entry.is_file():
+                    self.graph_add_file(p)
+
+                elif entry.is_dir():
+                    self._walk(path)
+
+    def __init__(self, abi, sysfs="/sys", hints=False):
+        """
+        Walk sysfs, collecting every file that isn't at the ignore list
+        (several entries on sysfs aren't documented as ABI).
+
+        abi   - ABI parser object; its logger is reused as self.log
+        sysfs - sysfs mount point. If not /sys, the collected paths get
+                such prefix replaced by /sys
+        hints - if true, check_file() reports the regexes tested against
+                the files that weren't found
+        """
+        self.abi = abi
+        self.log = abi.log
+
+        if sysfs != "/sys":
+            self.sysfs = sysfs.rstrip("/")
+        else:
+            self.sysfs = None
+
+        self.hints = hints
+        self.root = {}
+        self.aliases = {}
+        self.files = set()
+
+        # _walk() matches those against paths already rewritten to /sys,
+        # so they're anchored at /sys even for a custom mount point
+        dont_walk = [
+            # Those require root access and aren't documented at ABI
+            "^/sys/kernel/debug",
+            "^/sys/kernel/tracing",
+            "^/sys/fs/pstore",
+            "^/sys/fs/bpf",
+            "^/sys/fs/fuse",
+
+            # This is not documented at ABI
+            "^/sys/module",
+
+            "^/sys/fs/cgroup",      # this is big and has zero docs under ABI
+            "^/sys/firmware",       # documented elsewhere: ACPI, DT bindings
+            "sections|notes",       # aren't actually part of ABI
+
+            # kernel-parameters.txt - not easy to parse
+            "parameters",
+        ]
+
+        self.re_ignore = re.compile("|".join(dont_walk))
+
+        print(f"Reading {sysfs} directory contents...", file=sys.stderr)
+        self._walk(sysfs)
+
+ def check_file(self, refs, found):
+ """Check missing ABI symbols for a given sysfs file"""
+
+ res_list = []
+
+ try:
+ for names in refs:
+ fname = names[0]
+
+ res = {
+ "found": False,
+ "fname": fname,
+ "msg": "",
+ }
+ res_list.append(res)
+
+ re_what = self.abi.get_regexes(fname)
+ if not re_what:
+ self.abi.log.warning(f"missing rules for {fname}")
+ continue
+
+ for name in names:
+ for r in re_what:
+ if self.abi.debug & AbiDebug.UNDEFINED:
+ self.log.debug("check if %s matches '%s'", name, r.pattern)
+ if r.match(name):
+ res["found"] = True
+ if found:
+ res["msg"] += f" {fname}: regex:\n\t"
+ continue
+
+ if self.hints and not res["found"]:
+ res["msg"] += f" {fname} not found. Tested regexes:\n"
+ for r in re_what:
+ res["msg"] += " " + r.pattern + "\n"
+
+ except KeyboardInterrupt:
+ pass
+
+ return res_list
+
+ def _ref_interactor(self, root):
+ """Recursive function to iterate over the sysfs tree"""
+
+ for k, v in root.items():
+ if isinstance(v, dict):
+ yield from self._ref_interactor(v)
+
+ if root == self.root or k == "__name":
+ continue
+
+ if self.abi.re_string:
+ fname = v["__name"][0]
+ if self.abi.re_string.search(fname):
+ yield v
+ else:
+ yield v
+
+
+ def get_fileref(self, all_refs, chunk_size):
+ """Iterator to group refs into chunks"""
+
+ n = 0
+ refs = []
+
+ for ref in all_refs:
+ refs.append(ref)
+
+ n += 1
+ if n >= chunk_size:
+ yield refs
+ n = 0
+ refs = []
+
+ yield refs
+
+ def check_undefined_symbols(self, max_workers=None, chunk_size=50,
+ found=None, dry_run=None):
+ """Search ABI for sysfs symbols missing documentation"""
+
+ self.abi.parse_abi()
+
+ if self.abi.debug & AbiDebug.GRAPH:
+ self.print_graph()
+
+ all_refs = []
+ for ref in self._ref_interactor(self.root):
+ all_refs.append(ref["__name"])
+
+ if dry_run:
+ print("Would check", file=sys.stderr)
+ for ref in all_refs:
+ print(", ".join(ref))
+
+ return
+
+ print("Starting to search symbols (it may take several minutes):",
+ file=sys.stderr)
+ start = datetime.now()
+ old_elapsed = None
+
+ # Python threads can't run CPU-bound code in parallel due to its
+ # global lock (GIL). While Python 3.13 finally made GIL optional,
+ # there are still issues related to it. Also, we want to have
+ # backward compatibility with older versions of Python.
+ #
+ # So, use instead multiprocess. However, Python is very slow passing
+ # data from/to multiple processes. Also, it may consume lots of memory
+ # if the data to be shared is not small. So, we need to group workload
+ # in chunks that are big enough to generate performance gains while
+ # not being so big that would cause out-of-memory.
+
+ num_refs = len(all_refs)
+ print(f"Number of references to parse: {num_refs}", file=sys.stderr)
+
+ if not max_workers:
+ max_workers = os.cpu_count()
+ elif max_workers > os.cpu_count():
+ max_workers = os.cpu_count()
+
+ max_workers = max(max_workers, 1)
+
+ max_chunk_size = int((num_refs + max_workers - 1) / max_workers)
+ chunk_size = min(chunk_size, max_chunk_size)
+ chunk_size = max(1, chunk_size)
+
+ if max_workers > 1:
+ executor = futures.ProcessPoolExecutor
+
+ # Place references in a random order. This may help improving
+ # performance, by mixing complex/simple expressions when creating
+ # chunks
+ shuffle(all_refs)
+ else:
+ # Python has a high overhead with processes. When there's just
+ # one worker, it is faster to not create a new process.
+ # Yet, User still deserves to have a progress print. So, use
+ # python's "thread", which is actually a single process, using
+ # an internal schedule to switch between tasks. No performance
+ # gains for non-IO tasks, but still it can be quickly interrupted
+ # from time to time to display progress.
+ executor = futures.ThreadPoolExecutor
+
+ not_found = []
+ f_list = []
+ with executor(max_workers=max_workers) as exe:
+ for refs in self.get_fileref(all_refs, chunk_size):
+ if refs:
+ try:
+ f_list.append(exe.submit(self.check_file, refs, found))
+
+ except KeyboardInterrupt:
+ return
+
+ total = len(f_list)
+
+ if not total:
+ if self.abi.re_string:
+ print(f"No ABI symbol matches {self.abi.search_string}")
+ else:
+ self.abi.log.warning("No ABI symbols found")
+ return
+
+ print(f"{len(f_list):6d} jobs queued on {max_workers} workers",
+ file=sys.stderr)
+
+ while f_list:
+ try:
+ t = futures.wait(f_list, timeout=1,
+ return_when=futures.FIRST_COMPLETED)
+
+ done = t[0]
+
+ for fut in done:
+ res_list = fut.result()
+
+ for res in res_list:
+ if not res["found"]:
+ not_found.append(res["fname"])
+ if res["msg"]:
+ print(res["msg"])
+
+ f_list.remove(fut)
+ except KeyboardInterrupt:
+ return
+
+ except RuntimeError as e:
+ self.abi.log.warning(f"Future: {e}")
+ break
+
+ if sys.stderr.isatty():
+ elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+ if len(f_list) < total:
+ elapsed += f" ({total - len(f_list)}/{total} jobs completed). "
+ if elapsed != old_elapsed:
+ print(elapsed + "\r", end="", flush=True,
+ file=sys.stderr)
+ old_elapsed = elapsed
+
+ elapsed = str(datetime.now() - start).split(".", maxsplit=1)[0]
+ print(elapsed, file=sys.stderr)
+
+ for f in sorted(not_found):
+ print(f"{f} not found.")
diff --git a/tools/lib/python/feat/parse_features.py b/tools/lib/python/feat/parse_features.py
new file mode 100755
index 000000000000..b88c04d3e2fe
--- /dev/null
+++ b/tools/lib/python/feat/parse_features.py
@@ -0,0 +1,494 @@
+#!/usr/bin/env python3
+# pylint: disable=R0902,R0911,R0912,R0914,R0915
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+# SPDX-License-Identifier: GPL-2.0
+
+
+"""
+Library to parse the Linux Feature files and produce a ReST book.
+"""
+
+import os
+import re
+import sys
+
+from glob import iglob
+
+
+class ParseFeature:
+ """
+ Parses Documentation/features, allowing to generate ReST documentation
+ from it.
+ """
+
+ h_name = "Feature"
+ h_kconfig = "Kconfig"
+ h_description = "Description"
+ h_subsys = "Subsystem"
+ h_status = "Status"
+ h_arch = "Architecture"
+
+ # Sort order for status. Others will be mapped at the end.
+ status_map = {
+ "ok": 0,
+ "TODO": 1,
+ "N/A": 2,
+ # The only missing status is "..", which was mapped as "---",
+ # as this is a special ReST cell value. Let it get the
+ # default order (99).
+ }
+
+ def __init__(self, prefix, debug=0, enable_fname=False):
+ """
+ Sets internal variables
+ """
+
+ self.prefix = prefix
+ self.debug = debug
+ self.enable_fname = enable_fname
+
+ self.data = {}
+
+ # Initial maximum values use just the headers
+ self.max_size_name = len(self.h_name)
+ self.max_size_kconfig = len(self.h_kconfig)
+ self.max_size_description = len(self.h_description)
+ self.max_size_desc_word = 0
+ self.max_size_subsys = len(self.h_subsys)
+ self.max_size_status = len(self.h_status)
+ self.max_size_arch = len(self.h_arch)
+ self.max_size_arch_with_header = self.max_size_arch + self.max_size_arch
+ self.description_size = 1
+
+ self.msg = ""
+
+ def emit(self, msg="", end="\n"):
+ self.msg += msg + end
+
+ def parse_error(self, fname, ln, msg, data=None):
+ """
+ Displays an error message, printing file name and line
+ """
+
+ if ln:
+ fname += f"#{ln}"
+
+ print(f"Warning: file {fname}: {msg}", file=sys.stderr, end="")
+
+ if data:
+ data = data.rstrip()
+ print(f":\n\t{data}", file=sys.stderr)
+ else:
+ print("", file=sys.stderr)
+
+    def parse_feat_file(self, fname):
+        """
+        Parses a single arch-support.txt feature file.
+
+        Directories and files with any other name are skipped. The parsed
+        feature is stored at self.data, except when a mandatory field is
+        missing: this is reported via parse_error(). The max_size_* widths
+        are updated while parsing.
+        """
+        if os.path.isdir(fname):
+            return
+
+        base = os.path.basename(fname)
+
+        if base != "arch-support.txt":
+            if self.debug:
+                print(f"ignoring {fname}", file=sys.stderr)
+            return
+
+        subsys = os.path.dirname(fname).split("/")[-2]
+        self.max_size_subsys = max(self.max_size_subsys, len(subsys))
+
+        feature_name = ""
+        kconfig = ""
+        description = ""
+        comments = ""
+        arch_table = {}
+
+        if self.debug > 1:
+            print(f"Opening {fname}", file=sys.stderr)
+
+        if self.enable_fname:
+            full_fname = os.path.abspath(fname)
+            self.emit(f".. FILE {full_fname}")
+
+        with open(fname, encoding="utf-8") as f:
+            for ln, line in enumerate(f, start=1):
+                line = line.strip()
+
+                match = re.match(r"^\#\s+Feature\s+name:\s*(.*\S)", line)
+                if match:
+                    feature_name = match.group(1)
+                    self.max_size_name = max(self.max_size_name,
+                                             len(feature_name))
+                    continue
+
+                match = re.match(r"^\#\s+Kconfig:\s*(.*\S)", line)
+                if match:
+                    kconfig = match.group(1)
+                    self.max_size_kconfig = max(self.max_size_kconfig,
+                                                len(kconfig))
+                    continue
+
+                match = re.match(r"^\#\s+description:\s*(.*\S)", line)
+                if match:
+                    description = match.group(1)
+                    self.max_size_description = max(self.max_size_description,
+                                                    len(description))
+
+                    words = re.split(r"\s+", line)[1:]
+                    for word in words:
+                        self.max_size_desc_word = max(self.max_size_desc_word,
+                                                      len(word))
+                    continue
+
+                # Silently skip blank lines instead of reporting them as invalid
+                if re.search(r"^\s*$", line):
+                    continue
+
+                if re.match(r"^\s*\-+\s*$", line):
+                    continue
+
+                if re.search(r"^\s*\|\s*arch\s*\|\s*status\s*\|\s*$", line):
+                    continue
+
+                match = re.match(r"^\#\s*(.*)$", line)
+                if match:
+                    comments += match.group(1)
+                    continue
+
+                match = re.match(r"^\s*\|\s*(\S+):\s*\|\s*(\S+)\s*\|\s*$", line)
+                if match:
+                    arch = match.group(1)
+                    status = match.group(2)
+                    self.max_size_status = max(self.max_size_status,
+                                               len(status))
+                    self.max_size_arch = max(self.max_size_arch, len(arch))
+
+                    if status == "..":
+                        status = "---"
+                    arch_table[arch] = status
+                    continue
+
+                self.parse_error(fname, ln, "Line is invalid", line)
+
+        if not feature_name:
+            self.parse_error(fname, 0, "Feature name not found")
+            return
+        if not subsys:
+            self.parse_error(fname, 0, "Subsystem not found")
+            return
+        if not kconfig:
+            self.parse_error(fname, 0, "Kconfig not found")
+            return
+        if not description:
+            self.parse_error(fname, 0, "Description not found")
+            return
+        if not arch_table:
+            self.parse_error(fname, 0, "Architecture table not found")
+            return
+
+        self.data[feature_name] = {
+            "where": fname,
+            "subsys": subsys,
+            "kconfig": kconfig,
+            "description": description,
+            "comments": comments,
+            "table": arch_table,
+        }
+
+        self.max_size_arch_with_header = self.max_size_arch + len(self.h_arch)
+
+ def parse(self):
+ """Parses all arch-support.txt feature files inside self.prefix"""
+
+ path = os.path.expanduser(self.prefix)
+
+ if self.debug > 2:
+ print(f"Running parser for {path}")
+
+ example_path = os.path.join(path, "arch-support.txt")
+
+ for fname in iglob(os.path.join(path, "**"), recursive=True):
+ if fname != example_path:
+ self.parse_feat_file(fname)
+
+ return self.data
+
+ def output_arch_table(self, arch, feat=None):
+ """
+ Output feature(s) for a given architecture.
+ """
+
+ title = f"Feature status on {arch} architecture"
+
+ self.emit("=" * len(title))
+ self.emit(title)
+ self.emit("=" * len(title))
+ self.emit()
+
+ self.emit("=" * self.max_size_subsys + " ", end="")
+ self.emit("=" * self.max_size_name + " ", end="")
+ self.emit("=" * self.max_size_kconfig + " ", end="")
+ self.emit("=" * self.max_size_status + " ", end="")
+ self.emit("=" * self.max_size_description)
+
+ self.emit(f"{self.h_subsys:<{self.max_size_subsys}} ", end="")
+ self.emit(f"{self.h_name:<{self.max_size_name}} ", end="")
+ self.emit(f"{self.h_kconfig:<{self.max_size_kconfig}} ", end="")
+ self.emit(f"{self.h_status:<{self.max_size_status}} ", end="")
+ self.emit(f"{self.h_description:<{self.max_size_description}}")
+
+ self.emit("=" * self.max_size_subsys + " ", end="")
+ self.emit("=" * self.max_size_name + " ", end="")
+ self.emit("=" * self.max_size_kconfig + " ", end="")
+ self.emit("=" * self.max_size_status + " ", end="")
+ self.emit("=" * self.max_size_description)
+
+ sorted_features = sorted(self.data.keys(),
+ key=lambda x: (self.data[x]["subsys"],
+ x.lower()))
+
+ for name in sorted_features:
+ if feat and name != feat:
+ continue
+
+ arch_table = self.data[name]["table"]
+
+ if not arch in arch_table:
+ continue
+
+ self.emit(f"{self.data[name]['subsys']:<{self.max_size_subsys}} ",
+ end="")
+ self.emit(f"{name:<{self.max_size_name}} ", end="")
+ self.emit(f"{self.data[name]['kconfig']:<{self.max_size_kconfig}} ",
+ end="")
+ self.emit(f"{arch_table[arch]:<{self.max_size_status}} ",
+ end="")
+ self.emit(f"{self.data[name]['description']}")
+
+ self.emit("=" * self.max_size_subsys + " ", end="")
+ self.emit("=" * self.max_size_name + " ", end="")
+ self.emit("=" * self.max_size_kconfig + " ", end="")
+ self.emit("=" * self.max_size_status + " ", end="")
+ self.emit("=" * self.max_size_description)
+
+ return self.msg
+
+ def output_feature(self, feat):
+ """
+ Output a feature on all architectures
+ """
+
+ title = f"Feature {feat}"
+
+ self.emit("=" * len(title))
+ self.emit(title)
+ self.emit("=" * len(title))
+ self.emit()
+
+ if not feat in self.data:
+ return
+
+ if self.data[feat]["subsys"]:
+ self.emit(f":Subsystem: {self.data[feat]['subsys']}")
+ if self.data[feat]["kconfig"]:
+ self.emit(f":Kconfig: {self.data[feat]['kconfig']}")
+
+ desc = self.data[feat]["description"]
+ desc = desc[0].upper() + desc[1:]
+ desc = desc.rstrip(". \t")
+ self.emit(f"\n{desc}.\n")
+
+ com = self.data[feat]["comments"].strip()
+ if com:
+ self.emit("Comments")
+ self.emit("--------")
+ self.emit(f"\n{com}\n")
+
+ self.emit("=" * self.max_size_arch + " ", end="")
+ self.emit("=" * self.max_size_status)
+
+ self.emit(f"{self.h_arch:<{self.max_size_arch}} ", end="")
+ self.emit(f"{self.h_status:<{self.max_size_status}}")
+
+ self.emit("=" * self.max_size_arch + " ", end="")
+ self.emit("=" * self.max_size_status)
+
+ arch_table = self.data[feat]["table"]
+ for arch in sorted(arch_table.keys()):
+ self.emit(f"{arch:<{self.max_size_arch}} ", end="")
+ self.emit(f"{arch_table[arch]:<{self.max_size_status}}")
+
+ self.emit("=" * self.max_size_arch + " ", end="")
+ self.emit("=" * self.max_size_status)
+
+ return self.msg
+
+ def matrix_lines(self, desc_size, max_size_status, header):
+ """
+ Helper function to split element tables at the output matrix
+ """
+
+ if header:
+ ln_marker = "="
+ else:
+ ln_marker = "-"
+
+ self.emit("+" + ln_marker * self.max_size_name + "+", end="")
+ self.emit(ln_marker * desc_size, end="")
+ self.emit("+" + ln_marker * max_size_status + "+")
+
+ def output_matrix(self):
+ """
+ Generates a set of tables, grouped by subsystem, containing
+ what's the feature state on each architecture.
+ """
+
+ title = "Feature status on all architectures"
+
+ self.emit("=" * len(title))
+ self.emit(title)
+ self.emit("=" * len(title))
+ self.emit()
+
+ desc_title = f"{self.h_kconfig} / {self.h_description}"
+
+ desc_size = self.max_size_kconfig + 4
+ if not self.description_size:
+ desc_size = max(self.max_size_description, desc_size)
+ else:
+ desc_size = max(self.description_size, desc_size)
+
+ desc_size = max(self.max_size_desc_word, desc_size, len(desc_title))
+
+ notcompat = "Not compatible"
+ self.max_size_status = max(self.max_size_status, len(notcompat))
+
+ min_status_size = self.max_size_status + self.max_size_arch + 4
+ max_size_status = max(min_status_size, self.max_size_status)
+
+ h_status_per_arch = "Status per architecture"
+ max_size_status = max(max_size_status, len(h_status_per_arch))
+
+ cur_subsys = None
+ for name in sorted(self.data.keys(),
+ key=lambda x: (self.data[x]["subsys"], x.lower())):
+ if not cur_subsys or cur_subsys != self.data[name]["subsys"]:
+ if cur_subsys:
+ self.emit()
+
+ cur_subsys = self.data[name]["subsys"]
+
+ title = f"Subsystem: {cur_subsys}"
+ self.emit(title)
+ self.emit("=" * len(title))
+ self.emit()
+
+ self.matrix_lines(desc_size, max_size_status, 0)
+
+ self.emit(f"|{self.h_name:<{self.max_size_name}}", end="")
+ self.emit(f"|{desc_title:<{desc_size}}", end="")
+ self.emit(f"|{h_status_per_arch:<{max_size_status}}|")
+
+ self.matrix_lines(desc_size, max_size_status, 1)
+
+ lines = []
+ descs = []
+ cur_status = ""
+ line = ""
+
+ arch_table = sorted(self.data[name]["table"].items(),
+ key=lambda x: (self.status_map.get(x[1], 99),
+ x[0].lower()))
+
+ for arch, status in arch_table:
+ if status == "---":
+ status = notcompat
+
+ if status != cur_status:
+ if line != "":
+ lines.append(line)
+ line = ""
+ line = f"- **{status}**: {arch}"
+ elif len(line) + len(arch) + 2 < max_size_status:
+ line += f", {arch}"
+ else:
+ lines.append(line)
+ line = f" {arch}"
+ cur_status = status
+
+ if line != "":
+ lines.append(line)
+
+ description = self.data[name]["description"]
+ while len(description) > desc_size:
+ desc_line = description[:desc_size]
+
+ last_space = desc_line.rfind(" ")
+ if last_space != -1:
+ desc_line = desc_line[:last_space]
+ descs.append(desc_line)
+ description = description[last_space + 1:]
+ else:
+ desc_line = desc_line[:-1]
+ descs.append(desc_line + "\\")
+ description = description[len(desc_line):]
+
+ if description:
+ descs.append(description)
+
+ while len(lines) < 2 + len(descs):
+ lines.append("")
+
+ for ln, line in enumerate(lines):
+ col = ["", ""]
+
+ if not ln:
+ col[0] = name
+ col[1] = f"``{self.data[name]['kconfig']}``"
+ else:
+ if ln >= 2 and descs:
+ col[1] = descs.pop(0)
+
+ self.emit(f"|{col[0]:<{self.max_size_name}}", end="")
+ self.emit(f"|{col[1]:<{desc_size}}", end="")
+ self.emit(f"|{line:<{max_size_status}}|")
+
+ self.matrix_lines(desc_size, max_size_status, 0)
+
+ return self.msg
+
+ def list_arch_features(self, arch, feat):
+ """
+ Print a matrix of kernel feature support for the chosen architecture.
+ """
+ self.emit("#")
+ self.emit(f"# Kernel feature support matrix of the '{arch}' architecture:")
+ self.emit("#")
+
+ # Sort by subsystem, then by feature name (case‑insensitive)
+ for name in sorted(self.data.keys(),
+ key=lambda n: (self.data[n]["subsys"].lower(),
+ n.lower())):
+ if feat and name != feat:
+ continue
+
+ feature = self.data[name]
+ arch_table = feature["table"]
+ status = arch_table.get(arch, "")
+ status = " " * ((4 - len(status)) // 2) + status
+
+ self.emit(f"{feature['subsys']:>{self.max_size_subsys + 1}}/ ",
+ end="")
+ self.emit(f"{name:<{self.max_size_name}}: ", end="")
+ self.emit(f"{status:<5}| ", end="")
+ self.emit(f"{feature['kconfig']:>{self.max_size_kconfig}} ",
+ end="")
+ self.emit(f"# {feature['description']}")
+
+ return self.msg
diff --git a/tools/lib/python/jobserver.py b/tools/lib/python/jobserver.py
new file mode 100755
index 000000000000..a24f30ef4fa8
--- /dev/null
+++ b/tools/lib/python/jobserver.py
@@ -0,0 +1,149 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0+
+#
+# pylint: disable=C0103,C0209
+#
+#
+
+"""
+Interacts with the POSIX jobserver during the Kernel build time.
+
+A "normal" jobserver task, like the one initiated by a make subprocess would do:
+
+ - open read/write file descriptors to communicate with the job server;
+ - ask for one slot by calling:
+ claim = os.read(reader, 1)
+ - when the job finishes, call:
+ os.write(writer, b"+") # os.write(writer, claim)
+
+Here, the goal is different: This script aims to get the remaining number
+of slots available, using all of them to run a command which handle tasks in
+parallel. To do that, it has a loop that ends only after there are no
+slots left. It then increments the number by one, in order to allow a
+call equivalent to make -j$((claim+1)), e.g. having a parent make creating
+$claim child to do the actual work.
+
+The end goal here is to keep the total number of build tasks under the
+limit established by the initial make -j$n_proc call.
+
+See:
+ https://www.gnu.org/software/make/manual/html_node/POSIX-Jobserver.html#POSIX-Jobserver
+"""
+
+import errno
+import os
+import subprocess
+import sys
+
+class JobserverExec:
+ """
+ Claim all slots from make using POSIX Jobserver.
+
+ The main methods here are:
+ - open(): reserves all slots;
+ - close(): method returns all used slots back to make;
+ - run(): executes a command setting PARALLELISM=<available slots jobs + 1>
+ """
+
+ def __init__(self):
+ """Initialize internal vars"""
+ self.claim = 0
+ self.jobs = b""
+ self.reader = None
+ self.writer = None
+ self.is_open = False
+
+    def open(self):
+        """Reserve all available slots to be claimed later on"""
+        if self.is_open:
+            return
+
+        try:
+            # Fetch the make environment options.
+            flags = os.environ["MAKEFLAGS"]
+            # Look for "--jobserver=R,W"
+            # Note that GNU Make has used --jobserver-fds and --jobserver-auth
+            # so this handles all of them.
+            opts = [x for x in flags.split(" ") if x.startswith("--jobserver")]
+
+            # Parse out R,W file descriptor numbers and set them nonblocking.
+            # If the MAKEFLAGS variable contains multiple instances of the
+            # --jobserver-auth= option, the last one is relevant.
+            fds = opts[-1].split("=", 1)[1]
+
+            # GNU Make >= 4.4 uses a named pipe for both reader and writer.
+            # Example argument: --jobserver-auth=fifo:/tmp/GMfifo8134
+            _, _, path = fds.partition("fifo:")
+
+            if path:
+                self.reader = os.open(path, os.O_RDONLY | os.O_NONBLOCK)
+                self.writer = os.open(path, os.O_WRONLY)
+            else:
+                self.reader, self.writer = [int(x) for x in fds.split(",", 1)]
+                # Open a private copy of reader to avoid setting nonblocking
+                # on an unsuspecting process with the same reader fd.
+                self.reader = os.open("/proc/self/fd/%d" % (self.reader),
+                                      os.O_RDONLY | os.O_NONBLOCK)
+
+            # Read out as many jobserver slots as possible
+            while True:
+                try:
+                    slot = os.read(self.reader, 8)
+                    self.jobs += slot
+                except (OSError, IOError) as e:
+                    if e.errno == errno.EWOULDBLOCK:
+                        # Stop at the end of the jobserver queue.
+                        break
+                    # If something went wrong, give back the jobs. Forget
+                    # them as well, so that close() won't return them twice.
+                    if self.jobs:
+                        os.write(self.writer, self.jobs)
+                        self.jobs = b""
+                    raise e
+
+            # Add a bump for our caller's reservation, since we're just going
+            # to sit here blocked on our child.
+            self.claim = len(self.jobs) + 1
+
+        except (KeyError, IndexError, ValueError, OSError, IOError):
+            # Any missing environment strings or bad fds should result in just
+            # not being parallel.
+            self.claim = None
+
+        self.is_open = True
+
+    def close(self):
+        """Return all reserved slots to Jobserver"""
+        if not self.is_open:
+            return
+
+        # Return all the reserved slots and forget them, so that another
+        # open()/close() cycle can't hand the same slots back twice.
+        if self.jobs:
+            os.write(self.writer, self.jobs)
+            self.jobs = b""
+        self.is_open = False
+
+ def __enter__(self):
+ self.open()
+ return self
+
+ def __exit__(self, exc_type, exc_value, exc_traceback):
+ self.close()
+
+ def run(self, cmd, *args, **pwargs):
+ """
+ Run a command setting PARALLELISM env variable to the number of
+ available job slots (claim) + 1, e.g. it will reserve claim slots
+ to do the actual build work, plus one to monitor its children.
+ """
+ self.open() # Ensure that self.claim is set
+
+ # We can only claim parallelism if there was a jobserver (i.e. a
+ # top-level "-jN" argument) and there were no other failures. Otherwise
+ # leave out the environment variable and let the child figure out what
+ # is best.
+ if self.claim:
+ os.environ["PARALLELISM"] = str(self.claim)
+
+ return subprocess.call(cmd, *args, **pwargs)
diff --git a/tools/lib/python/kdoc/__init__.py b/tools/lib/python/kdoc/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/lib/python/kdoc/__init__.py
diff --git a/tools/lib/python/kdoc/enrich_formatter.py b/tools/lib/python/kdoc/enrich_formatter.py
new file mode 100644
index 000000000000..bb171567a4ca
--- /dev/null
+++ b/tools/lib/python/kdoc/enrich_formatter.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Ancillary argparse HelpFormatter class that works in a similar way to
+argparse.RawDescriptionHelpFormatter, i.e. description maintains line
+breaks, but it also implements transformations to the help text. The
+actual transformations are given by enrich_text(), if the output is tty.
+
+Currently, the following transformations are done:
+
+ - Positional arguments are shown in upper cases;
+ - if output is TTY, ``var`` and positional arguments are shown prepended
+ by an ANSI SGR code. This is usually translated to bold. On some
+ terminals, like, konsole, this is translated into a colored bold text.
+"""
+
+import argparse
+import re
+import sys
+
+class EnrichFormatter(argparse.HelpFormatter):
+ """
+ Better format the output, making easier to identify the positional args
+ and how they're used at the __doc__ description.
+ """
+ def __init__(self, *args, **kwargs):
+ """Initialize class and check if is TTY"""
+ super().__init__(*args, **kwargs)
+ self._tty = sys.stdout.isatty()
+
+ def enrich_text(self, text):
+ """Handle ReST markups (currently, only ``foo``)"""
+ if self._tty and text:
+ # Replace ``text`` with ANSI SGR (bold)
+ return re.sub(r'\`\`(.+?)\`\`',
+ lambda m: f'\033[1m{m.group(1)}\033[0m', text)
+ return text
+
+ def _fill_text(self, text, width, indent):
+ """Enrich descriptions with markups on it"""
+ enriched = self.enrich_text(text)
+ return "\n".join(indent + line for line in enriched.splitlines())
+
+ def _format_usage(self, usage, actions, groups, prefix):
+ """Enrich positional arguments at usage: line"""
+
+ prog = self._prog
+ parts = []
+
+ for action in actions:
+ if action.option_strings:
+ opt = action.option_strings[0]
+ if action.nargs != 0:
+ opt += f" {action.dest.upper()}"
+ parts.append(f"[{opt}]")
+ else:
+ # Positional argument
+ parts.append(self.enrich_text(f"``{action.dest.upper()}``"))
+
+ usage_text = f"{prefix or 'usage: '} {prog} {' '.join(parts)}\n"
+ return usage_text
+
+ def _format_action_invocation(self, action):
+ """Enrich argument names"""
+ if not action.option_strings:
+ return self.enrich_text(f"``{action.dest.upper()}``")
+
+ return ", ".join(action.option_strings)
diff --git a/tools/lib/python/kdoc/kdoc_files.py b/tools/lib/python/kdoc/kdoc_files.py
new file mode 100644
index 000000000000..bfe02baf1606
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_files.py
@@ -0,0 +1,294 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=R0903,R0913,R0914,R0917
+
+"""
+Parse kernel-doc tags on multiple kernel source files.
+"""
+
+import argparse
+import logging
+import os
+import re
+
+from kdoc.kdoc_parser import KernelDoc
+from kdoc.kdoc_output import OutputFormat
+
+
+class GlobSourceFiles:
+ """
+ Parse C source code file names and directories via an iterator.
+ """
+
+ def __init__(self, srctree=None, valid_extensions=None):
+ """
+ Initialize valid extensions with a tuple.
+
+ If not defined, assume default C extensions (.c and .h)
+
+ It would be possible to use python's glob function, but it is
+ very slow, and it is not iterative. So, it would wait to read all
+ directories before actually doing something.
+
+ So, let's use our own implementation.
+
+ If srctree is set, parse_files() joins it to each name it receives.
+ """
+
+ if not valid_extensions:
+ self.extensions = (".c", ".h")
+ else:
+ self.extensions = valid_extensions
+
+ self.srctree = srctree
+
+ def _parse_dir(self, dirname):
+ """
+ Internal function to parse files recursively.
+
+ Yields the path of each file under dirname whose name ends with
+ one of self.extensions.
+ """
+
+ with os.scandir(dirname) as obj:
+ for entry in obj:
+ name = os.path.join(dirname, entry.name)
+
+ # Symlinks are not followed when testing for a directory
+ if entry.is_dir(follow_symlinks=False):
+ yield from self._parse_dir(name)
+
+ if not entry.is_file():
+ continue
+
+ basename = os.path.basename(name)
+
+ if not basename.endswith(self.extensions):
+ continue
+
+ yield name
+
+ def parse_files(self, file_list, file_not_found_cb):
+ """
+ Define an iterator to parse all source files from file_list,
+ handling directories if any.
+
+ Names that are neither a directory nor a file are passed to
+ file_not_found_cb (when it is set), using the name as given in
+ file_list, i.e. without srctree.
+ """
+
+ if not file_list:
+ return
+
+ for fname in file_list:
+ if self.srctree:
+ f = os.path.join(self.srctree, fname)
+ else:
+ f = fname
+
+ if os.path.isdir(f):
+ yield from self._parse_dir(f)
+ elif os.path.isfile(f):
+ yield f
+ elif file_not_found_cb:
+ file_not_found_cb(fname)
+
+
+class KernelFiles():
+ """
+ Parse kernel-doc tags on multiple kernel source files.
+
+ There are two types of parsers defined here:
+ - self.parse_file(): parses both kernel-doc markups and
+ EXPORT_SYMBOL* macros;
+ - self.process_export_file(): parses only EXPORT_SYMBOL* macros.
+ """
+
+ def warning(self, msg):
+ """
+ Ancillary routine to output a warning and increment error count.
+
+ Warnings are counted in self.errors, the same counter used by error().
+ """
+
+ self.config.log.warning(msg)
+ self.errors += 1
+
+ def error(self, msg):
+ """
+ Ancillary routine to output an error and increment error count.
+
+ The message goes to self.config.log at error level.
+ """
+
+ self.config.log.error(msg)
+ self.errors += 1
+
+ def parse_file(self, fname):
+ """
+ Parse a single Kernel source.
+
+ The kernel-doc entries are stored at self.results[fname] and the
+ export table at self.export_table[fname].
+ """
+
+ # Prevent parsing the same file twice if results are cached
+ if fname in self.files:
+ return
+
+ doc = KernelDoc(self.config, fname)
+ export_table, entries = doc.parse_kdoc()
+
+ self.export_table[fname] = export_table
+
+ self.files.add(fname)
+ self.export_files.add(fname) # parse_kdoc() already checks exports
+
+ self.results[fname] = entries
+
+ def process_export_file(self, fname):
+ """
+ Parses EXPORT_SYMBOL* macros from a single Kernel source file.
+
+ The result is stored at self.export_table[fname].
+ """
+
+ # Prevent parsing the same file twice if results are cached
+ if fname in self.export_files:
+ return
+
+ doc = KernelDoc(self.config, fname)
+ export_table = doc.parse_export()
+
+ # NOTE(review): any false value is reported as an error. If
+ # parse_export() returns an empty table for a file without exports,
+ # such a file is counted as an error too - confirm.
+ if not export_table:
+ self.error(f"Error: Cannot check EXPORT_SYMBOL* on {fname}")
+ export_table = set()
+
+ self.export_table[fname] = export_table
+ self.export_files.add(fname)
+
+ def file_not_found_cb(self, fname):
+ """
+ Callback to report an error if a file was not found.
+
+ It is passed to GlobSourceFiles.parse_files() by parse() and msg().
+ """
+
+ self.error(f"Cannot find file {fname}")
+
+ def __init__(self, verbose=False, out_style=None,
+              werror=False, wreturn=False, wshort_desc=False,
+              wcontents_before_sections=False,
+              logger=None):
+     """
+     Initialize the configuration shared with KernelDoc and the result
+     caches. No file is parsed here: use parse() for that.
+
+     Arguments:
+       verbose: if false, enabled by a KBUILD_VERBOSE environment
+                variable that is neither empty nor "0";
+       out_style: output formatter; defaults to OutputFormat();
+       werror: if false, enabled by -Werror at the KCFLAGS environment
+               variable or by KDOC_WERROR (unless it is "0");
+       wreturn, wshort_desc, wcontents_before_sections: stored as-is
+               at self.config;
+       logger: logger to use; defaults to logging.getLogger("kernel-doc").
+     """
+
+     if not verbose:
+         # The environment gives a string: "0" must not enable verbose
+         verbose = os.environ.get("KBUILD_VERBOSE", "") not in ("", "0")
+
+     if out_style is None:
+         out_style = OutputFormat()
+
+     if not werror:
+         kcflags = os.environ.get("KCFLAGS", None)
+         if kcflags:
+             # -Werror as a whole word anywhere at KCFLAGS
+             match = re.search(r"(\s|^)-Werror(\s|$)", kcflags)
+             if match:
+                 werror = True
+
+         # reading this variable is for backwards compat just in case
+         # someone was calling it with the variable from outside the
+         # kernel's build system
+         kdoc_werror = os.environ.get("KDOC_WERROR", None)
+         if kdoc_werror and kdoc_werror != "0":
+             werror = True
+
+     # Some variables are global to the parser logic as a whole as they are
+     # used to send control configuration to KernelDoc class. As such,
+     # those variables are read-only inside the KernelDoc.
+     #
+     # Use a Namespace instance: setting attributes on the class itself
+     # would share them between all KernelFiles objects.
+     self.config = argparse.Namespace()
+
+     self.config.verbose = verbose
+     self.config.werror = werror
+     self.config.wreturn = wreturn
+     self.config.wshort_desc = wshort_desc
+     self.config.wcontents_before_sections = wcontents_before_sections
+
+     if not logger:
+         self.config.log = logging.getLogger("kernel-doc")
+     else:
+         self.config.log = logger
+
+     self.config.warning = self.warning
+
+     self.config.src_tree = os.environ.get("SRCTREE", None)
+
+     # Initialize variables that are internal to KernelFiles
+
+     self.out_style = out_style
+
+     self.errors = 0
+     self.results = {}
+
+     self.files = set()
+     self.export_files = set()
+     self.export_table = {}
+
+ def parse(self, file_list, export_file=None):
+ """
+ Parse all files.
+
+ Files from file_list are parsed with parse_file(); files from
+ export_file are parsed with process_export_file(). Directories are
+ expanded by GlobSourceFiles and missing names are reported via
+ file_not_found_cb().
+ """
+
+ glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+ for fname in glob.parse_files(file_list, self.file_not_found_cb):
+ self.parse_file(fname)
+
+ for fname in glob.parse_files(export_file, self.file_not_found_cb):
+ self.process_export_file(fname)
+
+ def out_msg(self, fname, name, arg):
+ """
+ Return output messages from a file name using the output style
+ filtering.
+
+ If output type was not handled by the styler, return None.
+
+ This is a thin wrapper around self.out_style.msg().
+ """
+
+ # NOTE: we can add rules here to filter out unwanted parts,
+ # although OutputFormat.msg already does that.
+
+ return self.out_style.msg(fname, name, arg)
+
+ def msg(self, enable_lineno=False, export=False, internal=False,
+         symbol=None, nosymbol=None, no_doc_sections=False,
+         filenames=None, export_file=None):
+     """
+     Iterate over the kernel-doc results, yielding a (fname, text)
+     tuple for each file that produced some output.
+
+     If filenames is empty, all parsed files are used, sorted by name.
+
+     When export or internal is set, symbols are filtered using the
+     EXPORT_SYMBOL* tables of export_file or, if it is not set, of the
+     file being output.
+     """
+
+     self.out_style.set_config(self.config)
+
+     if not filenames:
+         filenames = sorted(self.results.keys())
+
+     glob = GlobSourceFiles(srctree=self.config.src_tree)
+
+     for fname in filenames:
+         function_table = set()
+
+         if internal or export:
+             # Keep the default local to this iteration: storing it at
+             # export_file would make every following file be filtered
+             # with the exports of the first one.
+             export_list = export_file if export_file else [fname]
+
+             for f in glob.parse_files(export_list, self.file_not_found_cb):
+                 function_table |= self.export_table[f]
+
+         if symbol:
+             for s in symbol:
+                 function_table.add(s)
+
+         self.out_style.set_filter(export, internal, symbol, nosymbol,
+                                   function_table, enable_lineno,
+                                   no_doc_sections)
+
+         msg = ""
+         if fname not in self.results:
+             self.config.log.warning("No kernel-doc for file %s", fname)
+             continue
+
+         symbols = self.results[fname]
+         self.out_style.set_symbols(symbols)
+
+         for arg in symbols:
+             m = self.out_msg(fname, arg.name, arg)
+
+             # None means that the output style can't handle this type
+             if m is None:
+                 ln = arg.get("ln", 0)
+                 dtype = arg.get('type', "")
+
+                 self.config.log.warning("%s:%d Can't handle %s",
+                                         fname, ln, dtype)
+             else:
+                 msg += m
+
+         if msg:
+             yield fname, msg
diff --git a/tools/lib/python/kdoc/kdoc_item.py b/tools/lib/python/kdoc/kdoc_item.py
new file mode 100644
index 000000000000..19805301cb2c
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_item.py
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# A class that will, eventually, encapsulate all of the parsed data that we
+# then pass into the output modules.
+#
+
+class KdocItem:
+    """
+    One parsed kernel-doc entry (function, struct, enum, DOC block, ...).
+
+    Well-known fields are stored as attributes; everything else passed
+    as keyword arguments is reachable via get() or item[key].
+    """
+
+    def __init__(self, name, fname, type, start_line, **other_stuff):
+        """
+        Store the identification of the entry and initialize the section
+        and parameter tables as empty.
+        """
+        self.name = name
+        self.fname = fname
+        self.type = type
+        self.declaration_start_line = start_line
+        self.sections = {}
+        #
+        # The attribute set by set_sections() and read by the output code
+        # is "section_start_lines". Keep the "sections_start_lines"
+        # spelling as an alias of the same object.
+        #
+        self.section_start_lines = {}
+        self.sections_start_lines = self.section_start_lines
+        self.parameterlist = []
+        # Read with .get() by the output code, so it has to be a dict
+        self.parameterdesc_start_lines = {}
+        self.parameterdescs = {}
+        self.parametertypes = {}
+        #
+        # Just save everything else into our own dict so that the output
+        # side can grab it directly as before. As we move things into more
+        # structured data, this will, hopefully, fade away.
+        #
+        self.other_stuff = other_stuff
+
+    def get(self, key, default = None):
+        """Return the extra value stored for key, or default if missing"""
+        return self.other_stuff.get(key, default)
+
+    def __getitem__(self, key):
+        """Same as get(): a missing key gives None instead of KeyError"""
+        return self.get(key)
+
+    #
+    # Tracking of section and parameter information.
+    #
+    def set_sections(self, sections, start_lines):
+        """Replace the section texts and their start lines"""
+        self.sections = sections
+        self.section_start_lines = start_lines
+        self.sections_start_lines = start_lines
+
+    def set_params(self, names, descs, types, starts):
+        """Replace parameter names, descriptions, types and start lines"""
+        self.parameterlist = names
+        self.parameterdescs = descs
+        self.parametertypes = types
+        self.parameterdesc_start_lines = starts
diff --git a/tools/lib/python/kdoc/kdoc_output.py b/tools/lib/python/kdoc/kdoc_output.py
new file mode 100644
index 000000000000..b1aaa7fc3604
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_output.py
@@ -0,0 +1,824 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,R0902,R0911,R0912,R0913,R0914,R0915,R0917
+
+"""
+Implement output filters to print kernel-doc documentation.
+
+The implementation uses a virtual base class (OutputFormat) which
+contains dispatches to virtual methods, and some code to filter
+out output messages.
+
+The actual implementation is done on one separate class per each type
+of output. Currently, there are output classes for ReST and man/troff.
+"""
+
+import os
+import re
+from datetime import datetime
+
+from kdoc.kdoc_parser import KernelDoc, type_param
+from kdoc.kdoc_re import KernRe
+
+
+# Function pointer type. Group 1 is everything up to and including "(*";
+# group 2 is the content of the parenthesized list that follows "(*)".
+function_pointer = KernRe(r"([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)", cache=False)
+
+# match expressions used to find embedded type information
+type_constant = KernRe(r"\b``([^\`]+)``\b", cache=False)
+type_constant2 = KernRe(r"\%([-_*\w]+)", cache=False)
+type_func = KernRe(r"(\w+)\(\)", cache=False)
+type_param_ref = KernRe(r"([\!~\*]?)\@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+# Special RST handling for func ptr params
+type_fp_param = KernRe(r"\@(\w+)\(\)", cache=False)
+
+# Special RST handling for structs with func ptr params
+type_fp_param2 = KernRe(r"\@(\w+->\S+)\(\)", cache=False)
+
+# "$NAME" environment-like references and "&type name" cross-references.
+# For the enum/struct/typedef/union expressions, group 1 is the keyword
+# plus the name and group 2 is the name alone.
+type_env = KernRe(r"(\$\w+)", cache=False)
+type_enum = KernRe(r"\&(enum\s*([_\w]+))", cache=False)
+type_struct = KernRe(r"\&(struct\s*([_\w]+))", cache=False)
+type_typedef = KernRe(r"\&(typedef\s*([_\w]+))", cache=False)
+type_union = KernRe(r"\&(union\s*([_\w]+))", cache=False)
+# "&name.member" or "&name->member": groups are name, separator, member
+type_member = KernRe(r"\&([_\w]+)(\.|->)([_\w]+)", cache=False)
+type_fallback = KernRe(r"\&([_\w]+)", cache=False)
+# Same as type_member, followed by "()"
+type_member_func = type_member + KernRe(r"\(\)", cache=False)
+
+
+class OutputFormat:
+ """
+ Base class for OutputFormat. If used as-is, it means that only
+ warnings will be displayed.
+ """
+
+ # output mode.
+ OUTPUT_ALL = 0 # output all symbols and doc sections
+ OUTPUT_INCLUDE = 1 # output only specified symbols
+ OUTPUT_EXPORTED = 2 # output exported symbols
+ OUTPUT_INTERNAL = 3 # output non-exported symbols
+
+ # Virtual member to be overridden at the inherited classes.
+ # It is a sequence of (regex, replacement) pairs, see highlight_block().
+ highlights = []
+
+ def __init__(self):
+ """Declare internal vars and set mode to OUTPUT_ALL"""
+
+ self.out_mode = self.OUTPUT_ALL
+ self.enable_lineno = None
+ # Only used for membership tests; set_filter() replaces it with a set
+ self.nosymbol = {}
+ self.symbol = None
+ self.function_table = None
+ self.config = None
+ self.no_doc_sections = False
+
+ self.data = ""
+
+ def set_config(self, config):
+ """
+ Setup global config variables used by both parser and output.
+ """
+
+ self.config = config
+
+ def set_filter(self, export, internal, symbol, nosymbol, function_table,
+ enable_lineno, no_doc_sections):
+ """
+ Initialize filter variables according to the requested mode.
+
+ Only one choice is valid between export, internal and symbol.
+ If more than one is set, symbol takes precedence over export, and
+ export over internal.
+
+ The nosymbol filter can be used on all modes.
+ """
+
+ self.enable_lineno = enable_lineno
+ self.no_doc_sections = no_doc_sections
+ self.function_table = function_table
+
+ if symbol:
+ self.out_mode = self.OUTPUT_INCLUDE
+ elif export:
+ self.out_mode = self.OUTPUT_EXPORTED
+ elif internal:
+ self.out_mode = self.OUTPUT_INTERNAL
+ else:
+ self.out_mode = self.OUTPUT_ALL
+
+ # An empty nosymbol keeps whatever filter was set by a previous call
+ if nosymbol:
+ self.nosymbol = set(nosymbol)
+
+
+ def highlight_block(self, block):
+ """
+ Apply the highlights of the output class to a sub-block of text.
+
+ The (regex, replacement) pairs from self.highlights are applied one
+ after the other, in the order they are declared.
+ """
+
+ for r, sub in self.highlights:
+ block = r.sub(sub, block)
+
+ return block
+
+ def out_warnings(self, args):
+ """
+ Output warnings for identifiers that will be displayed.
+ """
+
+ for log_msg in args.warnings:
+ self.config.warning(log_msg)
+
+ def check_doc(self, name, args):
+ """
+ Check if DOC should be output.
+
+ DOC blocks are accepted on OUTPUT_ALL mode and, if listed at
+ function_table, on OUTPUT_INCLUDE mode. They are rejected when
+ no_doc_sections is set, when excluded by nosymbol and on the
+ other modes.
+ """
+
+ if self.no_doc_sections:
+ return False
+
+ if name in self.nosymbol:
+ return False
+
+ if self.out_mode == self.OUTPUT_ALL:
+ self.out_warnings(args)
+ return True
+
+ if self.out_mode == self.OUTPUT_INCLUDE:
+ if name in self.function_table:
+ self.out_warnings(args)
+ return True
+
+ return False
+
+ def check_declaration(self, dtype, name, args):
+ """
+ Checks if a declaration should be output or not based on the
+ filtering criteria.
+
+ Returns True if the declaration should be output.
+ """
+
+ if name in self.nosymbol:
+ return False
+
+ if self.out_mode == self.OUTPUT_ALL:
+ self.out_warnings(args)
+ return True
+
+ # NOTE(review): unlike the other accepting branches, this one does not
+ # call out_warnings() - confirm this is intended.
+ if self.out_mode in [self.OUTPUT_INCLUDE, self.OUTPUT_EXPORTED]:
+ if name in self.function_table:
+ return True
+
+ if self.out_mode == self.OUTPUT_INTERNAL:
+ if dtype != "function":
+ self.out_warnings(args)
+ return True
+
+ if name not in self.function_table:
+ self.out_warnings(args)
+ return True
+
+ return False
+
+ def msg(self, fname, name, args):
+ """
+ Handles a single entry from kernel-doc parser.
+
+ Returns the text produced for the entry, which is an empty string
+ if it was filtered out, or None if the entry type is unknown.
+ """
+
+ self.data = ""
+
+ dtype = args.type
+
+ # DOC blocks are not checked by check_declaration()
+ if dtype == "doc":
+ self.out_doc(fname, name, args)
+ return self.data
+
+ if not self.check_declaration(dtype, name, args):
+ return self.data
+
+ if dtype == "function":
+ self.out_function(fname, name, args)
+ return self.data
+
+ if dtype == "enum":
+ self.out_enum(fname, name, args)
+ return self.data
+
+ if dtype == "typedef":
+ self.out_typedef(fname, name, args)
+ return self.data
+
+ if dtype in ["struct", "union"]:
+ self.out_struct(fname, name, args)
+ return self.data
+
+ # Warn if some type requires an output logic
+ self.config.log.warning("doesn't know how to output '%s' block",
+ dtype)
+
+ return None
+
+ # Virtual methods to be overridden by inherited classes
+ # At the base class, those do nothing.
+ def set_symbols(self, symbols):
+ """Get a list of all symbols from kernel_doc"""
+
+ def out_doc(self, fname, name, args):
+ """Outputs a DOC block"""
+
+ def out_function(self, fname, name, args):
+ """Outputs a function"""
+
+ def out_enum(self, fname, name, args):
+ """Outputs an enum"""
+
+ def out_typedef(self, fname, name, args):
+ """Outputs a typedef"""
+
+ def out_struct(self, fname, name, args):
+ """Outputs a struct"""
+
+
+class RestFormat(OutputFormat):
+ """Consts and functions used by ReST output"""
+
+ # (regex, replacement) pairs applied in this order by highlight_block()
+ highlights = [
+ (type_constant, r"``\1``"),
+ (type_constant2, r"``\1``"),
+
+ # Note: need to escape () to avoid func matching later
+ (type_member_func, r":c:type:`\1\2\3\\(\\) <\1>`"),
+ (type_member, r":c:type:`\1\2\3 <\1>`"),
+ (type_fp_param, r"**\1\\(\\)**"),
+ (type_fp_param2, r"**\1\\(\\)**"),
+ (type_func, r"\1()"),
+ (type_enum, r":c:type:`\1 <\2>`"),
+ (type_struct, r":c:type:`\1 <\2>`"),
+ (type_typedef, r":c:type:`\1 <\2>`"),
+ (type_union, r":c:type:`\1 <\2>`"),
+
+ # in rst this can refer to any type
+ (type_fallback, r":c:type:`\1`"),
+ (type_param_ref, r"**\1\2**")
+ ]
+ blankline = "\n"
+
+ # Lines that start a literal block, see output_highlight():
+ # a line ending with "::" that doesn't start with a dot, or a
+ # ".. code-block::" directive.
+ sphinx_literal = KernRe(r'^[^.].*::$', cache=False)
+ sphinx_cblock = KernRe(r'^\.\.\ +code-block::', cache=False)
+
+ def __init__(self):
+ """
+ Creates instance variables.
+
+ Not really mandatory, but it is a good coding style and makes
+ pylint happy.
+ """
+
+ super().__init__()
+ # Prefix prepended to each line emitted by output_highlight()
+ self.lineprefix = ""
+
+ def print_lineno(self, ln):
+ """
+ Outputs a line number as a ".. LINENO" comment.
+
+ Nothing is output if enable_lineno is not set or if ln is None.
+ The number that is output is ln + 1.
+ """
+
+ if self.enable_lineno and ln is not None:
+ ln += 1
+ self.data += f".. LINENO {ln}\n"
+
+ def output_highlight(self, args):
+ """
+ Outputs a C symbol that may require being converted to ReST using
+ the self.highlights variable.
+
+ The text is split into lines. Lines outside literal blocks are
+ collected and converted with highlight_block(); lines inside a
+ literal block (started by a line matching sphinx_literal or
+ sphinx_cblock) are copied without conversion. The result is appended
+ to self.data, with self.lineprefix in front of each line.
+ """
+
+ input_text = args
+ output = ""
+ in_literal = False
+ litprefix = ""
+ block = ""
+
+ for line in input_text.strip("\n").split("\n"):
+
+ # If we're in a literal block, see if we should drop out of it.
+ # Otherwise, pass the line straight through unmunged.
+ if in_literal:
+ if line.strip(): # If the line is not blank
+ # If this is the first non-blank line in a literal block,
+ # figure out the proper indent.
+ if not litprefix:
+ r = KernRe(r'^(\s*)')
+ if r.match(line):
+ litprefix = '^' + r.group(1)
+ else:
+ litprefix = ""
+
+ output += line + "\n"
+ elif not KernRe(litprefix).match(line):
+ in_literal = False
+ else:
+ output += line + "\n"
+ else:
+ output += line + "\n"
+
+ # Not in a literal block (or just dropped out)
+ if not in_literal:
+ block += line + "\n"
+ if self.sphinx_literal.match(line) or self.sphinx_cblock.match(line):
+ # Flush what was collected so far before the literal block
+ in_literal = True
+ litprefix = ""
+ output += self.highlight_block(block)
+ block = ""
+
+ # Handle any remaining block
+ if block:
+ output += self.highlight_block(block)
+
+ # Print the output with the line prefix
+ for line in output.strip("\n").split("\n"):
+ self.data += self.lineprefix + line + "\n"
+
+ def out_section(self, args, out_docblock=False):
+ """
+ Outputs a block section.
+
+ This could use some work; it's used to output the DOC: sections, and
+ starts by putting out the name of the doc section itself, but that
+ tends to duplicate a header already in the template file.
+
+ With out_docblock set, a ".. _<section>:" label is also output,
+ except on OUTPUT_INCLUDE mode.
+ """
+ for section, text in args.sections.items():
+ # Skip sections that are in the nosymbol_table
+ if section in self.nosymbol:
+ continue
+
+ if out_docblock:
+ if not self.out_mode == self.OUTPUT_INCLUDE:
+ self.data += f".. _{section}:\n\n"
+ self.data += f'{self.lineprefix}**{section}**\n\n'
+ else:
+ self.data += f'{self.lineprefix}**{section}**\n\n'
+
+ # section_start_lines is the attribute assigned by
+ # KdocItem.set_sections()
+ self.print_lineno(args.section_start_lines.get(section, 0))
+ self.output_highlight(text)
+ self.data += "\n"
+ self.data += "\n"
+
+ def out_doc(self, fname, name, args):
+ """Outputs a DOC block in ReST, if check_doc() accepts it"""
+ if not self.check_doc(name, args):
+ return
+ self.out_section(args, out_docblock=True)
+
+ def out_function(self, fname, name, args):
+     """
+     Outputs a function in ReST.
+
+     Entries flagged as 'typedef' or without a 'functiontype' are output
+     as "c:macro"; the others as "c:function". The directive is followed
+     by the purpose, the parameters and the documented sections.
+     """
+
+     oldprefix = self.lineprefix
+     signature = ""
+
+     func_macro = args.get('func_macro', False)
+     if func_macro:
+         signature = name
+     else:
+         if args.get('functiontype'):
+             signature = args['functiontype'] + " "
+         signature += name + " ("
+
+     ln = args.declaration_start_line
+     count = 0
+     for parameter in args.parameterlist:
+         if count != 0:
+             signature += ", "
+         count += 1
+         dtype = args.parametertypes.get(parameter, "")
+
+         if function_pointer.search(dtype):
+             # function_pointer has two groups: the text up to "(*" and
+             # the argument list. Place the parameter name between them.
+             signature += function_pointer.group(1) + parameter + \
+                          ") (" + function_pointer.group(2) + ")"
+         else:
+             signature += dtype
+
+     if not func_macro:
+         signature += ")"
+
+     self.print_lineno(ln)
+     if args.get('typedef') or not args.get('functiontype'):
+         self.data += f".. c:macro:: {name}\n\n"
+
+         if args.get('typedef'):
+             self.data += " **Typedef**: "
+             self.lineprefix = ""
+             self.output_highlight(args.get('purpose', ""))
+             self.data += "\n\n**Syntax**\n\n"
+             self.data += f" ``{signature}``\n\n"
+         else:
+             self.data += f"``{signature}``\n\n"
+     else:
+         self.data += f".. c:function:: {signature}\n\n"
+
+     # For typedefs, the purpose was already output above
+     if not args.get('typedef'):
+         self.print_lineno(ln)
+         self.lineprefix = " "
+         self.output_highlight(args.get('purpose', ""))
+         self.data += "\n"
+
+     # Put descriptive text into a container (HTML <div>) to help set
+     # function prototypes apart
+     self.lineprefix = " "
+
+     if args.parameterlist:
+         self.data += ".. container:: kernelindent\n\n"
+         self.data += f"{self.lineprefix}**Parameters**\n\n"
+
+     for parameter in args.parameterlist:
+         # Descriptions are keyed by the name without any "[...]" suffix
+         parameter_name = KernRe(r'\[.*').sub('', parameter)
+         dtype = args.parametertypes.get(parameter, "")
+
+         if dtype:
+             self.data += f"{self.lineprefix}``{dtype}``\n"
+         else:
+             self.data += f"{self.lineprefix}``{parameter}``\n"
+
+         self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
+
+         self.lineprefix = " "
+         if parameter_name in args.parameterdescs and \
+            args.parameterdescs[parameter_name] != KernelDoc.undescribed:
+
+             self.output_highlight(args.parameterdescs[parameter_name])
+             self.data += "\n"
+         else:
+             self.data += f"{self.lineprefix}*undescribed*\n\n"
+         self.lineprefix = " "
+
+     self.out_section(args)
+     self.lineprefix = oldprefix
+
+ def out_enum(self, fname, name, args):
+ """
+ Outputs an enum in ReST: a "c:enum" directive followed by the
+ purpose, the constants and the documented sections.
+ """
+
+ oldprefix = self.lineprefix
+ ln = args.declaration_start_line
+
+ self.data += f"\n\n.. c:enum:: {name}\n\n"
+
+ self.print_lineno(ln)
+ self.lineprefix = " "
+ self.output_highlight(args.get('purpose', ''))
+ self.data += "\n"
+
+ self.data += ".. container:: kernelindent\n\n"
+ outer = self.lineprefix + " "
+ self.lineprefix = outer + " "
+ self.data += f"{outer}**Constants**\n\n"
+
+ for parameter in args.parameterlist:
+ self.data += f"{outer}``{parameter}``\n"
+
+ # NOTE(review): a constant missing from parameterdescs passes this
+ # test and is then looked up by key - presumably the parser always
+ # fills an entry for each constant; confirm.
+ if args.parameterdescs.get(parameter, '') != KernelDoc.undescribed:
+ self.output_highlight(args.parameterdescs[parameter])
+ else:
+ self.data += f"{self.lineprefix}*undescribed*\n\n"
+ self.data += "\n"
+
+ self.lineprefix = oldprefix
+ self.out_section(args)
+
+ def out_typedef(self, fname, name, args):
+ """
+ Outputs a typedef in ReST: a "c:type" directive followed by the
+ purpose and the documented sections.
+ """
+
+ oldprefix = self.lineprefix
+ ln = args.declaration_start_line
+
+ self.data += f"\n\n.. c:type:: {name}\n\n"
+
+ self.print_lineno(ln)
+ self.lineprefix = " "
+
+ self.output_highlight(args.get('purpose', ''))
+
+ self.data += "\n"
+
+ self.lineprefix = oldprefix
+ self.out_section(args)
+
+ def out_struct(self, fname, name, args):
+ """
+ Outputs a struct or union in ReST: a "c:struct" or "c:union"
+ directive followed by the purpose, the definition as a literal
+ block, the described members and the documented sections.
+ """
+
+ purpose = args.get('purpose', "")
+ declaration = args.get('definition', "")
+ dtype = args.type
+ ln = args.declaration_start_line
+
+ self.data += f"\n\n.. c:{dtype}:: {name}\n\n"
+
+ self.print_lineno(ln)
+
+ oldprefix = self.lineprefix
+ self.lineprefix += " "
+
+ self.output_highlight(purpose)
+ self.data += "\n"
+
+ self.data += ".. container:: kernelindent\n\n"
+ self.data += f"{self.lineprefix}**Definition**::\n\n"
+
+ self.lineprefix = self.lineprefix + " "
+
+ # Each tab of the definition is replaced by the current line prefix
+ declaration = declaration.replace("\t", self.lineprefix)
+
+ self.data += f"{self.lineprefix}{dtype} {name}" + ' {' + "\n"
+ self.data += f"{declaration}{self.lineprefix}" + "};\n\n"
+
+ self.lineprefix = " "
+ self.data += f"{self.lineprefix}**Members**\n\n"
+ for parameter in args.parameterlist:
+ # Skip empty names and names starting with "#"
+ if not parameter or parameter.startswith("#"):
+ continue
+
+ # Descriptions are keyed by the name without any "[...]" suffix
+ parameter_name = parameter.split("[", maxsplit=1)[0]
+
+ # Undescribed members are not listed
+ if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+ continue
+
+ self.print_lineno(args.parameterdesc_start_lines.get(parameter_name, 0))
+
+ self.data += f"{self.lineprefix}``{parameter}``\n"
+
+ self.lineprefix = " "
+ self.output_highlight(args.parameterdescs[parameter_name])
+ self.lineprefix = " "
+
+ self.data += "\n"
+
+ self.data += "\n"
+
+ self.lineprefix = oldprefix
+ self.out_section(args)
+
+
+class ManFormat(OutputFormat):
+ """Consts and functions used by man pages output"""
+
+ # (regex, replacement) pairs applied in this order by highlight_block()
+ highlights = (
+ (type_constant, r"\1"),
+ (type_constant2, r"\1"),
+ (type_func, r"\\fB\1\\fP"),
+ (type_enum, r"\\fI\1\\fP"),
+ (type_struct, r"\\fI\1\\fP"),
+ (type_typedef, r"\\fI\1\\fP"),
+ (type_union, r"\\fI\1\\fP"),
+ (type_param, r"\\fI\1\\fP"),
+ (type_param_ref, r"\\fI\1\2\\fP"),
+ (type_member, r"\\fI\1\2\3\\fP"),
+ (type_fallback, r"\\fI\1\\fP")
+ )
+ blankline = ""
+
+ # Formats tried, in this order, to parse KBUILD_BUILD_TIMESTAMP
+ date_formats = [
+ "%a %b %d %H:%M:%S %Z %Y",
+ "%a %b %d %H:%M:%S %Y",
+ "%Y-%m-%d",
+ "%b %d %Y",
+ "%B %d %Y",
+ "%m %d %Y",
+ ]
+
+ def __init__(self, modulename):
+ """
+ Creates instance variables.
+
+ Not really mandatory, but it is a good coding style and makes
+ pylint happy.
+
+ The date used at the man pages (self.man_date, as "month year")
+ comes from the KBUILD_BUILD_TIMESTAMP environment variable. If it is
+ not set or doesn't match any of date_formats, the current date is
+ used.
+ """
+
+ super().__init__()
+ self.modulename = modulename
+ self.symbols = []
+
+ dt = None
+ tstamp = os.environ.get("KBUILD_BUILD_TIMESTAMP")
+ if tstamp:
+ for fmt in self.date_formats:
+ try:
+ dt = datetime.strptime(tstamp, fmt)
+ break
+ except ValueError:
+ pass
+
+ if not dt:
+ dt = datetime.now()
+
+ self.man_date = dt.strftime("%B %Y")
+
+ def arg_name(self, args, name):
+ """
+ Return the name that will be used for the man page.
+
+ As we may have the same name on different namespaces,
+ prepend the data type for all types except functions and typedefs.
+
+ The doc section is special: it uses the modulename.
+
+ For instance, a struct named foo gives "struct foo".
+ """
+
+ dtype = args.type
+
+ if dtype == "doc":
+ return self.modulename
+
+ if dtype in ["function", "typedef"]:
+ return name
+
+ return f"{dtype} {name}"
+
+ def set_symbols(self, symbols):
+ """
+ Get a list of all symbols from kernel_doc.
+
+ Man pages use it to add a SEE ALSO section with the other
+ symbols from the same file.
+ """
+ self.symbols = symbols
+
+ def out_tail(self, fname, name, args):
+ """
+ Adds a tail for all man pages.
+
+ The tail is a SEE ALSO section with the source file name and, if
+ set_symbols() received two or more symbols, the other symbols.
+ """
+
+ # SEE ALSO section
+ self.data += f'.SH "SEE ALSO"' + "\n.PP\n"
+ self.data += (f"Kernel file \\fB{args.fname}\\fR\n")
+ if len(self.symbols) >= 2:
+ cur_name = self.arg_name(args, name)
+
+ related = []
+ for arg in self.symbols:
+ out_name = self.arg_name(arg, arg.name)
+
+ # Don't list the page being output as related to itself
+ if cur_name == out_name:
+ continue
+
+ related.append(f"\\fB{out_name}\\fR(9)")
+
+ self.data += ",\n".join(related) + "\n"
+
+ # TODO: does it make sense to add other sections? Maybe
+ # REPORTING ISSUES? LICENSE?
+
+ def msg(self, fname, name, args):
+     """
+     Handles a single entry from kernel-doc parser.
+
+     Add a tail at the end of man pages output.
+
+     Returns the man page text. As in OutputFormat.msg(), the result is
+     an empty string if the entry was filtered out and None if its type
+     is unknown: no tail is added in those cases.
+     """
+     data = super().msg(fname, name, args)
+
+     # No page was produced: don't output a tail alone and keep None
+     # visible to the caller
+     if not data:
+         return data
+
+     self.out_tail(fname, name, args)
+
+     return self.data
+
+ def output_highlight(self, block):
+ """
+ Outputs a C symbol that may require being highlighted with
+ self.highlights variable using troff syntax.
+
+ Leading whitespace is removed from each line and empty lines are
+ dropped.
+ """
+
+ contents = self.highlight_block(block)
+
+ if isinstance(contents, list):
+ contents = "\n".join(contents)
+
+ for line in contents.strip("\n").split("\n"):
+ line = KernRe(r"^\s*").sub("", line)
+ if not line:
+ continue
+
+ # Lines starting with a dot get a "\&" in front of them, presumably
+ # so troff doesn't handle them as requests
+ if line[0] == ".":
+ self.data += "\\&" + line + "\n"
+ else:
+ self.data += line + "\n"
+
+ def out_doc(self, fname, name, args):
+ """
+ Outputs a DOC block as a man page, if check_doc() accepts it:
+ a .TH header followed by one .SH per section.
+ """
+ if not self.check_doc(name, args):
+ return
+
+ out_name = self.arg_name(args, name)
+
+ self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ for section, text in args.sections.items():
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(text)
+
+ def out_function(self, fname, name, args):
+     """
+     Output a function as a man page: .TH header, NAME, SYNOPSIS,
+     ARGUMENTS (if there are parameters) and one .SH per documented
+     section, with the section name in upper case.
+     """
+
+     out_name = self.arg_name(args, name)
+
+     self.data += f'.TH "{name}" 9 "{out_name}" "{self.man_date}" "Kernel Hacker\'s Manual" LINUX' + "\n"
+
+     self.data += ".SH NAME\n"
+     self.data += f"{name} \\- {args['purpose']}\n"
+
+     self.data += ".SH SYNOPSIS\n"
+     if args.get('functiontype', ''):
+         self.data += f'.B "{args["functiontype"]}" {name}' + "\n"
+     else:
+         self.data += f'.B "{name}"' + "\n"
+
+     count = 0
+     parenth = "("
+     post = ","
+
+     for parameter in args.parameterlist:
+         # The last parameter closes the prototype
+         if count == len(args.parameterlist) - 1:
+             post = ");"
+
+         dtype = args.parametertypes.get(parameter, "")
+         if function_pointer.match(dtype):
+             # Pointer-to-function. The line has to start with the
+             # request itself (.BI), not with a quote.
+             self.data += f'.BI "{parenth}{function_pointer.group(1)}" " ") ({function_pointer.group(2)}){post}"' + "\n"
+         else:
+             # Add a space after the type, unless it ends with "*"
+             dtype = KernRe(r'([^\*])$').sub(r'\1 ', dtype)
+
+             self.data += f'.BI "{parenth}{dtype}" "{post}"' + "\n"
+         count += 1
+         parenth = ""
+
+     if args.parameterlist:
+         self.data += ".SH ARGUMENTS\n"
+
+     for parameter in args.parameterlist:
+         # Descriptions are keyed by the name without any "[...]" suffix
+         parameter_name = re.sub(r'\[.*', '', parameter)
+
+         self.data += f'.IP "{parameter}" 12' + "\n"
+         self.output_highlight(args.parameterdescs.get(parameter_name, ""))
+
+     for section, text in args.sections.items():
+         self.data += f'.SH "{section.upper()}"' + "\n"
+         self.output_highlight(text)
+
+ def out_enum(self, fname, name, args):
+ """
+ Output an enum as a man page: .TH header, NAME, SYNOPSIS with the
+ list of constants, the description of each constant and one .SH per
+ documented section.
+ """
+ out_name = self.arg_name(args, name)
+
+ self.data += f'.TH "{self.modulename}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"enum {name} \\- {args['purpose']}\n"
+
+ self.data += ".SH SYNOPSIS\n"
+ self.data += f"enum {name}" + " {\n"
+
+ count = 0
+ for parameter in args.parameterlist:
+ self.data += f'.br\n.BI " {parameter}"' + "\n"
+ # The last constant closes the enum; the others get a comma
+ if count == len(args.parameterlist) - 1:
+ self.data += "\n};\n"
+ else:
+ self.data += ", \n.br\n"
+
+ count += 1
+
+ self.data += ".SH Constants\n"
+
+ for parameter in args.parameterlist:
+ # Descriptions are keyed by the name without any "[...]" suffix
+ parameter_name = KernRe(r'\[.*').sub('', parameter)
+ self.data += f'.IP "{parameter}" 12' + "\n"
+ self.output_highlight(args.parameterdescs.get(parameter_name, ""))
+
+ for section, text in args.sections.items():
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(text)
+
+ def out_typedef(self, fname, name, args):
+ """
+ Output a typedef as a man page: .TH header, NAME and one .SH per
+ documented section.
+ """
+ module = self.modulename
+ purpose = args.get('purpose')
+ out_name = self.arg_name(args, name)
+
+ self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"typedef {name} \\- {purpose}\n"
+
+ for section, text in args.sections.items():
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(text)
+
+ def out_struct(self, fname, name, args):
+ """
+ Output a struct or union as a man page: .TH header, NAME, SYNOPSIS
+ with the definition, the described members and one .SH per
+ documented section.
+ """
+ module = self.modulename
+ purpose = args.get('purpose')
+ # NOTE(review): no default is given here, so definition is presumably
+ # None when the entry has no 'definition' and the replace() below
+ # would fail - confirm the parser always sets it.
+ definition = args.get('definition')
+ out_name = self.arg_name(args, name)
+
+ self.data += f'.TH "{module}" 9 "{out_name}" "{self.man_date}" "API Manual" LINUX' + "\n"
+
+ self.data += ".SH NAME\n"
+ self.data += f"{args.type} {name} \\- {purpose}\n"
+
+ # Replace tabs with two spaces and handle newlines
+ declaration = definition.replace("\t", " ")
+ declaration = KernRe(r"\n").sub('"\n.br\n.BI "', declaration)
+
+ self.data += ".SH SYNOPSIS\n"
+ self.data += f"{args.type} {name} " + "{" + "\n.br\n"
+ self.data += f'.BI "{declaration}\n' + "};\n.br\n\n"
+
+ self.data += ".SH Members\n"
+ for parameter in args.parameterlist:
+ if parameter.startswith("#"):
+ continue
+
+ # Descriptions are keyed by the name without any "[...]" suffix
+ parameter_name = re.sub(r"\[.*", "", parameter)
+
+ # Undescribed members are not listed
+ if args.parameterdescs.get(parameter_name) == KernelDoc.undescribed:
+ continue
+
+ self.data += f'.IP "{parameter}" 12' + "\n"
+ self.output_highlight(args.parameterdescs.get(parameter_name))
+
+ for section, text in args.sections.items():
+ self.data += f'.SH "{section}"' + "\n"
+ self.output_highlight(text)
diff --git a/tools/lib/python/kdoc/kdoc_parser.py b/tools/lib/python/kdoc/kdoc_parser.py
new file mode 100644
index 000000000000..500aafc50032
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_parser.py
@@ -0,0 +1,1670 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+#
+# pylint: disable=C0301,C0302,R0904,R0912,R0913,R0914,R0915,R0917,R1702
+
+"""
+kdoc_parser
+===========
+
+Read a C language source or header FILE and extract embedded
+documentation comments
+"""
+
+import sys
+import re
+from pprint import pformat
+
+from kdoc.kdoc_re import NestedMatch, KernRe
+from kdoc.kdoc_item import KdocItem
+
+#
+# Regular expressions used by the KernelDoc class to parse kernel-doc markup.
+#
+# Let's declare them in lowercase outside any class to make it easier to
+# convert from the Perl script.
+#
+# As those are evaluated at the beginning, no need to cache them
+#
+
+# Allow whitespace at end of comment start.
+doc_start = KernRe(r'^/\*\*\s*$', cache=False)
+
+# End-of-comment marker, and the leading "*" of a comment line. doc_com
+# swallows all whitespace after the "*"; doc_com_body swallows at most
+# one space, so the rest of the line keeps its own leading whitespace.
+doc_end = KernRe(r'\*/', cache=False)
+doc_com = KernRe(r'\s*\*\s*', cache=False)
+doc_com_body = KernRe(r'\s*\* ?', cache=False)
+# A comment line whose first word is captured as group 1
+doc_decl = doc_com + KernRe(r'(\w+)', cache=False)
+
+# @params and a strictly limited set of supported section names
+# Specifically:
+# Match @word:
+# @...:
+# @{section-name}:
+# while trying to not match literal block starts like "example::"
+#
+known_section_names = 'description|context|returns?|notes?|examples?'
+known_sections = KernRe(known_section_names, flags = re.I)
+doc_sect = doc_com + \
+ KernRe(r'\s*(@[.\w]+|@\.\.\.|' + known_section_names + r')\s*:([^:].*)?$',
+ flags=re.I, cache=False)
+
+# Body text of a comment line (group 1), followed by the patterns used
+# for inline comments: opening "/**", "@name: text", closing "*/" and
+# the single-line "/** @name: text */" form.
+doc_content = doc_com_body + KernRe(r'(.*)', cache=False)
+doc_inline_start = KernRe(r'^\s*/\*\*\s*$', cache=False)
+doc_inline_sect = KernRe(r'\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)', cache=False)
+doc_inline_end = KernRe(r'^\s*\*/\s*$', cache=False)
+doc_inline_oneline = KernRe(r'^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$', cache=False)
+
+# EXPORT_SYMBOL*() lines; group 2 is the exported symbol name in both
+# patterns.
+export_symbol = KernRe(r'^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*', cache=False)
+export_symbol_ns = KernRe(r'^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*"\S+"\)\s*', cache=False)
+
+# "@name" references, including "@a.b", "@a->b" and a trailing "...";
+# group 1 is the name without the "@".
+type_param = KernRe(r"@(\w*((\.\w+)|(->\w+))*(\.\.\.)?)", cache=False)
+
+#
+# Tests for the beginning of a kerneldoc block in its various forms.
+#
+doc_block = doc_com + KernRe(r'DOC:\s*(.*)?', cache=False)
+doc_begin_data = KernRe(r"^\s*\*?\s*(struct|union|enum|typedef)\b\s*(\w*)", cache = False)
+doc_begin_func = KernRe(str(doc_com) + # initial " * "
+ r"(?:\w+\s*\*\s*)?" + # type (not captured)
+ r'(?:define\s+)?' + # possible "define" (not captured)
+ r'(\w+)\s*(?:\(\w*\))?\s*' + # name and optional "(...)"
+ r'(?:[-:].*)?$', # description (not captured)
+ cache = False)
+
+#
+# Here begins a long set of transformations to turn structure member prefixes
+# and macro invocations into something we can parse and generate kdoc for.
+#
+# One macro argument: anything up to the next "," or ")"
+struct_args_pattern = r'([^,)]+)'
+
+# List of (pattern, replacement) pairs; apply_transforms() applies them
+# to the struct body in the order given here.
+struct_xforms = [
+ # Strip attributes
+ (KernRe(r"__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)", flags=re.I | re.S, cache=False), ' '),
+ (KernRe(r'\s*__aligned\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__counted_by\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__counted_by_(le|be)\s*\([^;]*\)', re.S), ' '),
+ (KernRe(r'\s*__packed\s*', re.S), ' '),
+ (KernRe(r'\s*CRYPTO_MINALIGN_ATTR', re.S), ' '),
+ (KernRe(r'\s*__private', re.S), ' '),
+ (KernRe(r'\s*__rcu', re.S), ' '),
+ (KernRe(r'\s*____cacheline_aligned_in_smp', re.S), ' '),
+ (KernRe(r'\s*____cacheline_aligned', re.S), ' '),
+ (KernRe(r'\s*__cacheline_group_(begin|end)\([^\)]+\);'), ''),
+ #
+ # Unwrap struct_group macros based on this definition:
+ # __struct_group(TAG, NAME, ATTRS, MEMBERS...)
+ # which has variants like: struct_group(NAME, MEMBERS...)
+ # Only MEMBERS arguments require documentation.
+ #
+ # Parsing them happens in two steps:
+ #
+ # 1. drop the struct group arguments that are not MEMBERS,
+ # storing the result as STRUCT_GROUP(MEMBERS)
+ #
+ # 2. remove the STRUCT_GROUP() ancillary macro.
+ #
+ # The original logic used to remove STRUCT_GROUP() using an
+ # advanced regex:
+ #
+ # \bSTRUCT_GROUP(\(((?:(?>[^)(]+)|(?1))*)\))[^;]*;
+ #
+ # with two patterns that are incompatible with
+ # Python re module, as it has:
+ #
+ # - a recursive pattern: (?1)
+ # - an atomic grouping: (?>...)
+ #
+ # A simpler version was tried, but it didn't work either:
+ # \bSTRUCT_GROUP\(([^\)]+)\)[^;]*;
+ #
+ # as it doesn't properly match the closing parenthesis in some cases.
+ #
+ # So, a better solution was crafted: there's now a NestedMatch
+ # class that ensures that delimiters after a search are properly
+ # matched. So, the implementation to drop STRUCT_GROUP() is
+ # handled separately.
+ #
+ (KernRe(r'\bstruct_group\s*\(([^,]*,)', re.S), r'STRUCT_GROUP('),
+ (KernRe(r'\bstruct_group_attr\s*\(([^,]*,){2}', re.S), r'STRUCT_GROUP('),
+ (KernRe(r'\bstruct_group_tagged\s*\(([^,]*),([^,]*),', re.S), r'struct \1 \2; STRUCT_GROUP('),
+ (KernRe(r'\b__struct_group\s*\(([^,]*,){3}', re.S), r'STRUCT_GROUP('),
+ #
+ # Replace macros
+ #
+ # TODO: use NestedMatch for FOO($1, $2, ...) matches
+ #
+ # it is better to also move those to the NestedMatch logic,
+ # to ensure that parentheses will be properly matched.
+ #
+ (KernRe(r'__ETHTOOL_DECLARE_LINK_MODE_MASK\s*\(([^\)]+)\)', re.S),
+ r'DECLARE_BITMAP(\1, __ETHTOOL_LINK_MODE_MASK_NBITS)'),
+ (KernRe(r'DECLARE_PHY_INTERFACE_MASK\s*\(([^\)]+)\)', re.S),
+ r'DECLARE_BITMAP(\1, PHY_INTERFACE_MODE_MAX)'),
+ (KernRe(r'DECLARE_BITMAP\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+ re.S), r'unsigned long \1[BITS_TO_LONGS(\2)]'),
+ (KernRe(r'DECLARE_HASHTABLE\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern + r'\)',
+ re.S), r'unsigned long \1[1 << ((\2) - 1)]'),
+ (KernRe(r'DECLARE_KFIFO\s*\(' + struct_args_pattern + r',\s*' + struct_args_pattern +
+ r',\s*' + struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+ (KernRe(r'DECLARE_KFIFO_PTR\s*\(' + struct_args_pattern + r',\s*' +
+ struct_args_pattern + r'\)', re.S), r'\2 *\1'),
+ (KernRe(r'(?:__)?DECLARE_FLEX_ARRAY\s*\(' + struct_args_pattern + r',\s*' +
+ struct_args_pattern + r'\)', re.S), r'\1 \2[]'),
+ (KernRe(r'DEFINE_DMA_UNMAP_ADDR\s*\(' + struct_args_pattern + r'\)', re.S), r'dma_addr_t \1'),
+ (KernRe(r'DEFINE_DMA_UNMAP_LEN\s*\(' + struct_args_pattern + r'\)', re.S), r'__u32 \1'),
+]
+#
+# Regexes here are guaranteed to have the end delimiter matching
+# the start delimiter. Yet, right now, only one replace group
+# is allowed.
+#
+# (prefix pattern, replacement) pairs that dump_struct() hands to
+# NestedMatch.sub() after struct_xforms has been applied.
+struct_nested_prefixes = [
+ (re.compile(r'\bSTRUCT_GROUP\('), r'\1'),
+]
+
+#
+# Transforms for function prototypes
+#
+# (pattern, replacement) pairs applied in order by dump_function() through
+# apply_transforms(). The entries anchored with "^" only match at the
+# start of the prototype; the remaining ones match anywhere in it.
+function_xforms = [
+ (KernRe(r"^static +"), ""),
+ (KernRe(r"^extern +"), ""),
+ (KernRe(r"^asmlinkage +"), ""),
+ (KernRe(r"^inline +"), ""),
+ (KernRe(r"^__inline__ +"), ""),
+ (KernRe(r"^__inline +"), ""),
+ (KernRe(r"^__always_inline +"), ""),
+ (KernRe(r"^noinline +"), ""),
+ (KernRe(r"^__FORTIFY_INLINE +"), ""),
+ (KernRe(r"__init +"), ""),
+ (KernRe(r"__init_or_module +"), ""),
+ (KernRe(r"__deprecated +"), ""),
+ (KernRe(r"__flatten +"), ""),
+ (KernRe(r"__meminit +"), ""),
+ (KernRe(r"__must_check +"), ""),
+ (KernRe(r"__weak +"), ""),
+ (KernRe(r"__sched +"), ""),
+ (KernRe(r"_noprof"), ""),
+ (KernRe(r"__always_unused *"), ""),
+ (KernRe(r"__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +"), ""),
+ (KernRe(r"__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +"), ""),
+ (KernRe(r"__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +"), ""),
+ # DECL_BUCKET_PARAMS(a, b) is replaced by its two arguments
+ (KernRe(r"DECL_BUCKET_PARAMS\s*\(\s*(\S+)\s*,\s*(\S+)\s*\)"), r"\1, \2"),
+ (KernRe(r"__attribute_const__ +"), ""),
+ (KernRe(r"__attribute__\s*\(\((?:[\w\s]+(?:\([^)]*\))?\s*,?)+\)\)\s+"), ""),
+]
+
+#
+# Apply a set of transforms to a block of text.
+#
+def apply_transforms(xforms, text):
+    """
+    Run ``text`` through every (pattern, replacement) pair of ``xforms``,
+    in order, feeding each result into the next substitution, and return
+    the final string.
+    """
+    result = text
+    for pattern, replacement in xforms:
+        result = pattern.sub(replacement, result)
+    return result
+
+#
+# A little helper to get rid of excess white space
+#
+multi_space = KernRe(r'\s\s+')
+def trim_whitespace(s):
+    """Strip ``s`` and squeeze every run of two or more whitespace characters into one space."""
+    stripped = s.strip()
+    return multi_space.sub(' ', stripped)
+
+#
+# Remove struct/enum members that have been marked "private".
+#
+def trim_private_members(text):
+    """
+    Return ``text`` without the members marked private and without any
+    C comments, stripped of leading and trailing whitespace.
+    """
+    # A private region that is closed again by a "/* public: ... */" comment
+    private_until_public = KernRe(r'/\*\s*private:.*?/\*\s*public:.*?\*/', flags=re.S)
+    # A private region that is never closed: private until the end
+    private_to_end = KernRe(r'/\*\s*private:.*', flags=re.S)
+    # Any remaining comment, together with the whitespace around it
+    any_comment = KernRe(r'\s*/\*.*?\*/\s*', flags=re.S)
+
+    visible = private_until_public.sub('', text)
+    visible = private_to_end.sub('', visible)
+    #
+    # The comments were needed as markers above; now they can go too.
+    #
+    return any_comment.sub('', visible).strip()
+
+class state:
+    """
+    Parser state machine values.
+
+    NORMAL          - normal code
+    NAME            - looking for function name
+    DECLARATION     - a declaration has been seen which might not be done
+    BODY            - the body of the comment
+    SPECIAL_SECTION - doc section ending with a blank line
+    PROTO           - scanning prototype
+    DOCBLOCK        - documentation block
+    INLINE_NAME     - gathering doc outside main block
+    INLINE_TEXT     - reading the body of inline docs
+    """
+
+    (NORMAL, NAME, DECLARATION, BODY, SPECIAL_SECTION,
+     PROTO, DOCBLOCK, INLINE_NAME, INLINE_TEXT) = range(9)
+
+    # Printable names, indexed by the state values above
+    name = [
+        "NORMAL",
+        "NAME",
+        "DECLARATION",
+        "BODY",
+        "SPECIAL_SECTION",
+        "PROTO",
+        "DOCBLOCK",
+        "INLINE_NAME",
+        "INLINE_TEXT",
+    ]
+
+
+# Title of the section that KernelEntry.begin_section() uses when no title
+# is given and that KernelEntry.dump_section() returns to afterwards.
+SECTION_DEFAULT = "Description" # default section
+
+class KernelEntry:
+    """
+    Data gathered while parsing one kernel-doc comment: the text of the
+    section currently being read, the finished sections, the parameter
+    descriptions and the warnings raised along the way.
+    """
+
+    def __init__(self, config, fname, ln):
+        """
+        Create an empty entry for file ``fname``; ``ln`` is used to derive
+        ``declaration_start_line`` (``ln + 1``).
+        """
+        self.config = config
+        self.fname = fname
+
+        self._contents = []
+        self.prototype = ""
+
+        self.warnings = []
+
+        self.parameterlist = []
+        self.parameterdescs = {}
+        self.parametertypes = {}
+        self.parameterdesc_start_lines = {}
+
+        self.section_start_lines = {}
+        self.sections = {}
+
+        self.anon_struct_union = False
+
+        self.leading_space = None
+
+        # Section being accumulated and the line it started at. They are
+        # initialized here so that dump_section() can be called safely
+        # before the first begin_section().
+        self.section = SECTION_DEFAULT
+        self.new_start_line = 0
+
+        # State flags
+        self.brcount = 0
+        self.declaration_start_line = ln + 1
+
+    #
+    # Management of section contents
+    #
+    def add_text(self, text):
+        """Append one line of text to the section being accumulated."""
+        self._contents.append(text)
+
+    def contents(self):
+        """Return the accumulated lines joined by newlines, with a trailing newline."""
+        return '\n'.join(self._contents) + '\n'
+
+    # TODO: rename to emit_message after removal of kernel-doc.pl
+    def emit_msg(self, ln, msg, *, warning=True):
+        """
+        Emit a message prefixed with "<file>:<line>".
+
+        Informational messages go to the log right away. Warnings are only
+        stored in ``self.warnings``: their output is delegated to the output
+        logic, so that they are reported only for symbols that are output.
+        """
+
+        log_msg = f"{self.fname}:{ln} {msg}"
+
+        if not warning:
+            self.config.log.info(log_msg)
+            return
+
+        self.warnings.append(log_msg)
+
+    #
+    # Begin a new section.
+    #
+    def begin_section(self, line_no, title = SECTION_DEFAULT, dump = False):
+        """
+        Start section ``title`` at line ``line_no``. With ``dump`` set, the
+        section accumulated so far is stored first.
+        """
+        if dump:
+            self.dump_section(start_new = True)
+        self.section = title
+        self.new_start_line = line_no
+
+    def dump_section(self, start_new=True):
+        """
+        Dumps section contents to arrays/hashes intended for that purpose.
+
+        A section whose name matches ``type_param`` ("@name") is stored as a
+        parameter description; anything else is stored in ``self.sections``.
+        With ``start_new`` set, the current section is reset to the default
+        one and the accumulated text is dropped.
+        """
+        #
+        # If we have accumulated no contents in the default ("description")
+        # section, don't bother.
+        #
+        if self.section == SECTION_DEFAULT and not self._contents:
+            return
+        name = self.section
+        contents = self.contents()
+
+        if type_param.match(name):
+            name = type_param.group(1)
+
+            self.parameterdescs[name] = contents
+            self.parameterdesc_start_lines[name] = self.new_start_line
+
+            self.new_start_line = 0
+
+        else:
+            if name in self.sections and self.sections[name] != "":
+                # Only warn on user-specified duplicate section names
+                if name != SECTION_DEFAULT:
+                    self.emit_msg(self.new_start_line,
+                                  f"duplicate section name '{name}'")
+                # Treat as a new paragraph - add a blank line
+                self.sections[name] += '\n' + contents
+            else:
+                self.sections[name] = contents
+                self.section_start_lines[name] = self.new_start_line
+                self.new_start_line = 0
+
+        if start_new:
+            self.section = SECTION_DEFAULT
+            self._contents = []
+
+# Set to True by KernelDoc.__init__() once the "Python 3.7 or later is
+# required" warning has been emitted, so that it is reported only once.
+python_warning = False
+
+class KernelDoc:
+ """
+ Read a C language source or header FILE and extract embedded
+ documentation comments.
+ """
+
+ # Section names
+
+ section_context = "Context"
+ section_return = "Return"
+
+ undescribed = "-- undescribed --"
+
+ def __init__(self, config, fname):
+     """
+     Set up a parser for file ``fname`` using ``config``: the state
+     machine starts in ``state.NORMAL``, no entry is being processed
+     and the list of parsed entries is empty.
+     """
+     global python_warning
+
+     self.fname = fname
+     self.config = config
+
+     # State machine starts out in normal code
+     self.state = state.NORMAL
+
+     # Entry currently being processed, and everything stored so far
+     self.entry = None
+     self.entries = []
+
+     #
+     # Python 3.7 is needed for its "dicts remember the insertion
+     # order" guarantee; complain once if we are running on less.
+     #
+     if python_warning:
+         return
+
+     version = sys.version_info
+     if version.major == 3 and version.minor < 7:
+         self.emit_msg(0,
+                       'Python 3.7 or later is required for correct results')
+         python_warning = True
+
+ def emit_msg(self, ln, msg, *, warning=True):
+     """
+     Emit a message for line ``ln``.
+
+     While an entry is being processed the message is handed to it;
+     otherwise it is logged directly as a warning or as an info
+     message, prefixed with "<file>:<line>".
+     """
+     current = self.entry
+     if current:
+         current.emit_msg(ln, msg, warning=warning)
+         return
+
+     text = f"{self.fname}:{ln} {msg}"
+
+     log = self.config.log
+     report = log.warning if warning else log.info
+     report(text)
+
+ def dump_section(self, start_new=True):
+     """
+     Store the section accumulated by the current entry, if there is
+     an entry; ``start_new`` is passed through to it unchanged.
+     """
+     current = self.entry
+     if not current:
+         return
+     current.dump_section(start_new)
+
+ # TODO: rename it to store_declaration after removal of kernel-doc.pl
+ def output_declaration(self, dtype, name, **args):
+     """
+     Build a KdocItem from the current entry and append it to
+     ``self.entries``.
+
+     Nothing is printed here; the actual output and the output filters
+     are handled elsewhere.
+     """
+     entry = self.entry
+
+     item = KdocItem(name, self.fname, dtype,
+                     entry.declaration_start_line, **args)
+     item.warnings = entry.warnings
+
+     # Drop empty sections
+     # TODO: improve empty sections logic to emit warnings
+     sections = entry.sections
+     for title in ("Description", "Return"):
+         if title in sections and not sections[title].rstrip():
+             del sections[title]
+
+     item.set_sections(sections, entry.section_start_lines)
+     item.set_params(entry.parameterlist, entry.parameterdescs,
+                     entry.parametertypes,
+                     entry.parameterdesc_start_lines)
+     self.entries.append(item)
+
+     self.config.log.debug("Output: %s:%s = %s", dtype, name, pformat(args))
+
+ def reset_state(self, ln):
+     """
+     Start a fresh entry for line ``ln`` and put the state machine back
+     into ``state.NORMAL``.
+     """
+     #
+     # Warnings still held by the previous entry are logged before it
+     # is replaced.
+     #
+     previous = self.entry
+     if previous and previous not in self.entries:
+         for pending in previous.warnings:
+             self.config.log.warning(pending)
+
+     self.entry = KernelEntry(self.config, self.fname, ln)
+
+     # State flags
+     self.state = state.NORMAL
+
+ def push_parameter(self, ln, decl_type, param, dtype,
+                    org_arg, declaration_name):
+     """
+     Record one parameter (or struct/union member) in the current entry,
+     adding a placeholder description and a warning when it has not
+     been described.
+     """
+     entry = self.entry
+     descs = entry.parameterdescs
+
+     # The closing "}" of an anonymous struct/union is not a member
+     if entry.anon_struct_union and dtype == "" and param == "}":
+         return
+
+     entry.anon_struct_union = False
+
+     # Cut the name at the first "[" or ")"
+     param = KernRe(r'[\[\)].*').sub('', param, count=1)
+
+     #
+     # Parameters without a type: varargs, "void" and anonymous
+     # struct/union members.
+     #
+     if dtype == '':
+         if param.endswith("..."):
+             if len(param) > 3:
+                 # A name was given in front of the dots: use it
+                 param = param[:-3]
+             if not descs.get(param):
+                 descs[param] = "variable arguments"
+
+         elif param in ("", "void"):
+             param = "void"
+             descs[param] = "no arguments"
+
+         elif param in ["struct", "union"]:
+             # Unnamed (anonymous) union or struct
+             dtype = param
+             param = "{unnamed_" + param + "}"
+             descs[param] = "anonymous\n"
+             entry.anon_struct_union = True
+
+     # Names starting with "#" are inline preprocessor statements, not
+     # parameters, so they never count as undescribed.
+     if param not in descs and not param.startswith("#"):
+         descs[param] = self.undescribed
+
+         # No warning for names containing a "."
+         if "." not in param:
+             role = "parameter" if decl_type == 'function' else "member"
+             self.emit_msg(ln,
+                           f"{decl_type} {role} '{param}' not described in '{declaration_name}'")
+
+     # The name goes into the parameter list as it is; the original
+     # argument text is stored with whitespace runs squeezed to one space.
+     entry.parameterlist.append(param)
+     entry.parametertypes[param] = KernRe(r'\s\s+').sub(' ', org_arg)
+
+
+ def create_parameter_list(self, ln, decl_type, args,
+ splitter, declaration_name):
+ """
+ Creates a list of parameters, storing them at self.entry.
+
+ ``args`` is split at ``splitter`` and every resulting declaration is
+ handed, one name at a time, to push_parameter(). ``ln``, ``decl_type``
+ and ``declaration_name`` are passed through for use in warnings.
+ """
+
+ # temporarily replace all commas inside function pointer definition
+ # (with "#", one comma per pass, until none is left)
+ arg_expr = KernRe(r'(\([^\),]+),')
+ while arg_expr.search(args):
+ args = arg_expr.sub(r"\1#", args)
+
+ for arg in args.split(splitter):
+ # Ignore argument attributes
+ arg = KernRe(r'\sPOS0?\s').sub(' ', arg)
+
+ # Strip leading/trailing spaces
+ arg = arg.strip()
+ arg = KernRe(r'\s+').sub(' ', arg, count=1)
+
+ if arg.startswith('#'):
+ # Treat preprocessor directive as a typeless variable just to fill
+ # corresponding data structures "correctly". Catch it later in
+ # output_* subs.
+
+ # Treat preprocessor directive as a typeless variable
+ self.push_parameter(ln, decl_type, arg, "",
+ "", declaration_name)
+ #
+ # The pointer-to-function case.
+ #
+ elif KernRe(r'\(.+\)\s*\(').search(arg):
+ # Put back the commas that were hidden as "#" above
+ arg = arg.replace('#', ',')
+ r = KernRe(r'[^\(]+\(\*?\s*' # Everything up to "(*"
+ r'([\w\[\].]*)' # Capture the name and possible [array]
+ r'\s*\)') # Make sure the trailing ")" is there
+ if r.match(arg):
+ param = r.group(1)
+ else:
+ self.emit_msg(ln, f"Invalid param: {arg}")
+ param = arg
+ # The type is the declaration with the name removed
+ dtype = arg.replace(param, '')
+ self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
+ #
+ # The array-of-pointers case. Dig the parameter name out from the middle
+ # of the declaration.
+ #
+ elif KernRe(r'\(.+\)\s*\[').search(arg):
+ r = KernRe(r'[^\(]+\(\s*\*\s*' # Up to "(" and maybe "*"
+ r'([\w.]*?)' # The actual pointer name
+ r'\s*(\[\s*\w+\s*\]\s*)*\)') # The [array portion]
+ if r.match(arg):
+ param = r.group(1)
+ else:
+ self.emit_msg(ln, f"Invalid param: {arg}")
+ param = arg
+ dtype = arg.replace(param, '')
+ self.push_parameter(ln, decl_type, param, dtype, arg, declaration_name)
+ elif arg:
+ #
+ # Clean up extraneous spaces and split the string at commas; the first
+ # element of the resulting list will also include the type information.
+ #
+ arg = KernRe(r'\s*:\s*').sub(":", arg)
+ arg = KernRe(r'\s*\[').sub('[', arg)
+ args = KernRe(r'\s*,\s*').split(arg)
+ # Detach any "*" from the type and glue it to what follows
+ args[0] = re.sub(r'(\*+)\s*', r' \1', args[0])
+ #
+ # args[0] has a string of "type a". If "a" includes an [array]
+ # declaration, we want to not be fooled by any white space inside
+ # the brackets, so detect and handle that case specially.
+ #
+ r = KernRe(r'^([^[\]]*\s+)(.*)$')
+ if r.match(args[0]):
+ args[0] = r.group(2)
+ dtype = r.group(1)
+ else:
+ # No space in args[0]; this seems wrong but preserves previous behavior
+ dtype = ''
+
+ bitfield_re = KernRe(r'(.*?):(\w+)')
+ for param in args:
+ #
+ # For pointers, shift the star(s) from the variable name to the
+ # type declaration.
+ #
+ r = KernRe(r'^(\*+)\s*(.*)')
+ if r.match(param):
+ self.push_parameter(ln, decl_type, r.group(2),
+ f"{dtype} {r.group(1)}",
+ arg, declaration_name)
+ #
+ # Perform a similar shift for bitfields.
+ #
+ elif bitfield_re.search(param):
+ if dtype != "": # Skip unnamed bit-fields
+ self.push_parameter(ln, decl_type, bitfield_re.group(1),
+ f"{dtype}:{bitfield_re.group(2)}",
+ arg, declaration_name)
+ else:
+ self.push_parameter(ln, decl_type, param, dtype,
+ arg, declaration_name)
+
+ def check_sections(self, ln, decl_name, decl_type):
+     """
+     Warn about every section of the current entry that is neither the
+     name of a known parameter/member nor one of the known section
+     names.
+     """
+     role = "parameter" if decl_type == 'function' else "member"
+
+     for title in self.entry.sections:
+         if title in self.entry.parameterlist or known_sections.search(title):
+             continue
+         self.emit_msg(ln,
+                       f"Excess {decl_type} {role} '{title}' description in '{decl_name}'")
+
+ def check_return_section(self, ln, declaration_name, return_type):
+     """
+     Warn when a function that returns a value has no "Return" section.
+
+     Nothing is checked unless ``self.config.wreturn`` is set.
+     """
+     if not self.config.wreturn:
+         return
+
+     # An empty return type means we are looking at a macro
+     if not return_type:
+         return
+
+     # Functions returning "void" need no description ("void *" does)
+     returns_void = KernRe(r'void\s*\w*\s*$').search(return_type)
+     if returns_void:
+         return
+
+     described = self.entry.sections.get("Return", None)
+     if described:
+         return
+
+     self.emit_msg(ln,
+                   f"No description found for return value of '{declaration_name}'")
+
+ #
+ # Split apart a structure prototype; returns (struct|union, name, members) or None
+ #
+ def split_struct_proto(self, proto):
+     """
+     Split a struct/union prototype into its pieces.
+
+     Returns a ``(struct|union, name, members)`` tuple, or None when
+     ``proto`` has neither the plain nor the typedef form.
+     """
+     kind = r'(struct|union)'
+     trailing_attrs = '|'.join([
+         "__attribute__",
+         "__packed",
+         "__aligned",
+         "____cacheline_aligned_in_smp",
+         "____cacheline_aligned",
+     ])
+     body = r'\{(.*)\}\s*' + "(?:" + trailing_attrs + ")?"
+
+     # "struct name { members }"
+     plain = KernRe(kind + r'\s+(\w+)\s*' + body)
+     if plain.search(proto):
+         return (plain.group(1), plain.group(2), plain.group(3))
+
+     # "typedef struct { members } name;" - the name comes last here
+     aliased = KernRe(r'typedef\s+' + kind + r'\s*' + body + r'\s*(\w+)\s*;')
+     if aliased.search(proto):
+         return (aliased.group(1), aliased.group(3), aliased.group(2))
+
+     return None
+ #
+ # Rewrite the members of a structure or union for easier formatting later on.
+ # Among other things, this function will turn a member like:
+ #
+ # struct { inner_members; } foo;
+ #
+ # into:
+ #
+ # struct foo; inner_members;
+ #
+ def rewrite_struct_members(self, members):
+     """
+     Flatten nested struct/union members of ``members`` and return the
+     rewritten string.
+
+     Every "struct { inner; } a, b;" is replaced by "struct a; struct b;"
+     followed by the inner members, each prefixed with the name of the
+     field it belongs to ("a.inner"). Inner members of anonymous
+     structs/unions keep their plain names.
+     """
+     #
+     # Process struct/union members from the most deeply nested outward. The
+     # trick is in the ^{ below - it prevents a match of an outer struct/union
+     # until the inner one has been munged (removing the "{" in the process).
+     #
+     struct_members = KernRe(r'(struct|union)' # 0: declaration type
+                             r'([^\{\};]+)' # 1: possible name
+                             r'(\{)'
+                             r'([^\{\}]*)' # 3: Contents of declaration
+                             r'(\})'
+                             r'([^\{\};]*)(;)') # 5: Remaining stuff after declaration
+     tuples = struct_members.findall(members)
+     while tuples:
+         for t in tuples:
+             newmember = ""
+             oldmember = "".join(t) # Reconstruct the original formatting
+             dtype, name, lbr, content, rbr, rest, semi = t
+             #
+             # Pass through each field name, normalizing the form and formatting.
+             #
+             for s_id in rest.split(','):
+                 s_id = s_id.strip()
+                 newmember += f"{dtype} {s_id}; "
+                 #
+                 # Remove bitfield/array/pointer info, getting the bare name.
+                 #
+                 s_id = KernRe(r'[:\[].*').sub('', s_id)
+                 s_id = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', s_id)
+                 #
+                 # Pass through the members of this inner structure/union.
+                 #
+                 for arg in content.split(';'):
+                     arg = arg.strip()
+                     #
+                     # Look for (type)(*name)(args) - pointer to function
+                     #
+                     r = KernRe(r'^([^\(]+\(\*?\s*)([\w.]*)(\s*\).*)')
+                     if r.match(arg):
+                         #
+                         # Use names of their own here: "dtype" must keep
+                         # the struct/union keyword for the next field name
+                         # of the s_id loop.
+                         #
+                         fn_type, fn_name, fn_extra = r.group(1), r.group(2), r.group(3)
+                         # Pointer-to-function
+                         if not s_id:
+                             # Anonymous struct/union
+                             newmember += f"{fn_type}{fn_name}{fn_extra}; "
+                         else:
+                             newmember += f"{fn_type}{s_id}.{fn_name}{fn_extra}; "
+                     #
+                     # Otherwise a non-function member.
+                     #
+                     else:
+                         #
+                         # Remove bitmap and array portions and spaces around commas
+                         #
+                         arg = KernRe(r':\s*\d+\s*').sub('', arg)
+                         arg = KernRe(r'\[.*\]').sub('', arg)
+                         arg = KernRe(r'\s*,\s*').sub(',', arg)
+                         #
+                         # Look for a normal decl - "type name[,name...]"
+                         #
+                         r = KernRe(r'(.*)\s+([\S+,]+)')
+                         if r.search(arg):
+                             for member in r.group(2).split(','):
+                                 member = KernRe(r'^\s*\**(\S+)\s*').sub(r'\1', member)
+                                 if not s_id:
+                                     # Anonymous struct/union
+                                     newmember += f"{r.group(1)} {member}; "
+                                 else:
+                                     newmember += f"{r.group(1)} {s_id}.{member}; "
+                         else:
+                             newmember += f"{arg}; "
+             #
+             # At the end of the s_id loop, replace the original declaration with
+             # the munged version.
+             #
+             members = members.replace(oldmember, newmember)
+         #
+         # End of the tuple loop - search again and see if there are outer members
+         # that now turn up.
+         #
+         tuples = struct_members.findall(members)
+     return members
+
+ #
+ # Format the struct declaration into a standard form for inclusion in the
+ # resulting docs.
+ #
+ def format_struct_decl(self, declaration):
+ """
+ Return ``declaration`` split into lines and indented with tabs.
+ """
+ #
+ # Insert newlines, get rid of extra spaces.
+ #
+ declaration = KernRe(r'([\{;])').sub(r'\1\n', declaration)
+ declaration = KernRe(r'\}\s+;').sub('};', declaration)
+ #
+ # Format inline enums with each member on its own line.
+ #
+ r = KernRe(r'(enum\s+\{[^\}]+),([^\n])')
+ while r.search(declaration):
+ declaration = r.sub(r'\1,\n\2', declaration)
+ #
+ # Now go through and supply the right number of tabs
+ # for each line.
+ #
+ def_args = declaration.split('\n')
+ level = 1
+ declaration = ""
+ for clause in def_args:
+ clause = KernRe(r'\s+').sub(' ', clause.strip(), count=1)
+ if clause:
+ # A clause containing "}" goes back one level, never below level 1
+ if '}' in clause and level > 1:
+ level -= 1
+ # Lines starting with "#" do not get the per-level tabs
+ if not clause.startswith('#'):
+ declaration += "\t" * level
+ declaration += "\t" + clause + "\n"
+ # A "{" that is not closed in the same clause opens a new level
+ if "{" in clause and "}" not in clause:
+ level += 1
+ return declaration
+
+
+ def dump_struct(self, ln, proto):
+     """
+     Parse the struct or union prototype ``proto`` and store an entry
+     for it. A prototype that cannot be parsed, or whose name differs
+     from the one announced in the comment, only produces a message.
+     """
+     parsed = self.split_struct_proto(proto)
+     if not parsed:
+         self.emit_msg(ln, f"{proto} error: Cannot parse struct or union!")
+         return
+     decl_type, declaration_name, members = parsed
+
+     expected = self.entry.identifier
+     if expected != declaration_name:
+         self.emit_msg(ln, f"expecting prototype for {decl_type} {expected}. "
+                           f"Prototype was for {decl_type} {declaration_name} instead\n")
+         return
+     #
+     # Drop private members, then run the member list through the
+     # regex transforms and the nested-delimiter substitutions.
+     #
+     members = apply_transforms(struct_xforms, trim_private_members(members))
+
+     matcher = NestedMatch()
+     for prefix, replacement in struct_nested_prefixes:
+         members = matcher.sub(prefix, replacement, members)
+     #
+     # The declaration that gets printed keeps the nested form; the
+     # parameter list is built from a flattened copy from which any
+     # remaining "{...}" bodies have been removed.
+     #
+     declaration = members
+     flattened = self.rewrite_struct_members(members)
+     flattened = re.sub(r'(\{[^\{\}]*\})', '', flattened)
+     #
+     # Collect the members, check the sections and store the result.
+     #
+     self.create_parameter_list(ln, decl_type, flattened, ';',
+                                declaration_name)
+     self.check_sections(ln, declaration_name, decl_type)
+     self.output_declaration(decl_type, declaration_name,
+                             definition=self.format_struct_decl(declaration),
+                             purpose=self.entry.declaration_purpose)
+
+ def dump_enum(self, ln, proto):
+ """
+ Stores an enum inside self.entries array.
+
+ Both "typedef enum { ... } name;" and "enum name { ... }" are
+ understood. Enum values without a description and descriptions
+ without a matching enum value are reported through emit_msg().
+ """
+ #
+ # Strip preprocessor directives. Note that this depends on the
+ # trailing semicolon we added in process_proto_type().
+ #
+ proto = KernRe(r'#\s*((define|ifdef|if)\s+|endif)[^;]*;', flags=re.S).sub('', proto)
+ #
+ # Parse out the name and members of the enum. Typedef form first.
+ #
+ r = KernRe(r'typedef\s+enum\s*\{(.*)\}\s*(\w*)\s*;')
+ if r.search(proto):
+ declaration_name = r.group(2)
+ members = trim_private_members(r.group(1))
+ #
+ # Failing that, look for a straight enum
+ #
+ else:
+ r = KernRe(r'enum\s+(\w*)\s*\{(.*)\}')
+ if r.match(proto):
+ declaration_name = r.group(1)
+ members = trim_private_members(r.group(2))
+ #
+ # OK, this isn't going to work.
+ #
+ else:
+ self.emit_msg(ln, f"{proto}: error: Cannot parse enum!")
+ return
+ #
+ # Make sure we found what we were expecting.
+ #
+ if self.entry.identifier != declaration_name:
+ if self.entry.identifier == "":
+ self.emit_msg(ln,
+ f"{proto}: wrong kernel-doc identifier on prototype")
+ else:
+ self.emit_msg(ln,
+ f"expecting prototype for enum {self.entry.identifier}. "
+ f"Prototype was for enum {declaration_name} instead")
+ return
+
+ # Both names matched but are empty: use a placeholder in messages
+ if not declaration_name:
+ declaration_name = "(anonymous)"
+ #
+ # Parse out the name of each enum member, and verify that we
+ # have a description for it.
+ #
+ member_set = set()
+ # Drop parenthesized text first, so that it is not split at its commas
+ members = KernRe(r'\([^;)]*\)').sub('', members)
+ for arg in members.split(','):
+ if not arg:
+ continue
+ # Keep only the first word of the member: its name
+ arg = KernRe(r'^\s*(\w+).*').sub(r'\1', arg)
+ self.entry.parameterlist.append(arg)
+ if arg not in self.entry.parameterdescs:
+ self.entry.parameterdescs[arg] = self.undescribed
+ self.emit_msg(ln,
+ f"Enum value '{arg}' not described in enum '{declaration_name}'")
+ member_set.add(arg)
+ #
+ # Ensure that every described member actually exists in the enum.
+ #
+ for k in self.entry.parameterdescs:
+ if k not in member_set:
+ self.emit_msg(ln,
+ f"Excess enum value '@{k}' description in '{declaration_name}'")
+
+ self.output_declaration('enum', declaration_name,
+ purpose=self.entry.declaration_purpose)
+
+ def dump_declaration(self, ln, prototype):
+     """
+     Stores a data declaration inside self.entries array.
+
+     The work is handed to dump_enum(), dump_typedef() or dump_struct()
+     according to ``self.entry.decl_type``; any other type is reported
+     through emit_msg().
+     """
+     decl_type = self.entry.decl_type
+
+     if decl_type == "enum":
+         self.dump_enum(ln, prototype)
+     elif decl_type == "typedef":
+         self.dump_typedef(ln, prototype)
+     elif decl_type in ["union", "struct"]:
+         self.dump_struct(ln, prototype)
+     else:
+         # This would be a bug
+         self.emit_msg(ln, f'Unknown declaration type: {decl_type}')
+
+ def dump_function(self, ln, prototype):
+ """
+ Stores a function or function macro inside self.entries array.
+
+ The prototype is first run through function_xforms, then parsed
+ either as a "#define" of a symbol without arguments or as
+ "[return type] name(args)". A prototype that cannot be parsed, or
+ whose name differs from the one announced in the comment, only
+ produces a message.
+ """
+
+ found = func_macro = False
+ return_type = ''
+ decl_type = 'function'
+ #
+ # Apply the initial transformations.
+ #
+ prototype = apply_transforms(function_xforms, prototype)
+ #
+ # If we have a macro, remove the "#define" at the front.
+ #
+ new_proto = KernRe(r"^#\s*define\s+").sub("", prototype)
+ if new_proto != prototype:
+ prototype = new_proto
+ #
+ # Dispense with the simple "#define A B" case here; the key
+ # is the space after the name of the symbol being defined.
+ # NOTE that the seemingly misnamed "func_macro" indicates a
+ # macro *without* arguments.
+ #
+ r = KernRe(r'^(\w+)\s+')
+ if r.search(prototype):
+ return_type = ''
+ declaration_name = r.group(1)
+ func_macro = True
+ found = True
+
+ # Yes, this truly is vile. We are looking for:
+ # 1. Return type (may be nothing if we're looking at a macro)
+ # 2. Function name
+ # 3. Function parameters.
+ #
+ # All the while we have to watch out for function pointer parameters
+ # (which IIRC is what the two sections are for), C types (these
+ # regexps don't even start to express all the possibilities), and
+ # so on.
+ #
+ # If you mess with these regexps, it's a good idea to check that
+ # the following functions' documentation still comes out right:
+ # - parport_register_device (function pointer parameters)
+ # - atomic_set (macro)
+ # - pci_match_device, __copy_to_user (long return type)
+
+ name = r'\w+'
+ type1 = r'(?:[\w\s]+)?'
+ type2 = r'(?:[\w\s]+\*+)+'
+ #
+ # Attempt to match first on (args) with no internal parentheses; this
+ # lets us easily filter out __acquires() and other post-args stuff. If
+ # that fails, just grab the rest of the line to the last closing
+ # parenthesis.
+ #
+ proto_args = r'\(([^\(]*|.*)\)'
+ #
+ # (Except for the simple macro case) attempt to split up the prototype
+ # in the various ways we understand.
+ #
+ if not found:
+ # Tried in order: no return type, a plain return type, and a
+ # return type ending in one or more "*".
+ patterns = [
+ rf'^()({name})\s*{proto_args}',
+ rf'^({type1})\s+({name})\s*{proto_args}',
+ rf'^({type2})\s*({name})\s*{proto_args}',
+ ]
+
+ for p in patterns:
+ r = KernRe(p)
+ if r.match(prototype):
+ return_type = r.group(1)
+ declaration_name = r.group(2)
+ args = r.group(3)
+ self.create_parameter_list(ln, decl_type, args, ',',
+ declaration_name)
+ found = True
+ break
+ #
+ # Parsing done; make sure that things are as we expect.
+ #
+ if not found:
+ self.emit_msg(ln,
+ f"cannot understand function prototype: '{prototype}'")
+ return
+ if self.entry.identifier != declaration_name:
+ self.emit_msg(ln, f"expecting prototype for {self.entry.identifier}(). "
+ f"Prototype was for {declaration_name}() instead")
+ return
+ self.check_sections(ln, declaration_name, "function")
+ self.check_return_section(ln, declaration_name, return_type)
+ #
+ # Store the result.
+ #
+ self.output_declaration(decl_type, declaration_name,
+ typedef=('typedef' in return_type),
+ functiontype=return_type,
+ purpose=self.entry.declaration_purpose,
+ func_macro=func_macro)
+
+
+ def dump_typedef(self, ln, proto):
+     """
+     Parse the typedef ``proto`` and store an entry for it.
+
+     Function typedefs are stored as functions, with their parameters;
+     anything else is stored as a plain typedef. A typedef that cannot
+     be parsed, or whose name differs from the one announced in the
+     comment, only produces a message.
+     """
+     typedef_type = r'typedef((?:\s+[\w*]+\b){0,7}\s+(?:\w+\b|\*+))\s*'
+     typedef_ident = r'\*?\s*(\w\S+)\s*'
+     typedef_args = r'\s*\((.*)\);'
+
+     #
+     # Function typedefs come first: with the name in parentheses,
+     # then without them.
+     #
+     function_forms = (
+         KernRe(typedef_type + r'\(' + typedef_ident + r'\)' + typedef_args),
+         KernRe(typedef_type + typedef_ident + typedef_args),
+     )
+
+     for form in function_forms:
+         if form.match(proto):
+             return_type = form.group(1).strip()
+             declaration_name = form.group(2)
+             args = form.group(3)
+
+             if self.entry.identifier != declaration_name:
+                 self.emit_msg(ln,
+                               f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
+                 return
+
+             self.create_parameter_list(ln, 'function', args, ',', declaration_name)
+
+             self.output_declaration('function', declaration_name,
+                                     typedef=True,
+                                     functiontype=return_type,
+                                     purpose=self.entry.declaration_purpose)
+             return
+     #
+     # Not a function: the last word before the ";" is the name.
+     #
+     simple = KernRe(r'typedef.*\s+(\w+)\s*;')
+     if not simple.match(proto):
+         self.emit_msg(ln, "error: Cannot parse typedef!")
+         return
+
+     declaration_name = simple.group(1)
+
+     if self.entry.identifier != declaration_name:
+         self.emit_msg(ln,
+                       f"expecting prototype for typedef {self.entry.identifier}. Prototype was for typedef {declaration_name} instead\n")
+         return
+
+     self.output_declaration('typedef', declaration_name,
+                             purpose=self.entry.declaration_purpose)
+
@staticmethod
def process_export(function_set, line):
    """
    Record the symbol named by an EXPORT_SYMBOL* tag found on this line.

    Adds the symbol to function_set and returns True when the line holds
    an export; returns False otherwise.  No instance state is used, hence
    the staticmethod decorator.
    """

    # Some exported symbols are documented under a different name; these
    # suffixes are trimmed from the exported name.  A horrible hack.
    known_suffixes = ('_noprof',)

    # Only one EXPORT_SYMBOL* per line is handled: more than one would
    # violate Kernel coding style.  The plain form is tried first.
    for export_re in (export_symbol, export_symbol_ns):
        if export_re.search(line):
            symbol = export_re.group(2)
            break
    else:
        return False

    for tail in known_suffixes:
        # Slicing rather than str.removesuffix() keeps Python < 3.9 working
        if symbol.endswith(tail):
            symbol = symbol[:-len(tail)]

    function_set.add(symbol)
    return True
+
def process_normal(self, ln, line):
    """
    STATE_NORMAL: wait for the /** that opens a kernel-doc comment.
    """
    if doc_start.match(line):
        # Begin a fresh entry; the following line is expected to carry
        # the name being documented.
        self.reset_state(ln)
        self.state = state.NAME
+
def process_name(self, ln, line):
    """
    STATE_NAME: parse the line that should read "name - description".

    Handles DOC: blocks, data declarations and function declarations,
    and moves the state machine to DOCBLOCK, BODY, DECLARATION or back
    to NORMAL accordingly.
    """
    #
    # DOC: blocks are free-form sections; an untitled one is called
    # "Introduction".
    #
    if doc_block.search(line):
        self.entry.begin_section(ln, doc_block.group(1) or "Introduction")
        self.entry.identifier = self.entry.section
        self.state = state.DOCBLOCK
        return
    #
    # Anything else must look like a kerneldoc declaration line.
    #
    if not doc_decl.search(line):
        self.emit_msg(ln, f"Cannot find identifier on line:\n{line}")
        return

    self.entry.identifier = doc_decl.group(1)
    #
    # Data declaration or function description?
    #
    if doc_begin_data.search(line):
        self.entry.decl_type = doc_begin_data.group(1)
        self.entry.identifier = doc_begin_data.group(2)
    elif doc_begin_func.search(line):
        self.entry.identifier = doc_begin_func.group(1)
        self.entry.decl_type = "function"
    else:
        # Neither: this is not a kernel-doc comment after all.
        self.emit_msg(ln,
                      f"This comment starts with '/**', but isn't a kernel-doc comment. Refer to Documentation/doc-guide/kernel-doc.rst\n{line}")
        self.state = state.NORMAL
        return
    #
    # Set up the new entry.  The default section is opened here in case
    # no @param blocks follow.
    #
    self.state = state.BODY
    self.entry.identifier = self.entry.identifier.strip(" ")
    self.entry.begin_section(ln + 1)
    #
    # The short description follows the first '-' or ':'.  It *should*
    # be there but is not always.
    #
    purpose_re = KernRe("[-:](.*)")
    if purpose_re.search(line):
        self.entry.declaration_purpose = trim_whitespace(purpose_re.group(1))
        self.state = state.DECLARATION
    else:
        self.entry.declaration_purpose = ""

    if self.config.wshort_desc and not self.entry.declaration_purpose:
        self.emit_msg(ln,
                      f"missing initial short description on line:\n{line}")

    if self.entry.decl_type != "enum" and not self.entry.identifier:
        self.emit_msg(ln,
                      f"wrong kernel-doc identifier on line:\n{line}")
        self.state = state.NORMAL

    if self.config.verbose:
        self.emit_msg(ln,
                      f"Scanning doc for {self.entry.decl_type} {self.entry.identifier}",
                      warning=False)
+
+ #
+ # Helper function to determine if a new section is being started.
+ #
def is_new_section(self, ln, line):
    """
    Start a new section if this line opens one.

    Returns True (after dumping the previous section and beginning the
    new one) when the line is a section header, False otherwise.
    """
    if not doc_sect.search(line):
        return False

    self.state = state.BODY
    #
    # Normalize the section name.  Context, Return and @parameter
    # sections are "special" sections.
    #
    title = doc_sect.group(1)
    lowered = title.lower()
    if lowered == 'description':
        title = 'Description'
    elif lowered == 'context':
        title = 'Context'
        self.state = state.SPECIAL_SECTION
    elif lowered in ("@return", "@returns", "return", "returns"):
        title = "Return"
        self.state = state.SPECIAL_SECTION
    elif title[0] == '@':
        self.state = state.SPECIAL_SECTION
    #
    # Close the previous section and seed the new one with whatever text
    # followed the header on this line.
    #
    first_text = doc_sect.group(2) or ""
    self.dump_section()
    self.entry.begin_section(ln, title)
    self.entry.leading_space = None

    self.entry.add_text(first_text.lstrip())
    return True
+
+ #
+ # Helper function to detect (and effect) the end of a kerneldoc comment.
+ #
def is_comment_end(self, ln, line):
    """
    Finish the kerneldoc comment if this line closes it.

    Returns True (after dumping the current section and switching to
    STATE_PROTO) when the comment ends here, False otherwise.
    """
    if not doc_end.search(line):
        return False

    self.dump_section()

    # Text glued directly to the closing marker is suspicious
    if KernRe(r'\s*\*\s*[a-zA-Z_0-9:.]+\*/').match(line):
        self.emit_msg(ln, f"suspicious ending line: {line}")

    # The prototype starts on the line after the comment
    self.entry.prototype = ""
    self.entry.new_start_line = ln + 1

    self.state = state.PROTO
    return True
+
+
def process_decl(self, ln, line):
    """
    STATE_DECLARATION: We've seen the beginning of a declaration
    """
    if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
        return
    #
    # Everything else has to begin with the " * " marker.
    #
    if not doc_content.search(line):
        # Unknown line, ignore
        self.emit_msg(ln, f"bad line: {line}")
        return

    text = doc_content.group(1)
    if text == "":
        #
        # A blank line ends the declaration part of the comment (with no
        # "special section" parameter descriptions in between).
        #
        self.state = state.BODY
        return
    #
    # Otherwise this is more of the short description.
    #
    joined = self.entry.declaration_purpose + ' ' + text
    self.entry.declaration_purpose = trim_whitespace(joined)
+
+
def process_special(self, ln, line):
    """
    STATE_SPECIAL_SECTION: a section ending with a blank line
    """
    #
    # A line holding only the " * " marker finishes the section.
    #
    if KernRe(r"\s*\*\s*$").match(line):
        self.entry.begin_section(ln, dump = True)
        self.state = state.BODY
        return
    #
    # A new section header or the end of the comment finishes it as well.
    #
    if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
        return
    #
    # What remains should be more text for this section.
    #
    if not doc_content.search(line):
        # Unknown line, ignore
        self.emit_msg(ln, f"bad line: {line}")
        return

    text = doc_content.group(1)
    #
    # Continuation lines may be indented; that indent must be trimmed or
    # Sphinx will get confused.  The first continuation line (the None
    # case) sets how much to trim.
    #
    if self.entry.leading_space is None:
        indent_re = KernRe(r'^(\s+)')
        if indent_re.match(text):
            self.entry.leading_space = len(indent_re.group(1))
        else:
            self.entry.leading_space = 0
    #
    # Never trim anything but blanks: shrink the indent to the first
    # non-blank character if this line is indented less.
    #
    for idx in range(self.entry.leading_space):
        if text[idx] != " ":
            self.entry.leading_space = idx
            break

    self.entry.add_text(text[self.entry.leading_space:])
+
def process_body(self, ln, line):
    """
    STATE_BODY: the bulk of a kerneldoc comment.
    """
    if self.is_new_section(ln, line) or self.is_comment_end(ln, line):
        return

    if not doc_content.search(line):
        # Unknown line, ignore
        self.emit_msg(ln, f"bad line: {line}")
        return

    self.entry.add_text(doc_content.group(1))
+
def process_inline_name(self, ln, line):
    """STATE_INLINE_NAME: beginning of docbook comments within a prototype."""

    # A section header: start collecting its text
    if doc_inline_sect.search(line):
        self.entry.begin_section(ln, doc_inline_sect.group(1))
        self.entry.add_text(doc_inline_sect.group(2).lstrip())
        self.state = state.INLINE_TEXT
        return

    # End of the inline comment: back to the prototype
    if doc_inline_end.search(line):
        self.dump_section()
        self.state = state.PROTO
        return

    # Plain content where a section name was expected
    if doc_content.search(line):
        self.emit_msg(ln, f"Incorrect use of kernel-doc format: {line}")
        self.state = state.PROTO
    # any other line is silently ignored
+
def process_inline_text(self, ln, line):
    """STATE_INLINE_TEXT: docbook comments within a prototype."""

    # End of the inline comment: flush the section, resume the prototype
    if doc_inline_end.search(line):
        self.dump_section()
        self.state = state.PROTO
        return

    if doc_content.search(line):
        self.entry.add_text(doc_content.group(1))
    # any other line is silently ignored
+
def syscall_munge(self, ln, proto):         # pylint: disable=W0613
    """
    Rewrite a SYSCALL_DEFINEn(...) invocation as a "long sys_name(...)"
    prototype and return it.
    """

    # Newlines/CR's become single blanks
    proto = re.sub(r'[\r\n]+', ' ', proto)

    # SYSCALL_DEFINE0 takes no arguments at all
    is_void = 'SYSCALL_DEFINE0' in proto

    # The macro name gives way to the return type and the sys_ prefix
    proto = KernRe(r'SYSCALL_DEFINE.*\(').sub('long sys_', proto)

    # The comma after the name opens the argument list; a void syscall
    # gets an explicit (void) instead.
    if KernRe(r'long\s+(sys_.*?),').search(proto):
        proto = KernRe(',').sub('(', proto, count=1)
    elif is_void:
        proto = KernRe(r'\)').sub('(void)', proto, count=1)

    # No type/name pairs to join for a void syscall
    if is_void:
        return proto

    # Blank out the odd-numbered commas so that an argument's type and
    # name are no longer separated by a comma.
    chars = list(proto)
    commas = 0
    for idx, char in enumerate(chars):
        if char == ',':
            commas += 1
            if commas % 2 == 1:
                chars[idx] = ' '

    return ''.join(chars)
+
def tracepoint_munge(self, ln, proto):
    """
    Rewrite a tracepoint definition as a "static inline void trace_*()"
    prototype and return it.

    When the name or the TP_PROTO() arguments cannot be found, a message
    is emitted and proto is returned unchanged.
    """

    # (pattern, group holding the tracepoint name).  Every pattern is
    # tried, so a later match overrides an earlier one.
    name_patterns = (
        (r'TRACE_EVENT\((.*?),', 1),
        (r'DEFINE_SINGLE_EVENT\((.*?),', 1),
        (r'DEFINE_EVENT\((.*?),(.*?),', 2),
    )

    name = None
    for pattern, group in name_patterns:
        name_re = KernRe(pattern)
        if name_re.search(proto):
            name = name_re.group(group)

    if name:
        name = name.lstrip()

    args = None
    args_re = KernRe(r'TP_PROTO\((.*?)\)')
    if args_re.search(proto):
        args = args_re.group(1)

    if name and args:
        proto = f"static inline void trace_{name}({args})"
        # The documented identifier gets the same trace_ prefix
        self.entry.identifier = f"trace_{self.entry.identifier}"
    else:
        self.emit_msg(ln,
                      f"Unrecognized tracepoint format:\n{proto}\n")

    return proto
+
def process_proto_function(self, ln, line):
    """
    Ancillary routine to process a function prototype.

    Accumulates prototype text in self.entry.prototype one line at a
    time.  Once a line with '{' or ';' (or a #define) is seen, the
    prototype is cleaned up, handed to dump_function() and the parser
    state is reset.
    """

    # Strip C99-style comments to end of line.  The flag must be given
    # by keyword: KernRe's second positional parameter is "cache".
    line = KernRe(r"//.*$", flags=re.S).sub('', line)

    is_define = bool(KernRe(r'\s*#\s*define').match(line))
    #
    # Soak up the line's worth of prototype text, stopping at { if present.
    #
    if is_define:
        self.entry.prototype = line
    elif not line.startswith('#'):   # skip other preprocessor stuff
        r = KernRe(r'([^\{]*)')
        if r.match(line):
            self.entry.prototype += r.group(1) + " "
    #
    # Without a {, a ; or a #define the prototype is still incomplete.
    #
    if not ('{' in line or ';' in line or is_define):
        return

    # strip comments and surrounding spaces
    self.entry.prototype = KernRe(r'/\*.*\*/').sub('', self.entry.prototype).strip()
    #
    # Handle prototypes for function pointers like:
    #       int (*pcs_config)(struct foo)
    # by turning it into
    #       int pcs_config(struct foo)
    #
    r = KernRe(r'^(\S+\s+)\(\s*\*(\S+)\)')
    self.entry.prototype = r.sub(r'\1\2', self.entry.prototype)
    #
    # Handle special declaration syntaxes
    #
    if 'SYSCALL_DEFINE' in self.entry.prototype:
        self.entry.prototype = self.syscall_munge(ln,
                                                  self.entry.prototype)
    else:
        r = KernRe(r'TRACE_EVENT|DEFINE_EVENT|DEFINE_SINGLE_EVENT')
        if r.search(self.entry.prototype):
            self.entry.prototype = self.tracepoint_munge(ln,
                                                         self.entry.prototype)
    #
    # ... and we're done
    #
    self.dump_function(ln, self.entry.prototype)
    self.reset_state(ln)
+
def process_proto_type(self, ln, line):
    """
    Ancillary routine to process a type.

    Accumulates declaration text in self.entry.prototype, tracking the
    brace depth in self.entry.brcount.  On a ';' outside any braces the
    declaration is handed to dump_declaration() and the parser state is
    reset.
    """

    # Strip C99-style comments and surrounding whitespace.  The flag
    # must be given by keyword: KernRe's second positional parameter is
    # "cache".
    line = KernRe(r"//.*$", flags=re.S).sub('', line).strip()
    if not line:
        return # nothing to see here

    # To distinguish preprocessor directive from regular declaration later.
    if line.startswith('#'):
        line += ";"
    #
    # Split the declaration on any of { } or ;, and accumulate pieces
    # until we hit a semicolon while not inside {brackets}
    #
    r = KernRe(r'(.*?)([{};])')
    for chunk in r.split(line):
        if not chunk:  # Ignore empty matches
            continue

        self.entry.prototype += chunk
        #
        # This cries out for a match statement ... someday after we can
        # drop Python 3.9 ...
        #
        if chunk == '{':
            self.entry.brcount += 1
        elif chunk == '}':
            self.entry.brcount -= 1
        elif chunk == ';' and self.entry.brcount <= 0:
            self.dump_declaration(ln, self.entry.prototype)
            self.reset_state(ln)
            return
    #
    # We hit the end of the line while still in the declaration; put
    # in a space to represent the newline.
    #
    self.entry.prototype += ' '
+
def process_proto(self, ln, line):
    """STATE_PROTO: reading a function/whatever prototype."""

    # A complete inline comment on one line: store it as a section
    if doc_inline_oneline.search(line):
        self.entry.begin_section(ln, doc_inline_oneline.group(1))
        self.entry.add_text(doc_inline_oneline.group(2))
        self.dump_section()
        return

    # The start of a multi-line inline comment
    if doc_inline_start.search(line):
        self.state = state.INLINE_NAME
        return

    # Otherwise the line is part of the prototype itself
    if self.entry.decl_type == 'function':
        self.process_proto_function(ln, line)
    else:
        self.process_proto_type(ln, line)
+
def process_docblock(self, ln, line):
    """STATE_DOCBLOCK: within a DOC: block."""

    # Plain content is collected until the end of the comment
    if not doc_end.search(line):
        if doc_content.search(line):
            self.entry.add_text(doc_content.group(1))
        return

    # End of the comment: emit the block and start over
    self.dump_section()
    self.output_declaration("doc", self.entry.identifier)
    self.reset_state(ln)
+
def parse_export(self):
    """
    Collect the EXPORT_SYMBOL* names found in self.fname.

    Returns the set of exported symbols, or None if the file cannot be
    read.
    """

    symbols = set()

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as fp:

            for text in fp:
                self.process_export(symbols, text)

    except IOError:
        return None
    else:
        return symbols
+
#
# The state/action table telling us which function to invoke in
# each state.  parse_kdoc() looks up the current state here and calls
# the entry as a plain function, passing self explicitly.
#
state_actions = {
    state.NORMAL: process_normal,
    state.NAME: process_name,
    state.BODY: process_body,
    state.DECLARATION: process_decl,
    state.SPECIAL_SECTION: process_special,
    state.INLINE_NAME: process_inline_name,
    state.INLINE_TEXT: process_inline_text,
    state.PROTO: process_proto,
    state.DOCBLOCK: process_docblock,
}
+
def parse_kdoc(self):
    """
    Read self.fname line by line and run the kernel-doc state machine.

    Each line goes to the handler registered in state_actions for the
    current state; handlers update the state as needed.  Export symbols
    are collected in the same pass.

    Returns a tuple (export_table, self.entries).
    """

    pending = ""         # text of backslash-continued lines seen so far
    pending_ln = None    # line index where that continuation began
    export_table = set()

    try:
        with open(self.fname, "r", encoding="utf8",
                  errors="backslashreplace") as fp:
            for ln, line in enumerate(fp):

                line = line.expandtabs().strip("\n")

                # Group continuation lines on prototypes
                if self.state == state.PROTO:
                    if line.endswith("\\"):
                        pending += line.rstrip("\\")
                        if not pending_ln:
                            pending_ln = ln
                        continue

                    if pending:
                        # Last piece: process the joined text under the
                        # line index of its first piece.
                        ln, line = pending_ln, pending + line
                        pending, pending_ln = "", None

                self.config.log.debug("%d %s: %s",
                                      ln, state.name[self.state],
                                      line)

                # Exports are only looked for outside kernel-doc
                # comments; reusing this loop saves a second read of the
                # file.  A line that held an export needs no handler.
                in_normal = self.state == state.NORMAL
                if in_normal and self.process_export(export_table, line):
                    continue

                # Hand this line to the appropriate state handler
                self.state_actions[self.state](self, ln, line)

    except OSError:
        self.config.log.error(f"Error: Cannot open file {self.fname}")

    return export_table, self.entries
diff --git a/tools/lib/python/kdoc/kdoc_re.py b/tools/lib/python/kdoc/kdoc_re.py
new file mode 100644
index 000000000000..2dfa1bf83d64
--- /dev/null
+++ b/tools/lib/python/kdoc/kdoc_re.py
@@ -0,0 +1,270 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2025: Mauro Carvalho Chehab <mchehab@kernel.org>.
+
+"""
+Regular expression ancillary classes.
+
+Those help caching regular expressions and do matching for kernel-doc.
+"""
+
+import re
+
# Local cache for compiled regular expressions, keyed by (pattern, flags)
re_cache = {}


class KernRe:
    """
    Helper class to simplify regex declaration and usage.

    It calls re.compile for a given pattern and remembers the result of
    the last match()/search() so that group() can be called on the
    KernRe object itself.  Two KernRe objects can be concatenated with
    the + operator.

    Compiled expressions are cached (unless cache is false), helping to
    speedup searches.
    """

    def _add_regex(self, string, flags):
        """
        Adds a new regex or reuses it from the cache.
        """
        # The flags are part of the key: the same pattern compiled with
        # different flags is a different regular expression.
        key = (string, flags)

        self.regex = re_cache.get(key)
        if self.regex is None:
            self.regex = re.compile(string, flags=flags)
            if self.cache:
                re_cache[key] = self.regex

    def __init__(self, string, cache=True, flags=0):
        """
        Compile a regular expression and initialize internal vars.

        string: the pattern to compile
        cache:  when true, store the compiled pattern in the module cache
        flags:  re module flags; pass them by keyword, as the second
                positional parameter is cache
        """

        self.cache = cache
        self.last_match = None

        self._add_regex(string, flags)

    def __str__(self):
        """
        Return the regular expression pattern.
        """
        return self.regex.pattern

    def __add__(self, other):
        """
        Allows adding two regular expressions into one.

        The result concatenates both patterns, ORs their flags and is
        cached if either operand is.
        """

        return KernRe(str(self) + str(other), cache=self.cache or other.cache,
                      flags=self.regex.flags | other.regex.flags)

    def match(self, string):
        """
        Handles a re.match storing its results
        """

        self.last_match = self.regex.match(string)
        return self.last_match

    def search(self, string):
        """
        Handles a re.search storing its results
        """

        self.last_match = self.regex.search(string)
        return self.last_match

    def findall(self, string):
        """
        Alias to re.findall
        """

        return self.regex.findall(string)

    def split(self, string):
        """
        Alias to re.split
        """

        return self.regex.split(string)

    def sub(self, sub, string, count=0):
        """
        Alias to re.sub
        """

        return self.regex.sub(sub, string, count=count)

    def group(self, num):
        """
        Returns the group results of the last match

        Only valid after a match() or search() that succeeded.
        """

        return self.last_match.group(num)
+
+
class NestedMatch:
    """
    Finding nested delimiters is hard with regular expressions. It is
    even harder on Python with its normal re module, as there are several
    advanced regular expressions that are missing.

    This is the case of this pattern:

            '\\bSTRUCT_GROUP(\\(((?:(?>[^)(]+)|(?1))*)\\))[^;]*;'

    which is used to properly match open/close parentheses of the
    string search STRUCT_GROUP(),

    This class counts pairs of delimiters instead, using that to match
    and replace nested expressions.

    The original approach was suggested by:
        https://stackoverflow.com/questions/5454322/python-how-to-match-nested-parentheses-with-regex

    It was re-implemented here to be more generic and to match 3 types
    of delimiters. The logic checks if delimiters are paired. If not, it
    will ignore the search string.
    """

    # TODO: make NestedMatch handle multiple match groups
    #
    # Right now, regular expressions to match it are defined only up to
    #       the start delimiter, e.g.:
    #
    #       \bSTRUCT_GROUP\(
    #
    # is similar to: STRUCT_GROUP\((.*)\)
    # except that the content inside the match group is delimiter-aligned.
    #
    # The content inside parentheses is converted into a single replace
    # group (e.g. r`\1').
    #
    # It would be nice to change such definition to support multiple
    # match groups, allowing a regex equivalent to:
    #
    #   FOO\((.*), (.*), (.*)\)
    #
    # it is probably easier to define it not as a regular expression, but
    # with some lexical definition like:
    #
    #   FOO(arg1, arg2, arg3)

    DELIMITER_PAIRS = {
        '{': '}',
        '(': ')',
        '[': ']',
    }

    RE_DELIM = re.compile(r'[\{\}\[\]\(\)]')

    def _search(self, regex, line):
        """
        Finds paired blocks for a regex that ends with a delimiter.

        For each match of regex whose last character is an opening
        delimiter, yields a tuple (start, offset, end): start of the
        regex match, position just after the opening delimiter, and
        position just after the paired closing delimiter.

        Open delimiters are pushed on a stack and popped by the matching
        close delimiter; the block ends when the stack is empty.  Close
        delimiters that do not match the top of the stack are silently
        ignored.  That should be OK for the kernel-doc parser, as
        unaligned delimiters would cause compilation errors.

        Matches whose delimiters never pair up yield nothing.
        """

        for match_re in regex.finditer(line):
            start = match_re.start()
            offset = match_re.end()

            # An empty match at the start of the line has no delimiter
            # before it; offset - 1 would wrap around to the last char.
            if offset == 0:
                continue

            d = line[offset - 1]
            if d not in self.DELIMITER_PAIRS:
                continue

            # Each candidate gets its own stack, so that an earlier
            # unbalanced match cannot leave stale entries behind.
            stack = [self.DELIMITER_PAIRS[d]]

            for match in self.RE_DELIM.finditer(line, offset):
                pos = match.start()
                d = line[pos]

                if d in self.DELIMITER_PAIRS:
                    stack.append(self.DELIMITER_PAIRS[d])
                    continue

                # Does the end delimiter match what is expected?
                if d == stack[-1]:
                    stack.pop()

                    if not stack:
                        yield start, offset, pos + 1
                        break

    def search(self, regex, line):
        """
        This is similar to re.search:

        It matches a regex that it is followed by a delimiter,
        returning occurrences only if all delimiters are paired.

        This is a generator yielding each matched string, from the start
        of the regex match up to the paired closing delimiter.
        """

        for start, _, end in self._search(regex, line):
            yield line[start:end]

    def sub(self, regex, sub, line, count=0):
        """
        This is similar to re.sub:

        It matches a regex that it is followed by a delimiter,
        replacing occurrences only if all delimiters are paired.

        if r'\1' is used, it works just like re: it places there the
        matched paired data with the delimiter stripped.

        A ';' right after the closing delimiter is dropped as well.
        Matches nested inside an already replaced block are left alone.

        If count is different than zero, it will replace at most count
        items.
        """
        out = ""

        cur_pos = 0
        n = 0

        for start, end, pos in self._search(regex, line):
            # Like re.sub, only non-overlapping matches are replaced: a
            # match starting inside the previous block was already
            # consumed as part of its value.
            if start < cur_pos:
                continue

            out += line[cur_pos:start]

            # Value, ignoring start/end delimiters
            value = line[end:pos - 1]

            # replaces \1 at the sub string, if \1 is used there
            out += sub.replace(r'\1', value)

            # Drop end ';' if any; the block may end the line
            if pos < len(line) and line[pos] == ';':
                pos += 1

            cur_pos = pos
            n += 1

            if count and n >= count:
                break

        # Append the remaining string
        out += line[cur_pos:]

        return out
diff --git a/tools/lib/python/kdoc/latex_fonts.py b/tools/lib/python/kdoc/latex_fonts.py
new file mode 100755
index 000000000000..29317f8006ea
--- /dev/null
+++ b/tools/lib/python/kdoc/latex_fonts.py
@@ -0,0 +1,167 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) Akira Yokosawa, 2024
+#
+# Ported to Python by (c) Mauro Carvalho Chehab, 2025
+
+"""
+Detect problematic Noto CJK variable fonts.
+
+For "make pdfdocs", reports of build errors of translations.pdf started
+arriving early 2024 [1, 2]. It turned out that Fedora and openSUSE
+tumbleweed have started deploying variable-font [3] format of "Noto CJK"
+fonts [4, 5]. For PDF, a LaTeX package named xeCJK is used for CJK
+(Chinese, Japanese, Korean) pages. xeCJK requires XeLaTeX/XeTeX, which
+does not (and likely never will) understand variable fonts for historical
+reasons.
+
+The build error happens even when both of variable- and non-variable-format
+fonts are found on the build system. To make matters worse, Fedora enlists
+variable "Noto CJK" fonts in the requirements of langpacks-ja, -ko, -zh_CN,
+-zh_TW, etc. Hence developers who have interest in CJK pages are more
+likely to encounter the build errors.
+
+This script is invoked from the error path of "make pdfdocs" and emits
+suggestions if variable-font files of "Noto CJK" fonts are in the list of
+fonts accessible from XeTeX.
+
+References:
+[1]: https://lore.kernel.org/r/8734tqsrt7.fsf@meer.lwn.net/
+[2]: https://lore.kernel.org/r/1708585803.600323099@f111.i.mail.ru/
+[3]: https://en.wikipedia.org/wiki/Variable_font
+[4]: https://fedoraproject.org/wiki/Changes/Noto_CJK_Variable_Fonts
+[5]: https://build.opensuse.org/request/show/1157217
+
+#===========================================================================
+Workarounds for building translations.pdf
+#===========================================================================
+
+* Denylist "variable font" Noto CJK fonts.
+ - Create $HOME/deny-vf/fontconfig/fonts.conf from template below, with
+ tweaks if necessary. Remove leading "".
+ - Path of fontconfig/fonts.conf can be overridden by setting an env
+ variable FONTS_CONF_DENY_VF.
+
+ * Template:
+-----------------------------------------------------------------
+<?xml version="1.0"?>
+<!DOCTYPE fontconfig SYSTEM "urn:fontconfig:fonts.dtd">
+<fontconfig>
+<!--
+ Ignore variable-font glob (not to break xetex)
+-->
+ <selectfont>
+ <rejectfont>
+ <!--
+ for Fedora
+ -->
+ <glob>/usr/share/fonts/google-noto-*-cjk-vf-fonts</glob>
+ <!--
+ for openSUSE tumbleweed
+ -->
+ <glob>/usr/share/fonts/truetype/Noto*CJK*-VF.otf</glob>
+ </rejectfont>
+ </selectfont>
+</fontconfig>
+-----------------------------------------------------------------
+
+ The denylisting is activated for "make pdfdocs".
+
+* For skipping CJK pages in PDF
+ - Uninstall texlive-xecjk.
+ Denylisting is not needed in this case.
+
+* For printing CJK pages in PDF
+ - Need non-variable "Noto CJK" fonts.
+ * Fedora
+ - google-noto-sans-cjk-fonts
+ - google-noto-serif-cjk-fonts
+ * openSUSE tumbleweed
+ - Non-variable "Noto CJK" fonts are not available as distro packages
+ as of April, 2024. Fetch a set of font files from upstream Noto
+ CJK Font released at:
+ https://github.com/notofonts/noto-cjk/tree/main/Sans#super-otc
+ and at:
+ https://github.com/notofonts/noto-cjk/tree/main/Serif#super-otc
+ , then uncompress and deploy them.
+ - Remember to update fontconfig cache by running fc-cache.
+
+!!! Caution !!!
+ Uninstalling "variable font" packages can be dangerous.
+ They might be depended upon by other packages important for your work.
+ Denylisting should be less invasive, as it is effective only while
+ XeLaTeX runs in "make pdfdocs".
+"""
+
+import os
+import re
+import subprocess
+import textwrap
+import sys
+
class LatexFontChecker:
    """
    Detect problems with CJK variable fonts that affect PDF builds for
    translations.
    """

    def __init__(self, deny_vf=None):
        """
        Prepare the environment used to run fc-list.

        deny_vf is used as XDG_CONFIG_HOME for fc-list (after "~"
        expansion).  When not given, it comes from the
        FONTS_CONF_DENY_VF environment variable, defaulting to
        ~/deny-vf.
        """
        if not deny_vf:
            deny_vf = os.environ.get('FONTS_CONF_DENY_VF', "~/deny-vf")

        # Work on a copy: the caller's environment is left untouched
        self.environ = os.environ.copy()
        self.environ['XDG_CONFIG_HOME'] = os.path.expanduser(deny_vf)

        # Group 1 is everything before the first ':' of an fc-list line
        self.re_cjk = re.compile(r"([^:]+):\s*Noto\s+(Sans|Sans Mono|Serif) CJK")

    def description(self):
        """Return this module's docstring."""
        return __doc__

    def get_noto_cjk_vf_fonts(self):
        """
        Get the Noto CJK variable fonts reported by fc-list.

        Returns a sorted list with group 1 of re_cjk for every output
        line that contains 'variable=True' and names a Noto CJK family.
        Exits via sys.exit() if fc-list returns a non-zero status.
        """

        cjk_fonts = set()
        cmd = ["fc-list", ":", "file", "family", "variable"]
        try:
            result = subprocess.run(cmd, stdout=subprocess.PIPE,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True,
                                    env=self.environ,
                                    check=True)

        except subprocess.CalledProcessError as exc:
            sys.exit(f"Error running fc-list: {repr(exc)}")

        for line in result.stdout.splitlines():
            if 'variable=True' not in line:
                continue

            match = self.re_cjk.search(line)
            if match:
                cjk_fonts.add(match.group(1))

        return sorted(cjk_fonts)

    def check(self):
        """
        Check for problems with CJK fonts.

        Returns None when no Noto CJK variable font is found; otherwise
        returns a message listing the fonts and pointing at the
        workarounds.
        """

        fonts = textwrap.indent("\n".join(self.get_noto_cjk_vf_fonts()), " ")
        if not fonts:
            return None

        msg = "=" * 77 + "\n"
        msg += 'XeTeX is confused by "variable font" files listed below:\n'
        msg += fonts + "\n"
        msg += textwrap.dedent("""
            For CJK pages in PDF, they need to be hidden from XeTeX by denylisting.
            Or, CJK pages can be skipped by uninstalling texlive-xecjk.

            For more info on denylisting, other options, and variable font, run:

            tools/docs/check-variable-fonts.py -h
        """)
        msg += "=" * 77

        return msg
diff --git a/tools/lib/python/kdoc/parse_data_structs.py b/tools/lib/python/kdoc/parse_data_structs.py
new file mode 100755
index 000000000000..25361996cd20
--- /dev/null
+++ b/tools/lib/python/kdoc/parse_data_structs.py
@@ -0,0 +1,482 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2016-2025 by Mauro Carvalho Chehab <mchehab@kernel.org>.
+# pylint: disable=R0912,R0915
+
+"""
+Parse a source file or header, creating ReStructured Text cross references.
+
+It accepts an optional file to change the default symbol reference or to
+suppress symbols from the output.
+
+It is capable of identifying defines, functions, structs, typedefs,
+enums and enum symbols, creating cross-references for all of them.
+It is also capable of distinguishing #defines used for specifying a Linux
+ioctl.
+
+The optional rules file contains a set of rules like:
+
+ ignore ioctl VIDIOC_ENUM_FMT
+ replace ioctl VIDIOC_DQBUF vidioc_qbuf
+ replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+"""
+
+import os
+import re
+import sys
+
+
+class ParseDataStructs:
+ """
+ Creates an enriched version of a Kernel header file with cross-links
+ to each C data structure type.
+
+ It is meant to allow having a more comprehensive documentation, where
+ uAPI headers will create cross-reference links to the code.
+
+ It is capable of identifying defines, functions, structs, typedefs,
+ enums and enum symbols, creating cross-references for all of them.
+ It is also capable of distinguishing #defines used for specifying a Linux
+ ioctl.
+
+ By default, it creates rules for all symbols and defines, but it also
+ allows parsing an exception file. Such a file contains a set of rules
+ using the syntax below:
+
+ 1. Ignore rules:
+
+ ignore <type> <symbol>
+
+ Removes the symbol from reference generation.
+
+ 2. Replace rules:
+
+ replace <type> <old_symbol> <new_reference>
+
+ Replaces the reference used for old_symbol. The new_reference can be:
+
+ - A simple symbol name;
+ - A full Sphinx reference.
+
+ 3. Namespace rules
+
+ namespace <namespace>
+
+ Sets C namespace to be used during cross-reference generation. Can
+ be overridden by replace rules.
+
+ On ignore and replace rules, <type> can be:
+ - ioctl: for defines that end with _IO*, e.g. ioctl definitions
+ - define: for other defines
+ - symbol: for symbols defined within enums;
+ - typedef: for typedefs;
+ - enum: for the name of a non-anonymous enum;
+ - struct: for structs.
+
+ Examples:
+
+ ignore define __LINUX_MEDIA_H
+ ignore ioctl VIDIOC_ENUM_FMT
+ replace ioctl VIDIOC_DQBUF vidioc_qbuf
+ replace define V4L2_EVENT_MD_FL_HAVE_FRAME_SEQ :c:type:`v4l2_event_motion_det`
+
+ namespace MC
+ """
+
+ # Parser regexes with multiple ways to capture enums and structs
+ RE_ENUMS = [
+ re.compile(r"^\s*enum\s+([\w_]+)\s*\{"),
+ re.compile(r"^\s*enum\s+([\w_]+)\s*$"),
+ re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*\{"),
+ re.compile(r"^\s*typedef\s*enum\s+([\w_]+)\s*$"),
+ ]
+ RE_STRUCTS = [
+ re.compile(r"^\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+ re.compile(r"^\s*struct\s+([_\w][\w\d_]+)$"),
+ re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)\s*\{"),
+ re.compile(r"^\s*typedef\s*struct\s+([_\w][\w\d_]+)$"),
+ ]
+
+ # FIXME: the original code was written a long time before the Sphinx C
+ # domain supported multiple namespaces. To avoid too much churn at the
+ # existing hyperlinks, the code kept using "c:type" instead of the
+ # right types. To change that, we need to change the types not only
+ # here, but also at the uAPI media documentation.
+ DEF_SYMBOL_TYPES = {
+ "ioctl": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":ref",
+ "description": "IOCTL Commands",
+ },
+ "define": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":ref",
+ "description": "Macros and Definitions",
+ },
+ # We're calling each definition inside an enum as "symbol"
+ "symbol": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":ref",
+ "description": "Enumeration values",
+ },
+ "typedef": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":c:type",
+ "description": "Type Definitions",
+ },
+ # This is the description of the enum itself
+ "enum": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":c:type",
+ "description": "Enumerations",
+ },
+ "struct": {
+ "prefix": "\\ ",
+ "suffix": "\\ ",
+ "ref_type": ":c:type",
+ "description": "Structures",
+ },
+ }
+
+    def __init__(self, debug: bool = False):
+        """
+        Initialize internal vars.
+
+        debug: when true, debug_print() prints the symbols found; when
+        greater than 1, parse_file() also prints each processed line.
+        """
+        self.debug = debug
+        self.data = ""
+
+        # One dictionary per symbol type: symbol -> (reference, line number)
+        self.symbols = {symbol_type: {}
+                        for symbol_type in self.DEF_SYMBOL_TYPES}
+
+        # Rules filled by read_exceptions() and used by apply_exceptions()
+        self.namespace = None
+        self.ignore = []
+        self.replace = []
+
+        # Base name of the exceptions file, used on error/warning messages
+        self.exceptions_name = None
+
+    def read_exceptions(self, fname: str):
+        """
+        Load ignore, replace and namespace rules from an exceptions file.
+
+        Blank lines and lines starting with "#" are skipped. Rules are
+        stored at self.ignore, self.replace and self.namespace, together
+        with the line number where they were found. Exits via sys.exit()
+        at the first line that doesn't match any rule.
+
+        Does nothing when fname is empty.
+        """
+        if not fname:
+            return
+
+        name = os.path.basename(fname)
+        self.exceptions_name = name
+
+        with open(fname, "r", encoding="utf-8", errors="backslashreplace") as f:
+            for ln, line in enumerate(f, start=1):
+                line = line.strip()
+                if not line or line.startswith("#"):
+                    continue
+
+                # ignore rules
+                match = re.match(r"^ignore\s+(\w+)\s+(\S+)", line)
+                if match:
+                    self.ignore.append((ln, match.group(1), match.group(2)))
+                    continue
+
+                # replace rules
+                match = re.match(r"^replace\s+(\S+)\s+(\S+)\s+(\S+)", line)
+                if match:
+                    self.replace.append((ln, match.group(1), match.group(2),
+                                         match.group(3)))
+                    continue
+
+                # namespace rules
+                match = re.match(r"^namespace\s+(\S+)", line)
+                if match:
+                    self.namespace = match.group(1)
+                    continue
+
+                sys.exit(f"{name}:{ln}: invalid line: {line}")
+
+    def apply_exceptions(self):
+        """
+        Process exceptions file with rules to ignore or replace references.
+
+        Ignore rules remove the symbol from self.symbols. Replace rules
+        change the reference stored for a symbol, keeping its line number.
+        Exits via sys.exit() if a rule uses an unknown symbol type; prints
+        a warning if a replace rule points to a symbol that was not found.
+        """
+
+        # File name used on messages. It has to come from the instance, as
+        # the exceptions file is read by a different method.
+        name = self.exceptions_name or "<exceptions>"
+
+        # Handle ignore rules
+        for ln, c_type, symbol in self.ignore:
+            if c_type not in self.DEF_SYMBOL_TYPES:
+                sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+            # Ignoring a symbol that was not found is not an error
+            self.symbols[c_type].pop(symbol, None)
+
+        # Handle replace rules
+        for ln, c_type, old, new in self.replace:
+            if c_type not in self.DEF_SYMBOL_TYPES:
+                sys.exit(f"{name}:{ln}: {c_type} is invalid")
+
+            reftype = None
+
+            # Parse reference type when the type is specified
+
+            match = re.match(r"^\:c\:(\w+)\:\`(.+)\`", new)
+            if match:
+                reftype = f":c:{match.group(1)}"
+                new = match.group(2)
+            else:
+                match = re.search(r"(\:ref)\:\`(.+)\`", new)
+                if match:
+                    reftype = match.group(1)
+                    new = match.group(2)
+
+            # If the replacement rule doesn't have a type, get default
+            if not reftype:
+                reftype = self.DEF_SYMBOL_TYPES[c_type].get("ref_type")
+                if not reftype:
+                    reftype = self.DEF_SYMBOL_TYPES[c_type].get("real_type")
+
+            new_ref = f"{reftype}:`{old} <{new}>`"
+
+            # Change self.symbols to use the replacement rule, keeping the
+            # line number where the symbol was found
+            if old in self.symbols[c_type]:
+                (_, symbol_ln) = self.symbols[c_type][old]
+                self.symbols[c_type][old] = (new_ref, symbol_ln)
+            else:
+                print(f"{name}:{ln}: Warning: can't find {old} {c_type}")
+
+    def store_type(self, ln, symbol_type: str, symbol: str,
+                   ref_name: str = None, replace_underscores: bool = True):
+        """
+        Stores a new symbol at self.symbols under symbol_type.
+
+        The stored value is a (reference, ln) tuple, where the reference
+        is surrounded by the prefix and suffix defined for symbol_type.
+
+        When ref_name is not given, the lower-case symbol name is used.
+        For ":ref" references, underscores at ref_name are replaced by
+        "-", unless replace_underscores is False. C domain references are
+        prefixed by the namespace, when one is set.
+        """
+        defs = self.DEF_SYMBOL_TYPES[symbol_type]
+
+        prefix = defs.get("prefix", "")
+        suffix = defs.get("suffix", "")
+        ref_type = defs.get("ref_type")
+
+        # Determine ref_link based on symbol type
+        if ref_type or self.namespace:
+            if not ref_name:
+                ref_name = symbol.lower()
+
+            # Targets of :ref: references use "-" instead of "_"
+            if ref_type == ":ref" and replace_underscores:
+                ref_name = ref_name.replace("_", "-")
+
+            # C domain references may have namespaces. ref_type can be
+            # None here, when only the namespace is set.
+            if ref_type and ref_type.startswith(":c:") and self.namespace:
+                ref_name = f"{self.namespace}.{ref_name}"
+
+            if ref_type:
+                ref_link = f"{ref_type}:`{symbol} <{ref_name}>`"
+            else:
+                ref_link = f"`{symbol} <{ref_name}>`"
+        else:
+            ref_link = symbol
+
+        self.symbols[symbol_type][symbol] = (f"{prefix}{ref_link}{suffix}", ln)
+
+    def store_line(self, line):
+        """
+        Appends a line to self.data, with tabs expanded, an indent prefix
+        added and trailing spaces removed.
+        """
+        indented = " " + line.expandtabs()
+        self.data += indented.rstrip(" ")
+
+    def parse_file(self, file_in: str, exceptions: str = None):
+        """
+        Reads a C source file and get identifiers.
+
+        Every line of file_in is appended to self.data via store_line().
+        Lines ending with a backslash and multi-line comments are grouped
+        into a single logical line; comments are stripped, and what is
+        left is matched against the enum, define, typedef and struct
+        patterns, storing each symbol found via store_type().
+
+        The rules from the optional exceptions file are read before
+        parsing and applied after it.
+        """
+        self.data = ""
+        is_enum = False
+        is_comment = False
+        multiline = ""
+
+        self.read_exceptions(exceptions)
+
+        with open(file_in, "r",
+                  encoding="utf-8", errors="backslashreplace") as f:
+            for line_no, line in enumerate(f):
+                self.store_line(line)
+                line = line.strip("\n")
+
+                # Handle continuation lines: drop the trailing backslash
+                # and keep the content to be joined with the next line
+                if line.endswith("\\"):
+                    multiline += line[:-1]
+                    continue
+
+                if multiline:
+                    line = multiline + line
+                    multiline = ""
+
+                # Handle comments. They can be multilined
+                if not is_comment:
+                    if re.search(r"/\*.*", line):
+                        is_comment = True
+                    else:
+                        # Strip C99-style comments
+                        line = re.sub(r"(//.*)", "", line)
+
+                if is_comment:
+                    if re.search(r".*\*/", line):
+                        is_comment = False
+                    else:
+                        # Comment is still open: group with the next line
+                        multiline = line
+                        continue
+
+                # At this point, line variable may be a multilined statement,
+                # if lines end with \ or if they have multi-line comments
+                # With that, it can safely remove the entire comments,
+                # and there's no need to use re.DOTALL for the logic below
+
+                line = re.sub(r"(/\*.*\*/)", "", line)
+                if not line.strip():
+                    continue
+
+                # It can be useful for debug purposes to print the file after
+                # having comments stripped and multi-lines grouped.
+                if self.debug > 1:
+                    print(f"line {line_no + 1}: {line}")
+
+                # Now the fun begins: parse each type and store it.
+
+                # We opted for a two parsing logic here due to:
+                # 1. it makes easier to debug issues not-parsed symbols;
+                # 2. we want symbol replacement at the entire content, not
+                #    just when the symbol is detected.
+
+                if is_enum:
+                    match = re.match(r"^\s*([_\w][\w\d_]+)\s*[\,=]?", line)
+                    if match:
+                        self.store_type(line_no, "symbol", match.group(1))
+                    if "}" in line:
+                        is_enum = False
+                    continue
+
+                # ioctls must be checked before generic defines, as the
+                # generic pattern would match them too
+                match = re.match(r"^\s*#\s*define\s+([\w_]+)\s+_IO", line)
+                if match:
+                    self.store_type(line_no, "ioctl", match.group(1),
+                                    replace_underscores=False)
+                    continue
+
+                match = re.match(r"^\s*#\s*define\s+([\w_]+)(\s+|$)", line)
+                if match:
+                    self.store_type(line_no, "define", match.group(1))
+                    continue
+
+                match = re.match(r"^\s*typedef\s+([_\w][\w\d_]+)\s+(.*)\s+([_\w][\w\d_]+);",
+                                 line)
+                if match:
+                    name = match.group(2).strip()
+                    symbol = match.group(3)
+                    self.store_type(line_no, "typedef", symbol, ref_name=name)
+                    continue
+
+                for re_enum in self.RE_ENUMS:
+                    match = re_enum.match(line)
+                    if match:
+                        self.store_type(line_no, "enum", match.group(1))
+                        is_enum = True
+                        break
+
+                for re_struct in self.RE_STRUCTS:
+                    match = re_struct.match(line)
+                    if match:
+                        self.store_type(line_no, "struct", match.group(1))
+                        break
+
+        self.apply_exceptions()
+
+    def debug_print(self):
+        """
+        When debug is enabled, print the reference stored for each symbol,
+        together with its line number. Symbols are grouped per type and
+        sorted by name; types without symbols are not printed.
+        """
+        if not self.debug:
+            return
+
+        for c_type in self.symbols:
+            refs = self.symbols[c_type]
+            if refs:
+                print(f"{c_type}:")
+
+                for symbol in sorted(refs):
+                    ref, ln = refs[symbol]
+                    print(f" #{ln:<5d} {symbol} -> {ref}")
+
+                print()
+
+ def gen_output(self):
+ """
+ Return the content stored at self.data as text, with special
+ characters escaped and each known symbol replaced by the reference
+ stored for it at self.symbols. Nothing is written by this method.
+ """
+
+ # Avoid extra blank lines
+ text = re.sub(r"\s+$", "", self.data) + "\n"
+ text = re.sub(r"\n\s+\n", "\n\n", text)
+
+ # Escape Sphinx special characters
+ text = re.sub(r"([\_\`\*\<\>\&\\\\:\/\|\%\$\#\{\}\~\^])", r"\\\1", text)
+
+ # Source uAPI files may have special notes. Use bold font for them
+ text = re.sub(r"DEPRECATED", "**DEPRECATED**", text)
+
+ # Delimiters to catch the entire symbol after escaped
+ start_delim = r"([ \n\t\(=\*\@])"
+ end_delim = r"(\s|,|\\=|\\:|\;|\)|\}|\{)"
+
+ # Process all reference types
+ for ref_dict in self.symbols.values():
+ for symbol, (replacement, _) in ref_dict.items():
+ # The text was already escaped above, so apply the same kind of
+ # escaping to the symbol before using it as a regular expression
+ symbol = re.escape(re.sub(r"([\_\`\*\<\>\&\\\\:\/])", r"\\\1", symbol))
+ text = re.sub(fr'{start_delim}{symbol}{end_delim}',
+ fr'\1{replacement}\2', text)
+
+ # Remove "\ " where not needed: before spaces and at the end of lines
+ text = re.sub(r"\\ ([\n ])", r"\1", text)
+ text = re.sub(r" \\ ", " ", text)
+
+ return text
+
+    def gen_toc(self):
+        """
+        Return the text of a TOC contents table with the symbols found.
+
+        There is one section per symbol type that has symbols, ordered by
+        the type description. Inside each section, symbols are sorted by
+        name and listed as "- LINENO_<line>: <reference>".
+        """
+        # Sort symbol types per description
+        categories = sorted((defs['description'], c_type)
+                            for c_type, defs in self.DEF_SYMBOL_TYPES.items())
+
+        text = []
+        for description, c_type in categories:
+            refs = self.symbols[c_type]
+
+            # Skip empty categories
+            if not refs:
+                continue
+
+            # Section title, underlined
+            text += [f"{description}", "-" * len(description), ""]
+
+            # Symbols, sorted alphabetically
+            text += [f"- LINENO_{ln}: {ref}"
+                     for _, (ref, ln) in sorted(refs.items())]
+
+            # Empty line between categories
+            text.append("")
+
+        return "\n".join(text)
+
+    def write_output(self, file_in: str, file_out: str, toc: bool):
+        """
+        Write a ReST file at file_out, titled with the base name of file_in.
+
+        With toc, the body is the output of gen_toc(). Otherwise, it is
+        the output of gen_output(), inside a parsed-literal block.
+        """
+        title = os.path.basename(file_in)
+
+        # Generate the body before opening the output file
+        text = self.gen_toc() if toc else self.gen_output()
+
+        header = ".. -*- coding: utf-8; mode: rst -*-\n\n"
+        header += f"{title}\n"
+        header += "=" * len(title) + "\n\n"
+        if not toc:
+            header += ".. parsed-literal::\n\n"
+
+        with open(file_out, "w", encoding="utf-8", errors="backslashreplace") as f:
+            f.write(header + text)
diff --git a/tools/lib/python/kdoc/python_version.py b/tools/lib/python/kdoc/python_version.py
new file mode 100644
index 000000000000..e83088013db2
--- /dev/null
+++ b/tools/lib/python/kdoc/python_version.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2017-2025 Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
+
+"""
+Handle Python version check logic.
+
+Not all Python versions are supported by scripts. Yet, on some cases,
+like during documentation build, a newer version of python could be
+available.
+
+This class allows checking if the minimal requirements are followed.
+
+Better than that, PythonVersion.check_python() not only checks the minimal
+requirements, but it automatically switches to the newest available
+Python version if present.
+
+"""
+
+import os
+import re
+import subprocess
+import shlex
+import sys
+
+from glob import glob
+from textwrap import indent
+
+class PythonVersion:
+ """
+ Ancillary methods to parse and print Python versions, to look for
+ python3 binaries at PATH and to re-run the script with a newer one.
+ """
+
+ def __init__(self, version):
+ """Initialize self.version tuple from a version string"""
+ self.version = self.parse_version(version)
+
+    @staticmethod
+    def parse_version(version):
+        """Split a dot-separated version string into a tuple of integers"""
+        return tuple(map(int, version.split(".")))
+
+    @staticmethod
+    def ver_str(version):
+        """Join the elements of a version tuple with dots"""
+        return ".".join(map(str, version))
+
+ @staticmethod
+ def cmd_print(cmd, max_len=80):
+ """
+ Return a command, given as a list of words, as a printable string.
+
+ Each word is quoted with shlex.quote() and words are joined by a
+ space. When the length of the current line plus the length of the
+ next word is not below max_len, a line continuation is added and
+ the word starts a new line. A false max_len disables line breaks.
+ """
+ cmd_line = []
+
+ for w in cmd:
+ w = shlex.quote(w)
+
+ if cmd_line:
+ if not max_len or len(cmd_line[-1]) + len(w) < max_len:
+ cmd_line[-1] += " " + w
+ continue
+ else:
+ # Close the current line and start a new one with this word
+ cmd_line[-1] += " \\"
+ cmd_line.append(w)
+ else:
+ # First word
+ cmd_line.append(w)
+
+ return "\n ".join(cmd_line)
+
+ def __str__(self):
+ """Returns self.version formatted by ver_str(), e.g. major.minor.patch"""
+ return self.ver_str(self.version)
+
+    @staticmethod
+    def get_python_version(cmd):
+        """
+        Run "cmd --version" and return the version it reports as a tuple.
+
+        The binary has to be executed, as what matters here is the version
+        of other Python binaries, not the one of the running interpreter.
+
+        Returns (0, 0, 0), after printing a message, when the command
+        output has no major.minor.patch version.
+        """
+        # subprocess.run() only accepts text= on Python 3.7 and newer
+        if sys.version_info < (3, 7):
+            text_arg = 'universal_newlines'
+        else:
+            text_arg = 'text'
+
+        result = subprocess.run([cmd, "--version"],
+                                stdout=subprocess.PIPE,
+                                stderr=subprocess.PIPE,
+                                check=False, **{text_arg: True})
+
+        version = result.stdout.strip()
+
+        match = re.search(r"(\d+\.\d+\.\d+)", version)
+        if not match:
+            print(f"Can't parse version {version}")
+            return (0, 0, 0)
+
+        return PythonVersion.parse_version(match.group(1))
+
+    @staticmethod
+    def find_python(min_version):
+        """
+        Look at the PATH directories for executable python3.x binaries
+        whose version is at least min_version.
+
+        Returns a list of (version, cmd) tuples, sorted from the newest
+        to the oldest version. The list is empty if nothing is found.
+
+        Note: this routine is limited to up to 2 digits for python3. We
+        may need to update it one day, hopefully on a distant future.
+        """
+        patterns = [
+            "python3.[0-9][0-9]",
+            "python3.[0-9]",
+        ]
+
+        python_cmd = []
+
+        # Seek for python binaries satisfying min_version. PATH entries
+        # are split with the separator of the running platform.
+        for path in os.getenv("PATH", "").split(os.pathsep):
+            for pattern in patterns:
+                for cmd in glob(os.path.join(path, pattern)):
+                    if not (os.path.isfile(cmd) and os.access(cmd, os.X_OK)):
+                        continue
+
+                    version = PythonVersion.get_python_version(cmd)
+                    if version >= min_version:
+                        python_cmd.append((version, cmd))
+
+        return sorted(python_cmd, reverse=True)
+
+    @staticmethod
+    def check_python(min_version, show_alternatives=False, bail_out=False,
+                     success_on_error=False):
+        """
+        Check if the current python binary satisfies our minimal requirement
+        for Sphinx build. If not, re-run with a newer version if found.
+
+        min_version: tuple with the minimal version that is accepted.
+        show_alternatives: print the command lines that could be used to
+        run the script with each suitable binary found.
+        bail_out: exit instead of re-running the script.
+        success_on_error: when bailing out, print the message at stderr
+        and exit with status 0.
+
+        Returns without doing anything if the running version is new
+        enough. If no suitable binary is found, prints an error and
+        returns, letting the script continue.
+        """
+        cur_ver = sys.version_info[:3]
+        if cur_ver >= min_version:
+            return
+
+        python_ver = PythonVersion.ver_str(cur_ver)
+
+        available_versions = PythonVersion.find_python(min_version)
+        if not available_versions:
+            print(f"ERROR: Python version {python_ver} is not supported anymore\n")
+            print(" Can't find a new version. This script may fail")
+            return
+
+        script_path = os.path.abspath(sys.argv[0])
+
+        # The list is not empty at this point, and find_python() returns
+        # it sorted from the newest to the oldest version
+        new_python_cmd = available_versions[0][1]
+
+        if show_alternatives:
+            print("You could run, instead:")
+            for _, cmd in available_versions:
+                args = [cmd, script_path] + sys.argv[1:]
+
+                cmd_str = indent(PythonVersion.cmd_print(args), " ")
+                print(f"{cmd_str}\n")
+
+        if bail_out:
+            msg = f"Python {python_ver} not supported. Bailing out"
+            if success_on_error:
+                print(msg, file=sys.stderr)
+                sys.exit(0)
+
+            sys.exit(msg)
+
+        print(f"Python {python_ver} not supported. Changing to {new_python_cmd}")
+
+        # Restart script using the newer version
+        args = [new_python_cmd, script_path] + sys.argv[1:]
+
+        try:
+            os.execv(new_python_cmd, args)
+        except OSError as e:
+            sys.exit(f"Failed to restart with {new_python_cmd}: {e}")
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 9ef569492560..ddaeb4eb3e24 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -75,6 +75,9 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
size_t ci, cj, ei;
int cmp;
+ if (!excludes->cnt)
+ return;
+
ci = cj = ei = 0;
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile
index a1f5e388644d..41aa7a324ff4 100644
--- a/tools/lib/thermal/Makefile
+++ b/tools/lib/thermal/Makefile
@@ -46,8 +46,12 @@ else
CFLAGS := -g -Wall
endif
+NL3_CFLAGS = $(shell pkg-config --cflags libnl-3.0 2>/dev/null)
+ifeq ($(NL3_CFLAGS),)
+NL3_CFLAGS = -I/usr/include/libnl3
+endif
+
INCLUDES = \
--I/usr/include/libnl3 \
-I$(srctree)/tools/lib/thermal/include \
-I$(srctree)/tools/lib/ \
-I$(srctree)/tools/include \
@@ -59,6 +63,7 @@ INCLUDES = \
override CFLAGS += $(EXTRA_WARNINGS)
override CFLAGS += -Werror -Wall
override CFLAGS += -fPIC
+override CFLAGS += $(NL3_CFLAGS)
override CFLAGS += $(INCLUDES)
override CFLAGS += -fvisibility=hidden
override CFGLAS += -Wl,-L.
@@ -134,7 +139,7 @@ endef
install_lib: libs
$(call QUIET_INSTALL, $(LIBTHERMAL_ALL)) \
$(call do_install_mkdir,$(libdir_SQ)); \
- cp -fpR $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ)
+ cp -fR --preserve=mode,timestamp $(LIBTHERMAL_ALL) $(DESTDIR)$(libdir_SQ)
install_headers:
$(call QUIET_INSTALL, headers) \
diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map
index d657176aa47f..1d3d0c04e4b6 100644
--- a/tools/lib/thermal/libthermal.map
+++ b/tools/lib/thermal/libthermal.map
@@ -1,6 +1,5 @@
LIBTHERMAL_0.0.1 {
global:
- thermal_init;
for_each_thermal_zone;
for_each_thermal_trip;
for_each_thermal_cdev;
@@ -9,9 +8,12 @@ LIBTHERMAL_0.0.1 {
thermal_zone_find_by_id;
thermal_zone_discover;
thermal_init;
+ thermal_exit;
+ thermal_events_exit;
thermal_events_init;
thermal_events_handle;
thermal_events_fd;
+ thermal_cmd_exit;
thermal_cmd_init;
thermal_cmd_get_tz;
thermal_cmd_get_cdev;
@@ -22,6 +24,7 @@ LIBTHERMAL_0.0.1 {
thermal_cmd_threshold_add;
thermal_cmd_threshold_delete;
thermal_cmd_threshold_flush;
+ thermal_sampling_exit;
thermal_sampling_init;
thermal_sampling_handle;
thermal_sampling_fd;
diff --git a/tools/mm/page_owner_sort.c b/tools/mm/page_owner_sort.c
index 880e36df0c11..14c67e9e84c4 100644
--- a/tools/mm/page_owner_sort.c
+++ b/tools/mm/page_owner_sort.c
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -23,9 +24,6 @@
#include <linux/types.h>
#include <getopt.h>
-#define bool int
-#define true 1
-#define false 0
#define TASK_COMM_LEN 16
struct block_list {
@@ -669,14 +667,15 @@ int main(int argc, char **argv)
{ "pid", required_argument, NULL, 1 },
{ "tgid", required_argument, NULL, 2 },
{ "name", required_argument, NULL, 3 },
- { "cull", required_argument, NULL, 4 },
- { "sort", required_argument, NULL, 5 },
+ { "cull", required_argument, NULL, 4 },
+ { "sort", required_argument, NULL, 5 },
+ { "help", no_argument, NULL, 'h' },
{ 0, 0, 0, 0},
};
compare_flag = COMP_NO_FLAG;
- while ((opt = getopt_long(argc, argv, "admnpstP", longopts, NULL)) != -1)
+ while ((opt = getopt_long(argc, argv, "admnpstPh", longopts, NULL)) != -1)
switch (opt) {
case 'a':
compare_flag |= COMP_ALLOC;
@@ -702,6 +701,9 @@ int main(int argc, char **argv)
case 'n':
compare_flag |= COMP_COMM;
break;
+ case 'h':
+ usage();
+ exit(0);
case 1:
filter = filter | FILTER_PID;
fc.pids = parse_nums_list(optarg, &fc.pids_size);
diff --git a/tools/mm/slabinfo.c b/tools/mm/slabinfo.c
index 1433eff99feb..80cdbd3db82d 100644
--- a/tools/mm/slabinfo.c
+++ b/tools/mm/slabinfo.c
@@ -155,6 +155,7 @@ static void usage(void)
static unsigned long read_obj(const char *name)
{
+ size_t len;
FILE *f = fopen(name, "r");
if (!f) {
@@ -165,8 +166,10 @@ static unsigned long read_obj(const char *name)
if (!fgets(buffer, sizeof(buffer), f))
buffer[0] = 0;
fclose(f);
- if (buffer[strlen(buffer)] == '\n')
- buffer[strlen(buffer)] = 0;
+ len = strlen(buffer);
+
+ if (len > 0 && buffer[len - 1] == '\n')
+ buffer[len - 1] = 0;
}
return strlen(buffer);
}
diff --git a/tools/net/sunrpc/xdrgen/generators/__init__.py b/tools/net/sunrpc/xdrgen/generators/__init__.py
index b98574a36a4a..e22632cf38fb 100644
--- a/tools/net/sunrpc/xdrgen/generators/__init__.py
+++ b/tools/net/sunrpc/xdrgen/generators/__init__.py
@@ -2,7 +2,7 @@
"""Define a base code generator class"""
-import sys
+from pathlib import Path
from jinja2 import Environment, FileSystemLoader, Template
from xdr_ast import _XdrAst, Specification, _RpcProgram, _XdrTypeSpecifier
@@ -14,8 +14,11 @@ def create_jinja2_environment(language: str, xdr_type: str) -> Environment:
"""Open a set of templates based on output language"""
match language:
case "C":
+ templates_dir = (
+ Path(__file__).parent.parent / "templates" / language / xdr_type
+ )
environment = Environment(
- loader=FileSystemLoader(sys.path[0] + "/templates/C/" + xdr_type + "/"),
+ loader=FileSystemLoader(templates_dir),
trim_blocks=True,
lstrip_blocks=True,
)
@@ -48,9 +51,7 @@ def find_xdr_program_name(root: Specification) -> str:
def header_guard_infix(filename: str) -> str:
"""Extract the header guard infix from the specification filename"""
- basename = filename.split("/")[-1]
- program = basename.replace(".x", "")
- return program.upper()
+ return Path(filename).stem.upper()
def kernel_c_type(spec: _XdrTypeSpecifier) -> str:
diff --git a/tools/net/sunrpc/xdrgen/generators/union.py b/tools/net/sunrpc/xdrgen/generators/union.py
index 2cca00e279cd..ad1f214ef22a 100644
--- a/tools/net/sunrpc/xdrgen/generators/union.py
+++ b/tools/net/sunrpc/xdrgen/generators/union.py
@@ -8,7 +8,7 @@ from jinja2 import Environment
from generators import SourceGenerator
from generators import create_jinja2_environment, get_jinja2_template
-from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, get_header_name
+from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, _XdrString, get_header_name
from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis, big_endian
@@ -40,13 +40,20 @@ def emit_union_case_spec_definition(
"""Emit a definition for an XDR union's case arm"""
if isinstance(node.arm, _XdrVoid):
return
- assert isinstance(node.arm, _XdrBasic)
+ if isinstance(node.arm, _XdrString):
+ type_name = "char *"
+ classifier = ""
+ else:
+ type_name = node.arm.spec.type_name
+ classifier = node.arm.spec.c_classifier
+
+ assert isinstance(node.arm, (_XdrBasic, _XdrString))
template = get_jinja2_template(environment, "definition", "case_spec")
print(
template.render(
name=node.arm.name,
- type=node.arm.spec.type_name,
- classifier=node.arm.spec.c_classifier,
+ type=type_name,
+ classifier=classifier,
)
)
@@ -84,6 +91,12 @@ def emit_union_case_spec_decoder(
if isinstance(node.arm, _XdrVoid):
return
+ if isinstance(node.arm, _XdrString):
+ type_name = "char *"
+ classifier = ""
+ else:
+ type_name = node.arm.spec.type_name
+ classifier = node.arm.spec.c_classifier
if big_endian_discriminant:
template = get_jinja2_template(environment, "decoder", "case_spec_be")
@@ -92,13 +105,13 @@ def emit_union_case_spec_decoder(
for case in node.values:
print(template.render(case=case))
- assert isinstance(node.arm, _XdrBasic)
+ assert isinstance(node.arm, (_XdrBasic, _XdrString))
template = get_jinja2_template(environment, "decoder", node.arm.template)
print(
template.render(
name=node.arm.name,
- type=node.arm.spec.type_name,
- classifier=node.arm.spec.c_classifier,
+ type=type_name,
+ classifier=classifier,
)
)
@@ -169,7 +182,10 @@ def emit_union_case_spec_encoder(
if isinstance(node.arm, _XdrVoid):
return
-
+ if isinstance(node.arm, _XdrString):
+ type_name = "char *"
+ else:
+ type_name = node.arm.spec.type_name
if big_endian_discriminant:
template = get_jinja2_template(environment, "encoder", "case_spec_be")
else:
@@ -181,7 +197,7 @@ def emit_union_case_spec_encoder(
print(
template.render(
name=node.arm.name,
- type=node.arm.spec.type_name,
+ type=type_name,
)
)
diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/close.j2
@@ -1,3 +1,3 @@
{# SPDX-License-Identifier: GPL-2.0 #}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/close.j2
@@ -1,3 +1,3 @@
{# SPDX-License-Identifier: GPL-2.0 #}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/close.j2
@@ -1,3 +1,3 @@
{# SPDX-License-Identifier: GPL-2.0 #}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2
index 9a814de54ae8..65698e20d8cd 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_opaque.j2
@@ -2,5 +2,5 @@
{% if annotate %}
/* member {{ name }} (variable-length opaque) */
{% endif %}
- if (!xdrgen_decode_opaque(xdr, (opaque *)ptr, {{ maxsize }}))
+ if (!xdrgen_decode_opaque(xdr, &ptr->{{ name }}, {{ maxsize }}))
return false;
diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2
index 5bf010665f84..3dbd724d7f17 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/close.j2
@@ -1,3 +1,3 @@
{# SPDX-License-Identifier: GPL-2.0 #}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2
index da4709403dc9..b215e157dfa7 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/basic.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr)
/* (basic) */
{% endif %}
return xdrgen_decode_{{ type }}(xdr, ptr);
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2
index d7c80e472fe3..c8953719e626 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_array.j2
@@ -22,4 +22,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
return false;
}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
index 8b4ff08c49e5..c854fc8c74e3 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/fixed_length_opaque.j2
@@ -13,5 +13,5 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
{% if annotate %}
/* (fixed-length opaque) */
{% endif %}
- return xdr_stream_decode_opaque_fixed(xdr, ptr, {{ size }}) >= 0;
-};
+ return xdr_stream_decode_opaque_fixed(xdr, ptr, {{ size }}) == 0;
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2
index 56c5a17d6a70..bcbc1758aae9 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
/* (variable-length string) */
{% endif %}
return xdrgen_decode_string(xdr, ptr, {{ maxsize }});
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2
index e74ffdd98463..a59cc1f38eed 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_array.j2
@@ -23,4 +23,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
if (!xdrgen_decode_{{ type }}(xdr, &ptr->element[i]))
return false;
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2
index f28f8b228ad5..eb05f53e1041 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ classifier }}{{ name }} *ptr
/* (variable-length opaque) */
{% endif %}
return xdrgen_decode_opaque(xdr, ptr, {{ maxsize }});
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2
index 35effe67e4ef..0d21dd0b723a 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/basic.j2
@@ -18,4 +18,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
/* (basic) */
{% endif %}
return xdrgen_encode_{{ type }}(xdr, value);
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2
index 95202ad5ad2d..ec8cd6509514 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_array.j2
@@ -22,4 +22,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
return false;
}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2
index 9c66a11b9912..b53fa87e1858 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/fixed_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
/* (fixed-length opaque) */
{% endif %}
return xdr_stream_encode_opaque_fixed(xdr, value, {{ size }}) >= 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2
index 3d490ff180d0..28b81f1d0bd6 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2
@@ -14,4 +14,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
/* (variable-length string) */
{% endif %}
return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2
index 2d2384f64918..ff093c281d51 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_array.j2
@@ -27,4 +27,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
{% endif %}
return false;
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2
index 8508f13c95b9..2e89592fa702 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_opaque.j2
@@ -14,4 +14,4 @@ xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const {{ classifier }}{{ name }
/* (variable-length opaque) */
{% endif %}
return xdr_stream_encode_opaque(xdr, value.data, value.len) >= 0;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j2
new file mode 100644
index 000000000000..816291184e8c
--- /dev/null
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/declaration/close.j2
@@ -0,0 +1,4 @@
+{# SPDX-License-Identifier: GPL-2.0 #}
+
+bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, struct {{ name }} *ptr);
+bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, const struct {{ name }} *value);
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2
index fdc2dfd1843b..39d8d6c5094d 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/close.j2
@@ -1,4 +1,4 @@
{# SPDX-License-Identifier: GPL-2.0 #}
}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2
index fdc2dfd1843b..39d8d6c5094d 100644
--- a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/close.j2
@@ -1,4 +1,4 @@
{# SPDX-License-Identifier: GPL-2.0 #}
}
return true;
-};
+}
diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j2
new file mode 100644
index 000000000000..2f035a64f1f4
--- /dev/null
+++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/string.j2
@@ -0,0 +1,6 @@
+{# SPDX-License-Identifier: GPL-2.0 #}
+{% if annotate %}
+ /* member {{ name }} (variable-length string) */
+{% endif %}
+ if (!xdrgen_encode_string(xdr, ptr->u.{{ name }}, {{ maxsize }}))
+ return false;
diff --git a/tools/net/sunrpc/xdrgen/xdrgen b/tools/net/sunrpc/xdrgen/xdrgen
index 43762be39252..3afd0547d67c 100755
--- a/tools/net/sunrpc/xdrgen/xdrgen
+++ b/tools/net/sunrpc/xdrgen/xdrgen
@@ -10,8 +10,13 @@ __license__ = "GPL-2.0 only"
__version__ = "0.2"
import sys
+from pathlib import Path
import argparse
+_XDRGEN_DIR = Path(__file__).resolve().parent
+if str(_XDRGEN_DIR) not in sys.path:
+ sys.path.insert(0, str(_XDRGEN_DIR))
+
from subcmds import definitions
from subcmds import declarations
from subcmds import lint
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index 211df5a93ad9..7736b492f559 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -12,10 +12,13 @@ endif
libdir ?= $(prefix)/$(libdir_relative)
includedir ?= $(prefix)/include
-SUBDIRS = lib generated samples
+SPECDIR=../../../Documentation/netlink/specs
+
+SUBDIRS = lib generated samples ynltool tests
all: $(SUBDIRS) libynl.a
+ynltool: | lib generated libynl.a
samples: | lib generated
libynl.a: | lib generated
@echo -e "\tAR $@"
@@ -48,5 +51,27 @@ install: libynl.a lib/*.h
@echo -e "\tINSTALL pyynl"
@pip install --prefix=$(DESTDIR)$(prefix) .
@make -C generated install
+	@$(MAKE) -C tests install
+
+run_tests:
+ @$(MAKE) -C tests run_tests
+
+lint:
+ yamllint $(SPECDIR)
+
+schema_check:
+ @N=1; \
+ for spec in $(SPECDIR)/*.yaml ; do \
+ NAME=$$(basename $$spec) ; \
+ OUTPUT=$$(./pyynl/cli.py --spec $$spec --validate) ; \
+ if [ $$? -eq 0 ] ; then \
+ echo "ok $$N $$NAME schema validation" ; \
+ else \
+ echo "not ok $$N $$NAME schema validation" ; \
+ echo "$$OUTPUT" ; \
+ echo ; \
+ fi ; \
+ N=$$((N+1)) ; \
+ done
-.PHONY: all clean distclean install $(SUBDIRS)
+.PHONY: all clean distclean install run_tests lint schema_check $(SUBDIRS)
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index 90686e241157..865fd2e8519e 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -31,6 +31,7 @@ CFLAGS_ovpn:=$(call get_hdr_inc,_LINUX_OVPN_H,ovpn.h)
CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_psp:=$(call get_hdr_inc,_LINUX_PSP_H,psp.h)
CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
$(call get_hdr_inc,__LINUX_IF_ADDR_H,if_addr.h)
CFLAGS_rt-link:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 824777d7e05e..ced7dce44efb 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -106,7 +106,6 @@ ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
struct nlmsghdr *
ynl_gemsg_start_dump(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
-int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr);
int ynl_submsg_failed(struct ynl_parse_arg *yarg, const char *field_name,
const char *sel_name);
@@ -314,7 +313,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str)
struct nlattr *attr;
size_t len;
- len = strlen(str);
+ len = strlen(str) + 1;
if (__ynl_attr_put_overflow(nlh, len))
return;
@@ -322,7 +321,7 @@ ynl_attr_put_str(struct nlmsghdr *nlh, unsigned int attr_type, const char *str)
attr->nla_type = attr_type;
strcpy((char *)ynl_attr_data(attr), str);
- attr->nla_len = NLA_HDRLEN + NLA_ALIGN(len);
+ attr->nla_len = NLA_HDRLEN + len;
nlh->nlmsg_len += NLMSG_ALIGN(attr->nla_len);
}
@@ -467,4 +466,13 @@ ynl_attr_put_sint(struct nlmsghdr *nlh, __u16 type, __s64 data)
else
ynl_attr_put_s64(nlh, type, data);
}
+
+int __ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr,
+ unsigned int type);
+
+static inline int ynl_attr_validate(struct ynl_parse_arg *yarg,
+ const struct nlattr *attr)
+{
+ return __ynl_attr_validate(yarg, attr, ynl_attr_type(attr));
+}
#endif
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index 2a169c3c0797..2bcd781111d7 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -360,15 +360,15 @@ static int ynl_cb_done(const struct nlmsghdr *nlh, struct ynl_parse_arg *yarg)
/* Attribute validation */
-int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr)
+int __ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr,
+ unsigned int type)
{
const struct ynl_policy_attr *policy;
- unsigned int type, len;
unsigned char *data;
+ unsigned int len;
data = ynl_attr_data(attr);
len = ynl_attr_data_len(attr);
- type = ynl_attr_type(attr);
if (type > yarg->rsp_policy->max_attr) {
yerr(yarg->ys, YNL_ERROR_INTERNAL,
"Internal error, validating unknown attribute");
diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py
index 8c192e900bd3..af02a5b7e5a2 100755
--- a/tools/net/ynl/pyynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -7,9 +7,10 @@ import os
import pathlib
import pprint
import sys
+import textwrap
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
-from lib import YnlFamily, Netlink, NlError
+from lib import YnlFamily, Netlink, NlError, SpecFamily
sys_schema_dir='/usr/share/ynl'
relative_schema_dir='../../../../Documentation/netlink'
@@ -39,6 +40,60 @@ class YnlEncoder(json.JSONEncoder):
return json.JSONEncoder.default(self, obj)
+def print_attr_list(ynl, attr_names, attr_set, indent=2, seen=()):
+    """Print attributes with type and doc; 'seen' stops self-nesting sets from looping."""
+    prefix = ' ' * indent
+    for attr_name in attr_names:
+        if attr_name in attr_set.attrs:
+            attr = attr_set.attrs[attr_name]
+            attr_info = f'{prefix}- {attr_name}: {attr.type}'
+            if 'enum' in attr.yaml:
+                enum_name = attr.yaml['enum']
+                attr_info += f" (enum: {enum_name})"
+                # Print enum values if available
+                if enum_name in ynl.consts:
+                    const = ynl.consts[enum_name]
+                    enum_values = list(const.entries.keys())
+                    attr_info += f"\n{prefix} {const.type.capitalize()}: {', '.join(enum_values)}"
+
+            # Show nested attributes reference and recursively display them
+            nested_set_name = None
+            if attr.type == 'nest' and 'nested-attributes' in attr.yaml:
+                nested_set_name = attr.yaml['nested-attributes']
+                attr_info += f" -> {nested_set_name}"
+
+            if attr.yaml.get('doc'):
+                doc_text = textwrap.indent(attr.yaml['doc'], prefix + ' ')
+                attr_info += f"\n{doc_text}"
+            print(attr_info)
+
+            # Descend into each nested set once per path; a set may nest itself
+            if nested_set_name in ynl.attr_sets and nested_set_name not in seen:
+                nested_set = ynl.attr_sets[nested_set_name]
+                # Filter out 'unspec' and other unused attrs
+                nested_names = [n for n in nested_set.attrs.keys()
+                                if nested_set.attrs[n].type != 'unused']
+                if nested_names:
+                    print_attr_list(ynl, nested_names, nested_set, indent + 4, seen + (nested_set_name,))
+
+
+def print_mode_attrs(ynl, mode, mode_spec, attr_set, print_request=True):
+ """Print a given mode (do/dump/event/notify)."""
+ mode_title = mode.capitalize()
+
+ if print_request and 'request' in mode_spec and 'attributes' in mode_spec['request']:
+ print(f'\n{mode_title} request attributes:')
+ print_attr_list(ynl, mode_spec['request']['attributes'], attr_set)
+
+ if 'reply' in mode_spec and 'attributes' in mode_spec['reply']:
+ print(f'\n{mode_title} reply attributes:')
+ print_attr_list(ynl, mode_spec['reply']['attributes'], attr_set)
+
+ if 'attributes' in mode_spec:
+ print(f'\n{mode_title} attributes:')
+ print_attr_list(ynl, mode_spec['attributes'], attr_set)
+
+
def main():
description = """
YNL CLI utility - a general purpose netlink utility that uses YAML
@@ -70,6 +125,9 @@ def main():
group.add_argument('--dump', dest='dump', metavar='DUMP-OPERATION', type=str)
group.add_argument('--list-ops', action='store_true')
group.add_argument('--list-msgs', action='store_true')
+ group.add_argument('--list-attrs', dest='list_attrs', metavar='OPERATION', type=str,
+ help='List attributes for an operation')
+ group.add_argument('--validate', action='store_true')
parser.add_argument('--duration', dest='duration', type=int,
help='when subscribed, watch for DURATION seconds')
@@ -111,15 +169,25 @@ def main():
if args.family:
spec = f"{spec_dir()}/{args.family}.yaml"
- if args.schema is None and spec.startswith(sys_schema_dir):
- args.schema = '' # disable schema validation when installed
- if args.process_unknown is None:
- args.process_unknown = True
else:
spec = args.spec
if not os.path.isfile(spec):
raise Exception(f"Spec file {spec} does not exist")
+    if args.validate:
+        try:
+            SpecFamily(spec, args.schema)  # constructing the family loads and schema-checks the spec
+        except Exception as error:
+            print(error)  # stays on stdout: 'make schema_check' captures it with $(...)
+            sys.exit(1)  # sys.exit, not exit(): the bare builtin is injected by 'site' and may be absent
+        return
+
+ if args.family: # set behaviour when using installed specs
+ if args.schema is None and spec.startswith(sys_schema_dir):
+ args.schema = '' # disable schema validation when installed
+ if args.process_unknown is None:
+ args.process_unknown = True
+
ynl = YnlFamily(spec, args.schema, args.process_unknown,
recv_size=args.dbg_small_recv)
if args.dbg_small_recv:
@@ -135,6 +203,28 @@ def main():
for op_name, op in ynl.msgs.items():
print(op_name, " [", ", ".join(op.modes), "]")
+    if args.list_attrs:
+        op = ynl.msgs.get(args.list_attrs)
+        if not op:
+            print(f'Operation {args.list_attrs} not found')
+            sys.exit(1)  # sys.exit, not exit(): the bare builtin is injected by 'site'
+
+        print(f'Operation: {op.name}')
+        print(op.yaml.get('doc', ''))  # 'doc' may be missing when schema validation is disabled
+
+        for mode in ['do', 'dump', 'event']:
+            if mode in op.yaml:
+                print_mode_attrs(ynl, mode, op.yaml[mode], op.attr_set, True)
+
+        if 'notify' in op.yaml:
+            ref_op = ynl.msgs.get(op.yaml['notify'])  # operation whose reply the notification reuses
+            ref_spec = ref_op.yaml.get('do') if ref_op else None  # tolerate an unknown notify target
+            if ref_spec:
+                print_mode_attrs(ynl, 'notify', ref_spec, op.attr_set, False)
+
+        if 'mcgrp' in op.yaml:
+            print(f"\nMulticast group: {op.yaml['mcgrp']}")
+
try:
if args.do:
reply = ynl.do(args.do, attrs, args.flags)
diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index cab6b576c876..fd0f6b8d54d1 100755
--- a/tools/net/ynl/pyynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
import argparse
-import json
import pathlib
import pprint
import sys
@@ -45,13 +44,16 @@ def print_field(reply, *desc):
Pretty-print a set of fields from the reply. desc specifies the
fields and the optional type (bool/yn).
"""
+ if not reply:
+ return
+
if len(desc) == 0:
return print_field(reply, *zip(reply.keys(), reply.keys()))
for spec in desc:
try:
field, name, tp = spec
- except:
+ except ValueError:
field, name = spec
tp = 'int'
@@ -156,7 +158,6 @@ def main():
global args
args = parser.parse_args()
- script_abs_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
spec = os.path.join(spec_dir(), 'ethtool.yaml')
schema = os.path.join(schema_dir(), 'genetlink-legacy.yaml')
@@ -255,14 +256,14 @@ def main():
reply = dumpit(ynl, args, 'channels-get')
print(f'Channel parameters for {args.device}:')
- print(f'Pre-set maximums:')
+ print('Pre-set maximums:')
print_field(reply,
('rx-max', 'RX'),
('tx-max', 'TX'),
('other-max', 'Other'),
('combined-max', 'Combined'))
- print(f'Current hardware settings:')
+ print('Current hardware settings:')
print_field(reply,
('rx-count', 'RX'),
('tx-count', 'TX'),
@@ -276,14 +277,14 @@ def main():
print(f'Ring parameters for {args.device}:')
- print(f'Pre-set maximums:')
+ print('Pre-set maximums:')
print_field(reply,
('rx-max', 'RX'),
('rx-mini-max', 'RX Mini'),
('rx-jumbo-max', 'RX Jumbo'),
('tx-max', 'TX'))
- print(f'Current hardware settings:')
+ print('Current hardware settings:')
print_field(reply,
('rx', 'RX'),
('rx-mini', 'RX Mini'),
@@ -298,7 +299,7 @@ def main():
return
if args.statistics:
- print(f'NIC statistics:')
+ print('NIC statistics:')
# TODO: pass id?
strset = dumpit(ynl, args, 'strset-get')
diff --git a/tools/net/ynl/pyynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index 71518b9842ee..ec9ea00071be 100644
--- a/tools/net/ynl/pyynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
@@ -4,6 +4,8 @@ from .nlspec import SpecAttr, SpecAttrSet, SpecEnumEntry, SpecEnumSet, \
SpecFamily, SpecOperation, SpecSubMessage, SpecSubMessageFormat
from .ynl import YnlFamily, Netlink, NlError
+from .doc_generator import YnlDocGenerator
+
__all__ = ["SpecAttr", "SpecAttrSet", "SpecEnumEntry", "SpecEnumSet",
"SpecFamily", "SpecOperation", "SpecSubMessage", "SpecSubMessageFormat",
- "YnlFamily", "Netlink", "NlError"]
+ "YnlFamily", "Netlink", "NlError", "YnlDocGenerator"]
diff --git a/tools/net/ynl/pyynl/lib/doc_generator.py b/tools/net/ynl/pyynl/lib/doc_generator.py
new file mode 100644
index 000000000000..3a16b8eb01ca
--- /dev/null
+++ b/tools/net/ynl/pyynl/lib/doc_generator.py
@@ -0,0 +1,402 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8; mode: python -*-
+
+"""
+ Class to auto generate the documentation for Netlink specifications.
+
+ :copyright: Copyright (C) 2023 Breno Leitao <leitao@debian.org>
+ :license: GPL Version 2, June 1991 see linux/COPYING for details.
+
+ This class performs extensive parsing to the Linux kernel's netlink YAML
+ spec files, in an effort to avoid needing to heavily mark up the original
+ YAML file.
+
+ This code is split in two classes:
+ 1) RST formatters: Use to convert a string to a RST output
+ 2) YAML Netlink (YNL) doc generator: Generate docs from YAML data
+"""
+
+from typing import Any, Dict, List
+import yaml
+
+LINE_STR = '__lineno__'
+
+class NumberedSafeLoader(yaml.SafeLoader): # pylint: disable=R0901
+ """Override the SafeLoader class to add line number to parsed data"""
+
+ def construct_mapping(self, node, *args, **kwargs):
+ mapping = super().construct_mapping(node, *args, **kwargs)
+ mapping[LINE_STR] = node.start_mark.line
+
+ return mapping
+
+class RstFormatters:
+ """RST Formatters"""
+
+ SPACE_PER_LEVEL = 4
+
+ @staticmethod
+ def headroom(level: int) -> str:
+ """Return space to format"""
+ return " " * (level * RstFormatters.SPACE_PER_LEVEL)
+
+ @staticmethod
+ def bold(text: str) -> str:
+ """Format bold text"""
+ return f"**{text}**"
+
+ @staticmethod
+ def inline(text: str) -> str:
+ """Format inline text"""
+ return f"``{text}``"
+
+ @staticmethod
+ def sanitize(text: str) -> str:
+ """Replace newlines with spaces and strip leading/trailing whitespace"""
+ # Useful for fields spread across multiple lines; inner runs of spaces are kept as-is
+ return str(text).replace("\n", " ").strip()
+
+ def rst_fields(self, key: str, value: str, level: int = 0) -> str:
+ """Return a RST formatted field"""
+ return self.headroom(level) + f":{key}: {value}"
+
+ def rst_definition(self, key: str, value: Any, level: int = 0) -> str:
+ """Format a single rst definition"""
+ return self.headroom(level) + key + "\n" + self.headroom(level + 1) + str(value)
+
+ def rst_paragraph(self, paragraph: str, level: int = 0) -> str:
+ """Return a formatted paragraph"""
+ return self.headroom(level) + paragraph
+
+ def rst_bullet(self, item: str, level: int = 0) -> str:
+ """Return a formatted a bullet"""
+ return self.headroom(level) + f"- {item}"
+
+ @staticmethod
+ def rst_subsection(title: str) -> str:
+ """Add a sub-section to the document"""
+ return f"{title}\n" + "-" * len(title)
+
+ @staticmethod
+ def rst_subsubsection(title: str) -> str:
+ """Add a sub-sub-section to the document"""
+ return f"{title}\n" + "~" * len(title)
+
+ @staticmethod
+ def rst_section(namespace: str, prefix: str, title: str) -> str:
+ """Add a section to the document"""
+ return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title)
+
+ @staticmethod
+ def rst_subtitle(title: str) -> str:
+ """Add a subtitle to the document"""
+ return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n"
+
+ @staticmethod
+ def rst_title(title: str) -> str:
+ """Add a title to the document"""
+ return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n"
+
+ def rst_list_inline(self, list_: List[str], level: int = 0) -> str:
+ """Format a list using inlines"""
+ return self.headroom(level) + "[" + ", ".join(self.inline(i) for i in list_) + "]"
+
+ @staticmethod
+ def rst_ref(namespace: str, prefix: str, name: str) -> str:
+ """Add a hyperlink to the document"""
+ mappings = {'enum': 'definition',
+ 'fixed-header': 'definition',
+ 'nested-attributes': 'attribute-set',
+ 'struct': 'definition'}
+ if prefix in mappings:
+ prefix = mappings[prefix]
+ return f":ref:`{namespace}-{prefix}-{name}`"
+
+ def rst_header(self) -> str:
+ """The headers for all the auto generated RST files"""
+ lines = []
+
+ lines.append(self.rst_paragraph(".. SPDX-License-Identifier: GPL-2.0"))
+ lines.append(self.rst_paragraph(".. NOTE: This document was auto-generated.\n\n"))
+
+ return "\n".join(lines)
+
+ @staticmethod
+ def rst_toctree(maxdepth: int = 2) -> str:
+ """Generate a toctree RST primitive"""
+ lines = []
+
+ lines.append(".. toctree::")
+ lines.append(f" :maxdepth: {maxdepth}\n\n")
+
+ return "\n".join(lines)
+
+ @staticmethod
+ def rst_label(title: str) -> str:
+ """Return a formatted label"""
+ return f".. _{title}:\n\n"
+
+ @staticmethod
+ def rst_lineno(lineno: int) -> str:
+ """Return a lineno comment"""
+ return f".. LINENO {lineno}\n"
+
+class YnlDocGenerator:
+ """YAML Netlink specs Parser"""
+
+ fmt = RstFormatters()
+
+ def parse_mcast_group(self, mcast_group: List[Dict[str, Any]]) -> str:
+ """Parse 'multicast' group list and return a formatted string"""
+ lines = []
+ for group in mcast_group:
+ lines.append(self.fmt.rst_bullet(group["name"]))
+
+ return "\n".join(lines)
+
+ def parse_do(self, do_dict: Dict[str, Any], level: int = 0) -> str:
+ """Parse 'do' section and return a formatted string"""
+ lines = []
+ if LINE_STR in do_dict:
+ lines.append(self.fmt.rst_lineno(do_dict[LINE_STR]))
+
+ for key in do_dict.keys():
+ if key == LINE_STR:
+ continue
+ lines.append(self.fmt.rst_paragraph(self.fmt.bold(key), level + 1))
+ if key in ['request', 'reply']:
+ lines.append(self.parse_do_attributes(do_dict[key], level + 1) + "\n")
+ else:
+ lines.append(self.fmt.headroom(level + 2) + do_dict[key] + "\n")
+
+ return "\n".join(lines)
+
+ def parse_do_attributes(self, attrs: Dict[str, Any], level: int = 0) -> str:
+ """Parse 'attributes' section"""
+ if "attributes" not in attrs:
+ return ""
+ lines = [self.fmt.rst_fields("attributes",
+ self.fmt.rst_list_inline(attrs["attributes"]),
+ level + 1)]
+
+ return "\n".join(lines)
+
+ def parse_operations(self, operations: List[Dict[str, Any]], namespace: str) -> str:
+ """Parse operations block"""
+ preprocessed = ["name", "doc", "title", "do", "dump", "flags"]
+ linkable = ["fixed-header", "attribute-set"]
+ lines = []
+
+ for operation in operations:
+ if LINE_STR in operation:
+ lines.append(self.fmt.rst_lineno(operation[LINE_STR]))
+
+ lines.append(self.fmt.rst_section(namespace, 'operation',
+ operation["name"]))
+ lines.append(self.fmt.rst_paragraph(operation["doc"]) + "\n")
+
+ for key in operation.keys():
+ if key == LINE_STR:
+ continue
+
+ if key in preprocessed:
+ # Skip the special fields
+ continue
+ value = operation[key]
+ if key in linkable:
+ value = self.fmt.rst_ref(namespace, key, value)
+ lines.append(self.fmt.rst_fields(key, value, 0))
+ if 'flags' in operation:
+ lines.append(self.fmt.rst_fields('flags',
+ self.fmt.rst_list_inline(operation['flags'])))
+
+ if "do" in operation:
+ lines.append(self.fmt.rst_paragraph(":do:", 0))
+ lines.append(self.parse_do(operation["do"], 0))
+ if "dump" in operation:
+ lines.append(self.fmt.rst_paragraph(":dump:", 0))
+ lines.append(self.parse_do(operation["dump"], 0))
+
+ # New line after fields
+ lines.append("\n")
+
+ return "\n".join(lines)
+
+ def parse_entries(self, entries: List[Dict[str, Any]], level: int) -> str:
+ """Parse a list of entries"""
+ ignored = ["pad"]
+ lines = []
+ for entry in entries:
+ if isinstance(entry, dict):
+ # entries could be a list or a dictionary
+ field_name = entry.get("name", "")
+ if field_name in ignored:
+ continue
+ type_ = entry.get("type")
+ if type_:
+ field_name += f" ({self.fmt.inline(type_)})"
+ lines.append(
+ self.fmt.rst_fields(field_name,
+ self.fmt.sanitize(entry.get("doc", "")),
+ level)
+ )
+ elif isinstance(entry, list):
+ lines.append(self.fmt.rst_list_inline(entry, level))
+ else:
+ lines.append(self.fmt.rst_bullet(self.fmt.inline(self.fmt.sanitize(entry)),
+ level))
+
+ lines.append("\n")
+ return "\n".join(lines)
+
+ def parse_definitions(self, defs: Dict[str, Any], namespace: str) -> str:
+ """Parse definitions section"""
+ preprocessed = ["name", "entries", "members"]
+ ignored = ["render-max"] # This is not printed
+ lines = []
+
+ for definition in defs:
+ if LINE_STR in definition:
+ lines.append(self.fmt.rst_lineno(definition[LINE_STR]))
+
+ lines.append(self.fmt.rst_section(namespace, 'definition', definition["name"]))
+ for k in definition.keys():
+ if k == LINE_STR:
+ continue
+ if k in preprocessed + ignored:
+ continue
+ lines.append(self.fmt.rst_fields(k, self.fmt.sanitize(definition[k]), 0))
+
+ # Field list needs to finish with a new line
+ lines.append("\n")
+ if "entries" in definition:
+ lines.append(self.fmt.rst_paragraph(":entries:", 0))
+ lines.append(self.parse_entries(definition["entries"], 1))
+ if "members" in definition:
+ lines.append(self.fmt.rst_paragraph(":members:", 0))
+ lines.append(self.parse_entries(definition["members"], 1))
+
+ return "\n".join(lines)
+
+ def parse_attr_sets(self, entries: List[Dict[str, Any]], namespace: str) -> str:
+ """Parse attribute from attribute-set"""
+ preprocessed = ["name", "type"]
+ linkable = ["enum", "nested-attributes", "struct", "sub-message"]
+ ignored = ["checks"]
+ lines = []
+
+ for entry in entries:
+ lines.append(self.fmt.rst_section(namespace, 'attribute-set',
+ entry["name"]))
+
+ if "doc" in entry:
+ lines.append(self.fmt.rst_paragraph(entry["doc"], 0) + "\n")
+
+ for attr in entry["attributes"]:
+ if LINE_STR in attr:
+ lines.append(self.fmt.rst_lineno(attr[LINE_STR]))
+
+ type_ = attr.get("type")
+ attr_line = attr["name"]
+ if type_:
+ # Add the attribute type in the same line
+ attr_line += f" ({self.fmt.inline(type_)})"
+
+ lines.append(self.fmt.rst_subsubsection(attr_line))
+
+ for k in attr.keys():
+ if k == LINE_STR:
+ continue
+ if k in preprocessed + ignored:
+ continue
+ if k in linkable:
+ value = self.fmt.rst_ref(namespace, k, attr[k])
+ else:
+ value = self.fmt.sanitize(attr[k])
+ lines.append(self.fmt.rst_fields(k, value, 0))
+ lines.append("\n")
+
+ return "\n".join(lines)
+
+ def parse_sub_messages(self, entries: List[Dict[str, Any]], namespace: str) -> str:
+ """Parse sub-message definitions"""
+ lines = []
+
+ for entry in entries:
+ lines.append(self.fmt.rst_section(namespace, 'sub-message',
+ entry["name"]))
+ for fmt in entry["formats"]:
+ value = fmt["value"]
+
+ lines.append(self.fmt.rst_bullet(self.fmt.bold(value)))
+ for attr in ['fixed-header', 'attribute-set']:
+ if attr in fmt:
+ lines.append(self.fmt.rst_fields(attr,
+ self.fmt.rst_ref(namespace,
+ attr,
+ fmt[attr]),
+ 1))
+ lines.append("\n")
+
+ return "\n".join(lines)
+
+    def parse_yaml(self, obj: Dict[str, Any]) -> str:
+        """Format the whole parsed YAML spec 'obj' into a single RST string"""
+        lines = []
+
+        # Main header; LINE_STR is the key NumberedSafeLoader adds to every mapping
+        lineno = obj.get(LINE_STR, 0)
+        lines.append(self.fmt.rst_lineno(lineno))
+
+        family = obj['name']
+
+        lines.append(self.fmt.rst_header())
+        lines.append(self.fmt.rst_label("netlink-" + family))
+
+        title = f"Family ``{family}`` netlink specification"
+        lines.append(self.fmt.rst_title(title))
+        lines.append(self.fmt.rst_paragraph(".. contents:: :depth: 3\n"))
+
+        if "doc" in obj:
+            lines.append(self.fmt.rst_subtitle("Summary"))
+            lines.append(self.fmt.rst_paragraph(obj["doc"], 0))
+
+        # Operations
+        if "operations" in obj:
+            lines.append(self.fmt.rst_subtitle("Operations"))
+            lines.append(self.parse_operations(obj["operations"]["list"],
+                                               family))
+
+        # Multicast groups
+        if "mcast-groups" in obj:
+            lines.append(self.fmt.rst_subtitle("Multicast groups"))
+            lines.append(self.parse_mcast_group(obj["mcast-groups"]["list"]))
+
+        # Definitions
+        if "definitions" in obj:
+            lines.append(self.fmt.rst_subtitle("Definitions"))
+            lines.append(self.parse_definitions(obj["definitions"], family))
+
+        # Attributes set
+        if "attribute-sets" in obj:
+            lines.append(self.fmt.rst_subtitle("Attribute sets"))
+            lines.append(self.parse_attr_sets(obj["attribute-sets"], family))
+
+        # Sub-messages
+        if "sub-messages" in obj:
+            lines.append(self.fmt.rst_subtitle("Sub-messages"))
+            lines.append(self.parse_sub_messages(obj["sub-messages"], family))
+
+        return "\n".join(lines)
+
+ # Main functions
+ # ==============
+
+ def parse_yaml_file(self, filename: str) -> str:
+ """Transform the YAML specified by filename into an RST-formatted string"""
+ with open(filename, "r", encoding="utf-8") as spec_file:
+ numbered_yaml = yaml.load(spec_file, Loader=NumberedSafeLoader)
+ content = self.parse_yaml(numbered_yaml)
+
+ return content
diff --git a/tools/net/ynl/pyynl/lib/nlspec.py b/tools/net/ynl/pyynl/lib/nlspec.py
index 314ec8007496..85c17fe01e35 100644
--- a/tools/net/ynl/pyynl/lib/nlspec.py
+++ b/tools/net/ynl/pyynl/lib/nlspec.py
@@ -501,7 +501,7 @@ class SpecFamily(SpecElement):
return SpecStruct(self, elem)
def new_sub_message(self, elem):
- return SpecSubMessage(self, elem);
+ return SpecSubMessage(self, elem)
def new_operation(self, elem, req_val, rsp_val):
return SpecOperation(self, elem, req_val, rsp_val)
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index 8244a5f440b2..36d36eb7e3b8 100644
--- a/tools/net/ynl/pyynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -9,7 +9,6 @@ import socket
import struct
from struct import Struct
import sys
-import yaml
import ipaddress
import uuid
import queue
@@ -101,12 +100,21 @@ class Netlink:
'bitfield32', 'sint', 'uint'])
class NlError(Exception):
- def __init__(self, nl_msg):
- self.nl_msg = nl_msg
- self.error = -nl_msg.error
-
- def __str__(self):
- return f"Netlink error: {os.strerror(self.error)}\n{self.nl_msg}"
+ def __init__(self, nl_msg):
+ self.nl_msg = nl_msg
+ self.error = -nl_msg.error
+
+ def __str__(self):
+ msg = "Netlink error: "
+
+ extack = self.nl_msg.extack.copy() if self.nl_msg.extack else {}
+ if 'msg' in extack:
+ msg += extack['msg'] + ': '
+ del extack['msg']
+ msg += os.strerror(self.error)
+ if extack:
+ msg += ' ' + str(extack)
+ return msg
class ConfigError(Exception):
@@ -562,11 +570,13 @@ class YnlFamily(SpecFamily):
if attr["type"] == 'nest':
nl_type |= Netlink.NLA_F_NESTED
- attr_payload = b''
sub_space = attr['nested-attributes']
- sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs)
- for subname, subvalue in value.items():
- attr_payload += self._add_attr(sub_space, subname, subvalue, sub_attrs)
+ attr_payload = self._add_nest_attrs(value, sub_space, search_attrs)
+ elif attr['type'] == 'indexed-array' and attr['sub-type'] == 'nest':
+ nl_type |= Netlink.NLA_F_NESTED
+ sub_space = attr['nested-attributes']
+ attr_payload = self._encode_indexed_array(value, sub_space,
+ search_attrs)
elif attr["type"] == 'flag':
if not value:
# If value is absent or false then skip attribute creation.
@@ -620,9 +630,28 @@ class YnlFamily(SpecFamily):
else:
raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}')
+ return self._add_attr_raw(nl_type, attr_payload)
+
+ def _add_attr_raw(self, nl_type, attr_payload):
pad = b'\x00' * ((4 - len(attr_payload) % 4) % 4)
return struct.pack('HH', len(attr_payload) + 4, nl_type) + attr_payload + pad
+ def _add_nest_attrs(self, value, sub_space, search_attrs):
+ sub_attrs = SpaceAttrs(self.attr_sets[sub_space], value, search_attrs)
+ attr_payload = b''
+ for subname, subvalue in value.items():
+ attr_payload += self._add_attr(sub_space, subname, subvalue,
+ sub_attrs)
+ return attr_payload
+
+ def _encode_indexed_array(self, vals, sub_space, search_attrs):
+ attr_payload = b''
+ for i, val in enumerate(vals):
+ idx = i | Netlink.NLA_F_NESTED
+ val_payload = self._add_nest_attrs(val, sub_space, search_attrs)
+ attr_payload += self._add_attr_raw(idx, val_payload)
+ return attr_payload
+
def _get_enum_or_unknown(self, enum, raw):
try:
name = enum.entries_by_val[raw].name
@@ -706,7 +735,7 @@ class YnlFamily(SpecFamily):
return attr.as_bin()
def _rsp_add(self, rsp, name, is_multi, decoded):
- if is_multi == None:
+ if is_multi is None:
if name in rsp and type(rsp[name]) is not list:
rsp[name] = [rsp[name]]
is_multi = True
@@ -739,14 +768,14 @@ class YnlFamily(SpecFamily):
decoded = {}
offset = 0
if msg_format.fixed_header:
- decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header));
+ decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header))
offset = self._struct_size(msg_format.fixed_header)
if msg_format.attr_set:
if msg_format.attr_set in self.attr_sets:
subdict = self._decode(NlAttrs(attr.raw, offset), msg_format.attr_set)
decoded.update(subdict)
else:
- raise Exception(f"Unknown attribute-set '{attr_space}' when decoding '{attr_spec.name}'")
+ raise Exception(f"Unknown attribute-set '{msg_format.attr_set}' when decoding '{attr_spec.name}'")
return decoded
def _decode(self, attrs, space, outer_attrs = None):
@@ -936,7 +965,7 @@ class YnlFamily(SpecFamily):
formatted = hex(raw)
else:
formatted = bytes.hex(raw, ' ')
- elif display_hint in [ 'ipv4', 'ipv6' ]:
+ elif display_hint in [ 'ipv4', 'ipv6', 'ipv4-or-v6' ]:
formatted = format(ipaddress.ip_address(raw))
elif display_hint == 'uuid':
formatted = str(uuid.UUID(bytes=raw))
@@ -945,12 +974,26 @@ class YnlFamily(SpecFamily):
return formatted
def _from_string(self, string, attr_spec):
- if attr_spec.display_hint in ['ipv4', 'ipv6']:
+ if attr_spec.display_hint in ['ipv4', 'ipv6', 'ipv4-or-v6']:
ip = ipaddress.ip_address(string)
if attr_spec['type'] == 'binary':
raw = ip.packed
else:
raw = int(ip)
+ elif attr_spec.display_hint == 'hex':
+ if attr_spec['type'] == 'binary':
+ raw = bytes.fromhex(string)
+ else:
+ raw = int(string, 16)
+ elif attr_spec.display_hint == 'mac':
+ # Parse MAC address in format "00:11:22:33:44:55" or "001122334455"
+ if ':' in string:
+ mac_bytes = [int(x, 16) for x in string.split(':')]
+ else:
+ if len(string) % 2 != 0:
+ raise Exception(f"Invalid MAC address format: {string}")
+ mac_bytes = [int(string[i:i+2], 16) for i in range(0, len(string), 2)]
+ raw = bytes(mac_bytes)
else:
raise Exception(f"Display hint '{attr_spec.display_hint}' not implemented"
f" when parsing '{attr_spec['name']}'")
@@ -1014,15 +1057,15 @@ class YnlFamily(SpecFamily):
self.check_ntf()
def operation_do_attributes(self, name):
- """
- For a given operation name, find and return a supported
- set of attributes (as a dict).
- """
- op = self.find_operation(name)
- if not op:
- return None
-
- return op['do']['request']['attributes'].copy()
+ """
+ For a given operation name, find and return a supported
+ set of attributes (as a dict).
+ """
+ op = self.find_operation(name)
+ if not op:
+ return None
+
+ return op['do']['request']['attributes'].copy()
def _encode_message(self, op, vals, flags, req_seq):
nl_flags = Netlink.NLM_F_REQUEST | Netlink.NLM_F_ACK
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index ef032e17fec4..b517d0c605ad 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)
import argparse
-import collections
import filecmp
import pathlib
import os
@@ -14,7 +13,7 @@ import yaml
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry
-from lib import SpecSubMessage, SpecSubMessageFormat
+from lib import SpecSubMessage
def c_upper(name):
@@ -243,7 +242,7 @@ class Type(SpecAttr):
raise Exception(f"Attr get not implemented for class type {self.type}")
def attr_get(self, ri, var, first):
- lines, init_lines, local_vars = self._attr_get(ri, var)
+ lines, init_lines, _ = self._attr_get(ri, var)
if type(lines) is str:
lines = [lines]
if type(init_lines) is str:
@@ -251,10 +250,6 @@ class Type(SpecAttr):
kw = 'if' if first else 'else if'
ri.cw.block_start(line=f"{kw} (type == {self.enum_name})")
- if local_vars:
- for local in local_vars:
- ri.cw.p(local)
- ri.cw.nl()
if not self.is_multi_val():
ri.cw.p("if (ynl_attr_validate(yarg, attr))")
@@ -398,7 +393,7 @@ class TypeScalar(Type):
if 'enum' in self.attr:
enum = self.family.consts[self.attr['enum']]
low, high = enum.value_range()
- if low == None and high == None:
+ if low is None and high is None:
self.checks['sparse'] = True
else:
if 'min' not in self.checks:
@@ -485,7 +480,7 @@ class TypeString(Type):
ri.cw.p(f"char *{self.c_name};")
def _attr_typol(self):
- typol = f'.type = YNL_PT_NUL_STR, '
+ typol = '.type = YNL_PT_NUL_STR, '
if self.is_selector:
typol += '.is_selector = 1, '
return typol
@@ -539,7 +534,7 @@ class TypeBinary(Type):
ri.cw.p(f"void *{self.c_name};")
def _attr_typol(self):
- return f'.type = YNL_PT_BINARY,'
+ return '.type = YNL_PT_BINARY,'
def _attr_policy(self, policy):
if len(self.checks) == 0:
@@ -556,7 +551,7 @@ class TypeBinary(Type):
elif 'exact-len' in self.checks:
mem = 'NLA_POLICY_EXACT_LEN(' + self.get_limit_str('exact-len') + ')'
elif 'min-len' in self.checks:
- mem = '{ .len = ' + self.get_limit_str('min-len') + ', }'
+ mem = 'NLA_POLICY_MIN_LEN(' + self.get_limit_str('min-len') + ')'
elif 'max-len' in self.checks:
mem = 'NLA_POLICY_MAX_LEN(' + self.get_limit_str('max-len') + ')'
@@ -636,10 +631,10 @@ class TypeBitfield32(Type):
return "struct nla_bitfield32"
def _attr_typol(self):
- return f'.type = YNL_PT_BITFIELD32, '
+ return '.type = YNL_PT_BITFIELD32, '
def _attr_policy(self, policy):
- if not 'enum' in self.attr:
+ if 'enum' not in self.attr:
raise Exception('Enum required for bitfield32 attr')
enum = self.family.consts[self.attr['enum']]
mask = enum.get_mask(as_flags=True)
@@ -725,7 +720,11 @@ class TypeMultiAttr(Type):
return 'struct ynl_string *'
elif self.attr['type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
- return scalar_pfx + self.attr['type']
+ if self.is_auto_scalar:
+ name = self.type[0] + '64'
+ else:
+ name = self.attr['type']
+ return scalar_pfx + name
else:
raise Exception(f"Sub-type {self.attr['type']} not supported yet")
@@ -792,7 +791,7 @@ class TypeMultiAttr(Type):
f"{presence} = n_{self.c_name};"]
-class TypeArrayNest(Type):
+class TypeIndexedArray(Type):
def is_multi_val(self):
return True
@@ -816,21 +815,28 @@ class TypeArrayNest(Type):
f'unsigned int n_{self.c_name}']
return super().arg_member(ri)
+ def _attr_policy(self, policy):
+ if self.attr['sub-type'] == 'nest':
+ return f'NLA_POLICY_NESTED_ARRAY({self.nested_render_name}_nl_policy)'
+ return super()._attr_policy(policy)
+
def _attr_typol(self):
if self.attr['sub-type'] in scalars:
return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, '
elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
return f'.type = YNL_PT_BINARY, .len = {self.checks["exact-len"]}, '
- else:
+ elif self.attr['sub-type'] == 'nest':
return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
+ else:
+ raise Exception(f"Typol for IndexedArray sub-type {self.attr['sub-type']} not supported, yet")
def _attr_get(self, ri, var):
local_vars = ['const struct nlattr *attr2;']
get_lines = [f'attr_{self.c_name} = attr;',
'ynl_attr_for_each_nested(attr2, attr) {',
- '\tif (ynl_attr_validate(yarg, attr2))',
+ '\tif (__ynl_attr_validate(yarg, attr2, type))',
'\t\treturn YNL_PARSE_CB_ERROR;',
- f'\t{var}->_count.{self.c_name}++;',
+ f'\tn_{self.c_name}++;',
'}']
return get_lines, None, local_vars
@@ -848,13 +854,25 @@ class TypeArrayNest(Type):
ri.cw.p(f'for (i = 0; i < {var}->_count.{self.c_name}; i++)')
ri.cw.p(f"{self.nested_render_name}_put(nlh, i, &{var}->{self.c_name}[i]);")
else:
- raise Exception(f"Put for ArrayNest sub-type {self.attr['sub-type']} not supported, yet")
+ raise Exception(f"Put for IndexedArray sub-type {self.attr['sub-type']} not supported, yet")
ri.cw.p('ynl_attr_nest_end(nlh, array);')
def _setter_lines(self, ri, member, presence):
return [f"{member} = {self.c_name};",
f"{presence} = n_{self.c_name};"]
+ def free_needs_iter(self):
+ return self.sub_type == 'nest'
+
+ def _free_lines(self, ri, var, ref):
+ lines = []
+ if self.sub_type == 'nest':
+ lines += [
+ f"for (i = 0; i < {var}->{ref}_count.{self.c_name}; i++)",
+ f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);',
+ ]
+ lines += f"free({var}->{ref}{self.c_name});",
+ return lines
class TypeNestTypeValue(Type):
def _complex_member_type(self, ri):
@@ -909,7 +927,7 @@ class TypeSubMessage(TypeNest):
else:
sel_var = f"{var}->{sel}"
get_lines = [f'if (!{sel_var})',
- f'return ynl_submsg_failed(yarg, "%s", "%s");' %
+ 'return ynl_submsg_failed(yarg, "%s", "%s");' %
(self.name, self['selector']),
f"if ({self.nested_render_name}_parse(&parg, {sel_var}, attr))",
"return YNL_PARSE_CB_ERROR;"]
@@ -1125,7 +1143,7 @@ class AttrSet(SpecAttrSet):
t = TypeNest(self.family, self, elem, value)
elif elem['type'] == 'indexed-array' and 'sub-type' in elem:
if elem["sub-type"] in ['binary', 'nest', 'u32']:
- t = TypeArrayNest(self.family, self, elem, value)
+ t = TypeIndexedArray(self.family, self, elem, value)
else:
raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}')
elif elem['type'] == 'nest-type-value':
@@ -1187,7 +1205,7 @@ class SubMessage(SpecSubMessage):
class Family(SpecFamily):
- def __init__(self, file_name, exclude_ops):
+ def __init__(self, file_name, exclude_ops, fn_prefix):
# Added by resolve:
self.c_name = None
delattr(self, "c_name")
@@ -1219,6 +1237,8 @@ class Family(SpecFamily):
else:
self.uapi_header_name = self.ident_name
+ self.fn_prefix = fn_prefix if fn_prefix else f'{self.ident_name}-nl'
+
def resolve(self):
self.resolve_up(super())
@@ -1563,7 +1583,7 @@ class RenderInfo:
if family.is_classic():
self.fixed_hdr_len = f"sizeof(struct {c_lower(fixed_hdr)})"
else:
- raise Exception(f"Per-op fixed header not supported, yet")
+ raise Exception("Per-op fixed header not supported, yet")
# 'do' and 'dump' response parsing is identical
@@ -2034,6 +2054,20 @@ def put_enum_to_str(family, cw, enum):
_put_enum_to_str_helper(cw, enum.render_name, map_name, 'value', enum=enum)
+def put_local_vars(struct):
+ local_vars = []
+ has_array = False
+ has_count = False
+ for _, arg in struct.member_list():
+ has_array |= arg.type == 'indexed-array'
+ has_count |= arg.presence_type() == 'count'
+ if has_array:
+ local_vars.append('struct nlattr *array;')
+ if has_count:
+ local_vars.append('unsigned int i;')
+ return local_vars
+
+
def put_req_nested_prototype(ri, struct, suffix=';'):
func_args = ['struct nlmsghdr *nlh',
'unsigned int attr_type',
@@ -2056,15 +2090,7 @@ def put_req_nested(ri, struct):
init_lines.append(f"hdr = ynl_nlmsg_put_extra_header(nlh, {struct_sz});")
init_lines.append(f"memcpy(hdr, &obj->_hdr, {struct_sz});")
- has_anest = False
- has_count = False
- for _, arg in struct.member_list():
- has_anest |= arg.type == 'indexed-array'
- has_count |= arg.presence_type() == 'count'
- if has_anest:
- local_vars.append('struct nlattr *array;')
- if has_count:
- local_vars.append('unsigned int i;')
+ local_vars += put_local_vars(struct)
put_req_nested_prototype(ri, struct, suffix='')
ri.cw.block_start()
@@ -2099,35 +2125,43 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if ri.family.is_classic():
iter_line = f"ynl_attr_for_each(attr, nlh, sizeof({struct.fixed_header}))"
else:
- raise Exception(f"Per-op fixed header not supported, yet")
+ raise Exception("Per-op fixed header not supported, yet")
- array_nests = set()
+ indexed_arrays = set()
multi_attrs = set()
needs_parg = False
+ var_set = set()
for arg, aspec in struct.member_list():
if aspec['type'] == 'indexed-array' and 'sub-type' in aspec:
if aspec["sub-type"] in {'binary', 'nest'}:
- local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
- array_nests.add(arg)
+ local_vars.append(f'const struct nlattr *attr_{aspec.c_name} = NULL;')
+ indexed_arrays.add(arg)
elif aspec['sub-type'] in scalars:
- local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
- array_nests.add(arg)
+ local_vars.append(f'const struct nlattr *attr_{aspec.c_name} = NULL;')
+ indexed_arrays.add(arg)
else:
raise Exception(f'Not supported sub-type {aspec["sub-type"]}')
if 'multi-attr' in aspec:
multi_attrs.add(arg)
needs_parg |= 'nested-attributes' in aspec
needs_parg |= 'sub-message' in aspec
- if array_nests or multi_attrs:
+
+ try:
+ _, _, l_vars = aspec._attr_get(ri, '')
+ var_set |= set(l_vars) if l_vars else set()
+ except Exception:
+ pass # _attr_get() not implemented by simple types, ignore
+ local_vars += list(var_set)
+ if indexed_arrays or multi_attrs:
local_vars.append('int i;')
if needs_parg:
local_vars.append('struct ynl_parse_arg parg;')
init_lines.append('parg.ys = yarg->ys;')
- all_multi = array_nests | multi_attrs
+ all_multi = indexed_arrays | multi_attrs
- for anest in sorted(all_multi):
- local_vars.append(f"unsigned int n_{struct[anest].c_name} = 0;")
+ for arg in sorted(all_multi):
+ local_vars.append(f"unsigned int n_{struct[arg].c_name} = 0;")
ri.cw.block_start()
ri.cw.write_func_lvar(local_vars)
@@ -2147,8 +2181,8 @@ def _multi_parse(ri, struct, init_lines, local_vars):
else:
ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({struct.fixed_header}));")
- for anest in sorted(all_multi):
- aspec = struct[anest]
+ for arg in sorted(all_multi):
+ aspec = struct[arg]
ri.cw.p(f"if (dst->{aspec.c_name})")
ri.cw.p(f'return ynl_error_parse(yarg, "attribute already present ({struct.attr_set.name}.{aspec.name})");')
@@ -2166,8 +2200,8 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.block_end()
ri.cw.nl()
- for anest in sorted(array_nests):
- aspec = struct[anest]
+ for arg in sorted(indexed_arrays):
+ aspec = struct[arg]
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
@@ -2192,8 +2226,8 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.block_end()
ri.cw.nl()
- for anest in sorted(multi_attrs):
- aspec = struct[anest]
+ for arg in sorted(multi_attrs):
+ aspec = struct[arg]
ri.cw.block_start(line=f"if (n_{aspec.c_name})")
ri.cw.p(f"dst->{aspec.c_name} = calloc(n_{aspec.c_name}, sizeof(*dst->{aspec.c_name}));")
ri.cw.p(f"dst->_count.{aspec.c_name} = n_{aspec.c_name};")
@@ -2348,10 +2382,7 @@ def print_req(ri):
local_vars += ['size_t hdr_len;',
'void *hdr;']
- for _, attr in ri.struct["request"].member_list():
- if attr.presence_type() == 'count':
- local_vars += ['unsigned int i;']
- break
+ local_vars += put_local_vars(ri.struct['request'])
print_prototype(ri, direction, terminate=False)
ri.cw.block_start()
@@ -2418,6 +2449,9 @@ def print_dump(ri):
local_vars += ['size_t hdr_len;',
'void *hdr;']
+ if 'request' in ri.op[ri.op_mode]:
+ local_vars += put_local_vars(ri.struct['request'])
+
ri.cw.write_func_lvar(local_vars)
ri.cw.p('yds.yarg.ys = ys;')
@@ -2502,7 +2536,7 @@ def print_free_prototype(ri, direction, suffix=';'):
def print_nlflags_set(ri, direction):
name = op_prefix(ri, direction)
- ri.cw.write_func_prot(f'static inline void', f"{name}_set_nlflags",
+ ri.cw.write_func_prot('static inline void', f"{name}_set_nlflags",
[f"struct {name} *req", "__u16 nl_flags"])
ri.cw.block_start()
ri.cw.p('req->_nlmsg_flags = nl_flags;')
@@ -2533,7 +2567,7 @@ def _print_type(ri, direction, struct):
line = attr.presence_member(ri.ku_space, type_filter)
if line:
if not meta_started:
- ri.cw.block_start(line=f"struct")
+ ri.cw.block_start(line="struct")
meta_started = True
ri.cw.p(line)
if meta_started:
@@ -2697,7 +2731,7 @@ def print_dump_type_free(ri):
ri.cw.nl()
_free_type_members(ri, 'rsp', ri.struct['reply'], ref='obj.')
- ri.cw.p(f'free(rsp);')
+ ri.cw.p('free(rsp);')
ri.cw.block_end()
ri.cw.block_end()
ri.cw.nl()
@@ -2708,7 +2742,7 @@ def print_ntf_type_free(ri):
ri.cw.block_start()
_free_type_members_iter(ri, ri.struct['reply'])
_free_type_members(ri, 'rsp', ri.struct['reply'], ref='obj.')
- ri.cw.p(f'free(rsp);')
+ ri.cw.p('free(rsp);')
ri.cw.block_end()
ri.cw.nl()
@@ -2803,8 +2837,6 @@ def print_kernel_policy_sparse_enum_validates(family, cw):
cw.p('/* Sparse enums validation callbacks */')
first = False
- sign = '' if attr.type[0] == 'u' else '_signed'
- suffix = 'ULL' if attr.type[0] == 'u' else 'LL'
cw.write_func_prot('static int', f'{c_lower(attr.enum_name)}_validate',
['const struct nlattr *attr', 'struct netlink_ext_ack *extack'])
cw.block_start()
@@ -2881,12 +2913,12 @@ def print_kernel_op_table_fwd(family, cw, terminate):
continue
if 'do' in op:
- name = c_lower(f"{family.ident_name}-nl-{op_name}-doit")
+ name = c_lower(f"{family.fn_prefix}-{op_name}-doit")
cw.write_func_prot('int', name,
['struct sk_buff *skb', 'struct genl_info *info'], suffix=';')
if 'dump' in op:
- name = c_lower(f"{family.ident_name}-nl-{op_name}-dumpit")
+ name = c_lower(f"{family.fn_prefix}-{op_name}-dumpit")
cw.write_func_prot('int', name,
['struct sk_buff *skb', 'struct netlink_callback *cb'], suffix=';')
cw.nl()
@@ -2912,7 +2944,7 @@ def print_kernel_op_table(family, cw):
for x in op['dont-validate']])), )
for op_mode in ['do', 'dump']:
if op_mode in op:
- name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it")
+ name = c_lower(f"{family.fn_prefix}-{op_name}-{op_mode}it")
members.append((op_mode + 'it', name))
if family.kernel_policy == 'per-op':
struct = Struct(family, op['attribute-set'],
@@ -2950,7 +2982,7 @@ def print_kernel_op_table(family, cw):
members.append(('validate',
' | '.join([c_upper('genl-dont-validate-' + x)
for x in dont_validate])), )
- name = c_lower(f"{family.ident_name}-nl-{op_name}-{op_mode}it")
+ name = c_lower(f"{family.fn_prefix}-{op_name}-{op_mode}it")
if 'pre' in op[op_mode]:
members.append((cb_names[op_mode]['pre'], c_lower(op[op_mode]['pre'])))
members.append((op_mode + 'it', name))
@@ -3211,8 +3243,9 @@ def render_uapi(family, cw):
cw.block_end(line=';')
cw.nl()
elif const['type'] == 'const':
+ name_pfx = const.get('name-prefix', f"{family.ident_name}-")
defines.append([c_upper(family.get('c-define-name',
- f"{family.ident_name}-{const['name']}")),
+ f"{name_pfx}{const['name']}")),
const['value']])
if defines:
@@ -3324,7 +3357,7 @@ def render_user_family(family, cw, prototype):
cw.block_start(f'{symbol} = ')
cw.p(f'.name\t\t= "{family.c_name}",')
if family.is_classic():
- cw.p(f'.is_classic\t= true,')
+ cw.p('.is_classic\t= true,')
cw.p(f'.classic_id\t= {family.get("protonum")},')
if family.is_classic():
if family.fixed_header:
@@ -3371,6 +3404,7 @@ def main():
help='Do not overwrite the output file if the new output is identical to the old')
parser.add_argument('--exclude-op', action='append', default=[])
parser.add_argument('-o', dest='out_file', type=str, default=None)
+ parser.add_argument('--function-prefix', dest='fn_prefix', type=str)
args = parser.parse_args()
if args.header is None:
@@ -3379,7 +3413,7 @@ def main():
exclude_ops = [re.compile(expr) for expr in args.exclude_op]
try:
- parsed = Family(args.spec, exclude_ops)
+ parsed = Family(args.spec, exclude_ops, args.fn_prefix)
if parsed.license != '((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)':
print('Spec license:', parsed.license)
print('License must be: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause)')
@@ -3399,11 +3433,16 @@ def main():
cw.p("/* Do not edit directly, auto-generated from: */")
cw.p(f"/*\t{spec_kernel} */")
cw.p(f"/* YNL-GEN {args.mode} {'header' if args.header else 'source'} */")
- if args.exclude_op or args.user_header:
+ if args.exclude_op or args.user_header or args.fn_prefix:
line = ''
- line += ' --user-header '.join([''] + args.user_header)
- line += ' --exclude-op '.join([''] + args.exclude_op)
+ if args.user_header:
+ line += ' --user-header '.join([''] + args.user_header)
+ if args.exclude_op:
+ line += ' --exclude-op '.join([''] + args.exclude_op)
+ if args.fn_prefix:
+ line += f' --function-prefix {args.fn_prefix}'
cw.p(f'/* YNL-ARG{line} */')
+ cw.p('/* To regenerate run: tools/net/ynl/ynl-regen.sh */')
cw.nl()
if args.mode == 'uapi':
diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 0cb6348e28d3..90ae19aac89d 100755
--- a/tools/net/ynl/pyynl/ynl_gen_rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
@@ -10,353 +10,17 @@
This script performs extensive parsing to the Linux kernel's netlink YAML
spec files, in an effort to avoid needing to heavily mark up the original
- YAML file.
-
- This code is split in three big parts:
- 1) RST formatters: Use to convert a string to a RST output
- 2) Parser helpers: Functions to parse the YAML data structure
- 3) Main function and small helpers
+ YAML file. It uses the library code from scripts/lib.
"""
-from typing import Any, Dict, List
import os.path
+import pathlib
import sys
import argparse
import logging
-import yaml
-
-
-SPACE_PER_LEVEL = 4
-
-
-# RST Formatters
-# ==============
-def headroom(level: int) -> str:
- """Return space to format"""
- return " " * (level * SPACE_PER_LEVEL)
-
-
-def bold(text: str) -> str:
- """Format bold text"""
- return f"**{text}**"
-
-
-def inline(text: str) -> str:
- """Format inline text"""
- return f"``{text}``"
-
-
-def sanitize(text: str) -> str:
- """Remove newlines and multiple spaces"""
- # This is useful for some fields that are spread across multiple lines
- return str(text).replace("\n", " ").strip()
-
-
-def rst_fields(key: str, value: str, level: int = 0) -> str:
- """Return a RST formatted field"""
- return headroom(level) + f":{key}: {value}"
-
-
-def rst_definition(key: str, value: Any, level: int = 0) -> str:
- """Format a single rst definition"""
- return headroom(level) + key + "\n" + headroom(level + 1) + str(value)
-
-
-def rst_paragraph(paragraph: str, level: int = 0) -> str:
- """Return a formatted paragraph"""
- return headroom(level) + paragraph
-
-
-def rst_bullet(item: str, level: int = 0) -> str:
- """Return a formatted a bullet"""
- return headroom(level) + f"- {item}"
-
-
-def rst_subsection(title: str) -> str:
- """Add a sub-section to the document"""
- return f"{title}\n" + "-" * len(title)
-
-
-def rst_subsubsection(title: str) -> str:
- """Add a sub-sub-section to the document"""
- return f"{title}\n" + "~" * len(title)
-
-
-def rst_section(namespace: str, prefix: str, title: str) -> str:
- """Add a section to the document"""
- return f".. _{namespace}-{prefix}-{title}:\n\n{title}\n" + "=" * len(title)
-
-
-def rst_subtitle(title: str) -> str:
- """Add a subtitle to the document"""
- return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n"
-
-
-def rst_title(title: str) -> str:
- """Add a title to the document"""
- return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n"
-
-
-def rst_list_inline(list_: List[str], level: int = 0) -> str:
- """Format a list using inlines"""
- return headroom(level) + "[" + ", ".join(inline(i) for i in list_) + "]"
-
-
-def rst_ref(namespace: str, prefix: str, name: str) -> str:
- """Add a hyperlink to the document"""
- mappings = {'enum': 'definition',
- 'fixed-header': 'definition',
- 'nested-attributes': 'attribute-set',
- 'struct': 'definition'}
- if prefix in mappings:
- prefix = mappings[prefix]
- return f":ref:`{namespace}-{prefix}-{name}`"
-
-
-def rst_header() -> str:
- """The headers for all the auto generated RST files"""
- lines = []
-
- lines.append(rst_paragraph(".. SPDX-License-Identifier: GPL-2.0"))
- lines.append(rst_paragraph(".. NOTE: This document was auto-generated.\n\n"))
-
- return "\n".join(lines)
-
-
-def rst_toctree(maxdepth: int = 2) -> str:
- """Generate a toctree RST primitive"""
- lines = []
-
- lines.append(".. toctree::")
- lines.append(f" :maxdepth: {maxdepth}\n\n")
-
- return "\n".join(lines)
-
-
-def rst_label(title: str) -> str:
- """Return a formatted label"""
- return f".. _{title}:\n\n"
-
-
-# Parsers
-# =======
-
-
-def parse_mcast_group(mcast_group: List[Dict[str, Any]]) -> str:
- """Parse 'multicast' group list and return a formatted string"""
- lines = []
- for group in mcast_group:
- lines.append(rst_bullet(group["name"]))
-
- return "\n".join(lines)
-
-
-def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str:
- """Parse 'do' section and return a formatted string"""
- lines = []
- for key in do_dict.keys():
- lines.append(rst_paragraph(bold(key), level + 1))
- if key in ['request', 'reply']:
- lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n")
- else:
- lines.append(headroom(level + 2) + do_dict[key] + "\n")
-
- return "\n".join(lines)
-
-
-def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str:
- """Parse 'attributes' section"""
- if "attributes" not in attrs:
- return ""
- lines = [rst_fields("attributes", rst_list_inline(attrs["attributes"]), level + 1)]
-
- return "\n".join(lines)
-
-
-def parse_operations(operations: List[Dict[str, Any]], namespace: str) -> str:
- """Parse operations block"""
- preprocessed = ["name", "doc", "title", "do", "dump", "flags"]
- linkable = ["fixed-header", "attribute-set"]
- lines = []
-
- for operation in operations:
- lines.append(rst_section(namespace, 'operation', operation["name"]))
- lines.append(rst_paragraph(operation["doc"]) + "\n")
-
- for key in operation.keys():
- if key in preprocessed:
- # Skip the special fields
- continue
- value = operation[key]
- if key in linkable:
- value = rst_ref(namespace, key, value)
- lines.append(rst_fields(key, value, 0))
- if 'flags' in operation:
- lines.append(rst_fields('flags', rst_list_inline(operation['flags'])))
-
- if "do" in operation:
- lines.append(rst_paragraph(":do:", 0))
- lines.append(parse_do(operation["do"], 0))
- if "dump" in operation:
- lines.append(rst_paragraph(":dump:", 0))
- lines.append(parse_do(operation["dump"], 0))
-
- # New line after fields
- lines.append("\n")
-
- return "\n".join(lines)
-
-
-def parse_entries(entries: List[Dict[str, Any]], level: int) -> str:
- """Parse a list of entries"""
- ignored = ["pad"]
- lines = []
- for entry in entries:
- if isinstance(entry, dict):
- # entries could be a list or a dictionary
- field_name = entry.get("name", "")
- if field_name in ignored:
- continue
- type_ = entry.get("type")
- if type_:
- field_name += f" ({inline(type_)})"
- lines.append(
- rst_fields(field_name, sanitize(entry.get("doc", "")), level)
- )
- elif isinstance(entry, list):
- lines.append(rst_list_inline(entry, level))
- else:
- lines.append(rst_bullet(inline(sanitize(entry)), level))
-
- lines.append("\n")
- return "\n".join(lines)
-
-
-def parse_definitions(defs: Dict[str, Any], namespace: str) -> str:
- """Parse definitions section"""
- preprocessed = ["name", "entries", "members"]
- ignored = ["render-max"] # This is not printed
- lines = []
-
- for definition in defs:
- lines.append(rst_section(namespace, 'definition', definition["name"]))
- for k in definition.keys():
- if k in preprocessed + ignored:
- continue
- lines.append(rst_fields(k, sanitize(definition[k]), 0))
-
- # Field list needs to finish with a new line
- lines.append("\n")
- if "entries" in definition:
- lines.append(rst_paragraph(":entries:", 0))
- lines.append(parse_entries(definition["entries"], 1))
- if "members" in definition:
- lines.append(rst_paragraph(":members:", 0))
- lines.append(parse_entries(definition["members"], 1))
-
- return "\n".join(lines)
-
-
-def parse_attr_sets(entries: List[Dict[str, Any]], namespace: str) -> str:
- """Parse attribute from attribute-set"""
- preprocessed = ["name", "type"]
- linkable = ["enum", "nested-attributes", "struct", "sub-message"]
- ignored = ["checks"]
- lines = []
-
- for entry in entries:
- lines.append(rst_section(namespace, 'attribute-set', entry["name"]))
- for attr in entry["attributes"]:
- type_ = attr.get("type")
- attr_line = attr["name"]
- if type_:
- # Add the attribute type in the same line
- attr_line += f" ({inline(type_)})"
-
- lines.append(rst_subsubsection(attr_line))
-
- for k in attr.keys():
- if k in preprocessed + ignored:
- continue
- if k in linkable:
- value = rst_ref(namespace, k, attr[k])
- else:
- value = sanitize(attr[k])
- lines.append(rst_fields(k, value, 0))
- lines.append("\n")
-
- return "\n".join(lines)
-
-
-def parse_sub_messages(entries: List[Dict[str, Any]], namespace: str) -> str:
- """Parse sub-message definitions"""
- lines = []
-
- for entry in entries:
- lines.append(rst_section(namespace, 'sub-message', entry["name"]))
- for fmt in entry["formats"]:
- value = fmt["value"]
-
- lines.append(rst_bullet(bold(value)))
- for attr in ['fixed-header', 'attribute-set']:
- if attr in fmt:
- lines.append(rst_fields(attr,
- rst_ref(namespace, attr, fmt[attr]),
- 1))
- lines.append("\n")
-
- return "\n".join(lines)
-
-
-def parse_yaml(obj: Dict[str, Any]) -> str:
- """Format the whole YAML into a RST string"""
- lines = []
-
- # Main header
-
- lines.append(rst_header())
-
- family = obj['name']
-
- title = f"Family ``{family}`` netlink specification"
- lines.append(rst_title(title))
- lines.append(rst_paragraph(".. contents:: :depth: 3\n"))
-
- if "doc" in obj:
- lines.append(rst_subtitle("Summary"))
- lines.append(rst_paragraph(obj["doc"], 0))
-
- # Operations
- if "operations" in obj:
- lines.append(rst_subtitle("Operations"))
- lines.append(parse_operations(obj["operations"]["list"], family))
-
- # Multicast groups
- if "mcast-groups" in obj:
- lines.append(rst_subtitle("Multicast groups"))
- lines.append(parse_mcast_group(obj["mcast-groups"]["list"]))
-
- # Definitions
- if "definitions" in obj:
- lines.append(rst_subtitle("Definitions"))
- lines.append(parse_definitions(obj["definitions"], family))
-
- # Attributes set
- if "attribute-sets" in obj:
- lines.append(rst_subtitle("Attribute sets"))
- lines.append(parse_attr_sets(obj["attribute-sets"], family))
-
- # Sub-messages
- if "sub-messages" in obj:
- lines.append(rst_subtitle("Sub-messages"))
- lines.append(parse_sub_messages(obj["sub-messages"], family))
-
- return "\n".join(lines)
-
-
-# Main functions
-# ==============
+sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
+from lib import YnlDocGenerator # pylint: disable=C0413
def parse_arguments() -> argparse.Namespace:
"""Parse arguments from user"""
@@ -367,9 +31,6 @@ def parse_arguments() -> argparse.Namespace:
# Index and input are mutually exclusive
group = parser.add_mutually_exclusive_group()
- group.add_argument(
- "-x", "--index", action="store_true", help="Generate the index page"
- )
group.add_argument("-i", "--input", help="YAML file name")
args = parser.parse_args()
@@ -391,15 +52,6 @@ def parse_arguments() -> argparse.Namespace:
return args
-def parse_yaml_file(filename: str) -> str:
- """Transform the YAML specified by filename into an RST-formatted string"""
- with open(filename, "r", encoding="utf-8") as spec_file:
- yaml_data = yaml.safe_load(spec_file)
- content = parse_yaml(yaml_data)
-
- return content
-
-
def write_to_rstfile(content: str, filename: str) -> None:
"""Write the generated content into an RST file"""
logging.debug("Saving RST file to %s", filename)
@@ -408,35 +60,17 @@ def write_to_rstfile(content: str, filename: str) -> None:
rst_file.write(content)
-def generate_main_index_rst(output: str) -> None:
- """Generate the `networking_spec/index` content and write to the file"""
- lines = []
-
- lines.append(rst_header())
- lines.append(rst_label("specs"))
- lines.append(rst_title("Netlink Family Specifications"))
- lines.append(rst_toctree(1))
-
- index_dir = os.path.dirname(output)
- logging.debug("Looking for .rst files in %s", index_dir)
- for filename in sorted(os.listdir(index_dir)):
- if not filename.endswith(".rst") or filename == "index.rst":
- continue
- lines.append(f" {filename.replace('.rst', '')}\n")
-
- logging.debug("Writing an index file at %s", output)
- write_to_rstfile("".join(lines), output)
-
-
def main() -> None:
"""Main function that reads the YAML files and generates the RST files"""
args = parse_arguments()
+ parser = YnlDocGenerator()
+
if args.input:
logging.debug("Parsing %s", args.input)
try:
- content = parse_yaml_file(os.path.join(args.input))
+ content = parser.parse_yaml_file(os.path.join(args.input))
except Exception as exception:
logging.warning("Failed to parse %s.", args.input)
logging.warning(exception)
@@ -444,10 +78,6 @@ def main() -> None:
write_to_rstfile(content, args.output)
- if args.index:
- # Generate the index RST file
- generate_main_index_rst(args.output)
-
if __name__ == "__main__":
main()
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index 7f5fca7682d7..05087ee323ba 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -7,3 +7,4 @@ rt-addr
rt-link
rt-route
tc
+tc-filter-add
diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile
index c9494a564da4..d76cbd41cbb1 100644
--- a/tools/net/ynl/samples/Makefile
+++ b/tools/net/ynl/samples/Makefile
@@ -19,6 +19,7 @@ include $(wildcard *.d)
all: $(BINS)
CFLAGS_page-pool=$(CFLAGS_netdev)
+CFLAGS_tc-filter-add:=$(CFLAGS_tc)
$(BINS): ../lib/ynl.a ../generated/protos.a $(SRCS)
@echo -e '\tCC sample $@'
diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c
deleted file mode 100644
index e5d521320fbf..000000000000
--- a/tools/net/ynl/samples/page-pool.c
+++ /dev/null
@@ -1,149 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <string.h>
-
-#include <ynl.h>
-
-#include <net/if.h>
-
-#include "netdev-user.h"
-
-struct stat {
- unsigned int ifc;
-
- struct {
- unsigned int cnt;
- size_t refs, bytes;
- } live[2];
-
- size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache;
-};
-
-struct stats_array {
- unsigned int i, max;
- struct stat *s;
-};
-
-static struct stat *find_ifc(struct stats_array *a, unsigned int ifindex)
-{
- unsigned int i;
-
- for (i = 0; i < a->i; i++) {
- if (a->s[i].ifc == ifindex)
- return &a->s[i];
- }
-
- a->i++;
- if (a->i == a->max) {
- a->max *= 2;
- a->s = reallocarray(a->s, a->max, sizeof(*a->s));
- }
- a->s[i].ifc = ifindex;
- return &a->s[i];
-}
-
-static void count(struct stat *s, unsigned int l,
- struct netdev_page_pool_get_rsp *pp)
-{
- s->live[l].cnt++;
- if (pp->_present.inflight)
- s->live[l].refs += pp->inflight;
- if (pp->_present.inflight_mem)
- s->live[l].bytes += pp->inflight_mem;
-}
-
-int main(int argc, char **argv)
-{
- struct netdev_page_pool_stats_get_list *pp_stats;
- struct netdev_page_pool_get_list *pools;
- struct stats_array a = {};
- struct ynl_error yerr;
- struct ynl_sock *ys;
-
- ys = ynl_sock_create(&ynl_netdev_family, &yerr);
- if (!ys) {
- fprintf(stderr, "YNL: %s\n", yerr.msg);
- return 1;
- }
-
- a.max = 128;
- a.s = calloc(a.max, sizeof(*a.s));
- if (!a.s)
- goto err_close;
-
- pools = netdev_page_pool_get_dump(ys);
- if (!pools)
- goto err_free;
-
- ynl_dump_foreach(pools, pp) {
- struct stat *s = find_ifc(&a, pp->ifindex);
-
- count(s, 1, pp);
- if (pp->_present.detach_time)
- count(s, 0, pp);
- }
- netdev_page_pool_get_list_free(pools);
-
- pp_stats = netdev_page_pool_stats_get_dump(ys);
- if (!pp_stats)
- goto err_free;
-
- ynl_dump_foreach(pp_stats, pp) {
- struct stat *s = find_ifc(&a, pp->info.ifindex);
-
- if (pp->_present.alloc_fast)
- s->alloc_fast += pp->alloc_fast;
- if (pp->_present.alloc_refill)
- s->alloc_fast += pp->alloc_refill;
- if (pp->_present.alloc_slow)
- s->alloc_slow += pp->alloc_slow;
- if (pp->_present.recycle_ring)
- s->recycle_ring += pp->recycle_ring;
- if (pp->_present.recycle_cached)
- s->recycle_cache += pp->recycle_cached;
- }
- netdev_page_pool_stats_get_list_free(pp_stats);
-
- for (unsigned int i = 0; i < a.i; i++) {
- char ifname[IF_NAMESIZE];
- struct stat *s = &a.s[i];
- const char *name;
- double recycle;
-
- if (!s->ifc) {
- name = "<orphan>\t";
- } else {
- name = if_indextoname(s->ifc, ifname);
- if (name)
- printf("%8s", name);
- printf("[%u]\t", s->ifc);
- }
-
- printf("page pools: %u (zombies: %u)\n",
- s->live[1].cnt, s->live[0].cnt);
- printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n",
- s->live[1].refs, s->live[1].bytes,
- s->live[0].refs, s->live[0].bytes);
-
- /* We don't know how many pages are sitting in cache and ring
- * so we will under-count the recycling rate a bit.
- */
- recycle = (double)(s->recycle_ring + s->recycle_cache) /
- (s->alloc_fast + s->alloc_slow) * 100;
- printf("\t\trecycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)\n",
- recycle, s->alloc_slow, s->alloc_fast,
- s->recycle_ring, s->recycle_cache);
- }
-
- ynl_sock_destroy(ys);
- return 0;
-
-err_free:
- free(a.s);
-err_close:
- fprintf(stderr, "YNL: %s\n", ys->err.msg);
- ynl_sock_destroy(ys);
- return 2;
-}
diff --git a/tools/net/ynl/samples/tc-filter-add.c b/tools/net/ynl/samples/tc-filter-add.c
new file mode 100644
index 000000000000..97871e9e9edc
--- /dev/null
+++ b/tools/net/ynl/samples/tc-filter-add.c
@@ -0,0 +1,335 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <arpa/inet.h>
+#include <linux/pkt_sched.h>
+#include <linux/tc_act/tc_vlan.h>
+#include <linux/tc_act/tc_gact.h>
+#include <linux/if_ether.h>
+#include <net/if.h>
+
+#include <ynl.h>
+
+#include "tc-user.h"
+
+#define TC_HANDLE (0xFFFFU << 16) /* unsigned: 0xFFFF << 16 overflows int */
+
+/* Map a tc_vlan v_action code to a printable name ("not supported" if unknown). */
+static const char *vlan_act_name(const struct tc_vlan *p)
+{
+	switch (p->v_action) {
+	case TCA_VLAN_ACT_POP:
+		return "pop";
+	case TCA_VLAN_ACT_PUSH:
+		return "push";
+	case TCA_VLAN_ACT_MODIFY:
+		return "modify";
+	default:
+		break;
+	}
+	return "not supported";
+}
+
+/* Map a tc_gact action code to a printable name ("not supported" if unknown). */
+static const char *gact_act_name(const struct tc_gact *p)
+{
+	switch (p->action) {
+	case TC_ACT_SHOT:
+		return "drop";
+	case TC_ACT_OK:
+		return "ok";
+	case TC_ACT_PIPE:
+		return "pipe";
+	default:
+		break;
+	}
+	return "not supported";
+}
+
+static void print_vlan(struct tc_act_vlan_attrs *vlan)
+{
+ printf("%s ", vlan_act_name(vlan->parms));
+ if (vlan->_present.push_vlan_id)
+ printf("id %u ", vlan->push_vlan_id);
+ if (vlan->_present.push_vlan_protocol)
+ printf("protocol %#x ", ntohs(vlan->push_vlan_protocol));
+ if (vlan->_present.push_vlan_priority)
+ printf("priority %u ", vlan->push_vlan_priority);
+}
+
+static void print_gact(struct tc_act_gact_attrs *gact)
+{
+ struct tc_gact *p = gact->parms;
+
+ printf("%s ", gact_act_name(p));
+}
+
+/* Print the VLAN-related flower keys present in @flower, then every action. */
+static void flower_print(struct tc_flower_attrs *flower, const char *kind)
+{
+	struct tc_act_attrs *a;
+	unsigned int i;
+
+	printf("%s:\n", kind);
+	if (flower->_present.key_vlan_id)
+		printf("  vlan_id: %u\n", flower->key_vlan_id);
+	if (flower->_present.key_vlan_prio)
+		printf("  vlan_prio: %u\n", flower->key_vlan_prio);
+	if (flower->_present.key_num_of_vlans)
+		printf("  num_of_vlans: %u\n", flower->key_num_of_vlans);
+
+	for (i = 0; i < flower->_count.act; i++) {
+		a = &flower->act[i];
+		printf("action order: %u %s ", i + 1, a->kind);
+		if (a->options._present.vlan)
+			print_vlan(&a->options.vlan);
+		else if (a->options._present.gact)
+			print_gact(&a->options.gact);
+		printf("\n");
+	}
+	printf("\n");
+}
+
+static void tc_filter_print(struct tc_gettfilter_rsp *f)
+{
+ struct tc_options_msg *opt = &f->options;
+
+ if (opt->_present.flower)
+ flower_print(&opt->flower, f->kind);
+ else if (f->_len.kind)
+ printf("%s pref %u proto: %#x\n", f->kind,
+ (f->_hdr.tcm_info >> 16),
+ ntohs(TC_H_MIN(f->_hdr.tcm_info)));
+}
+
+static int tc_filter_add(struct ynl_sock *ys, int ifi)
+{
+ struct tc_newtfilter_req *req;
+ struct tc_act_attrs *acts;
+ struct tc_vlan p = {
+ .action = TC_ACT_PIPE,
+ .v_action = TCA_VLAN_ACT_PUSH
+ };
+ __u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
+ int ret;
+
+ req = tc_newtfilter_req_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_newtfilter_req_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ acts = tc_act_attrs_alloc(3);
+ if (!acts) {
+ fprintf(stderr, "tc_act_attrs_alloc\n");
+ tc_newtfilter_req_free(req);
+ return -1;
+ }
+ memset(acts, 0, sizeof(*acts) * 3);
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q));
+ req->chain = 0;
+
+ tc_newtfilter_req_set_nlflags(req, flags);
+ tc_newtfilter_req_set_kind(req, "flower");
+ tc_newtfilter_req_set_options_flower_key_vlan_id(req, 100);
+ tc_newtfilter_req_set_options_flower_key_vlan_prio(req, 5);
+ tc_newtfilter_req_set_options_flower_key_num_of_vlans(req, 3);
+
+ __tc_newtfilter_req_set_options_flower_act(req, acts, 3);
+
+ /* Skip action at index 0 because in TC, the action array
+ * index starts at 1, with each index defining the action's
+ * order. In contrast, in YNL indexed arrays start at index 0.
+ */
+ tc_act_attrs_set_kind(&acts[1], "vlan");
+ tc_act_attrs_set_options_vlan_parms(&acts[1], &p, sizeof(p));
+ tc_act_attrs_set_options_vlan_push_vlan_id(&acts[1], 200);
+ tc_act_attrs_set_kind(&acts[2], "vlan");
+ tc_act_attrs_set_options_vlan_parms(&acts[2], &p, sizeof(p));
+ tc_act_attrs_set_options_vlan_push_vlan_id(&acts[2], 300);
+
+ tc_newtfilter_req_set_options_flower_flags(req, 0);
+ tc_newtfilter_req_set_options_flower_key_eth_type(req, htons(0x8100));
+
+ ret = tc_newtfilter(ys, req);
+ if (ret)
+ fprintf(stderr, "tc_newtfilter: %s\n", ys->err.msg);
+
+ tc_newtfilter_req_free(req);
+
+ return ret;
+}
+
+static int tc_filter_show(struct ynl_sock *ys, int ifi)
+{
+ struct tc_gettfilter_req_dump *req;
+ struct tc_gettfilter_list *rsp;
+
+ req = tc_gettfilter_req_dump_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_gettfilter_req_dump_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ req->_present.chain = 1;
+ req->chain = 0;
+
+ rsp = tc_gettfilter_dump(ys, req);
+ tc_gettfilter_req_dump_free(req);
+ if (!rsp) {
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+ return -1;
+ }
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no filters reported\n");
+ else
+ ynl_dump_foreach(rsp, flt) tc_filter_print(flt);
+
+ tc_gettfilter_list_free(rsp);
+
+ return 0;
+}
+
+static int tc_filter_del(struct ynl_sock *ys, int ifi)
+{
+ struct tc_deltfilter_req *req;
+ __u16 flags = NLM_F_REQUEST;
+ int ret;
+
+ req = tc_deltfilter_req_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_deltfilter_req_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ req->_hdr.tcm_info = TC_H_MAKE(1 << 16, htons(ETH_P_8021Q));
+ tc_deltfilter_req_set_nlflags(req, flags);
+
+ ret = tc_deltfilter(ys, req);
+ if (ret)
+ fprintf(stderr, "tc_deltfilter failed: %s\n", ys->err.msg);
+
+ tc_deltfilter_req_free(req);
+
+ return ret;
+}
+
+static int tc_clsact_add(struct ynl_sock *ys, int ifi)
+{
+ struct tc_newqdisc_req *req;
+ __u16 flags = NLM_F_REQUEST | NLM_F_EXCL | NLM_F_CREATE;
+ int ret;
+
+ req = tc_newqdisc_req_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_newqdisc_req_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_CLSACT;
+ req->_hdr.tcm_handle = TC_HANDLE;
+ tc_newqdisc_req_set_nlflags(req, flags);
+ tc_newqdisc_req_set_kind(req, "clsact");
+
+ ret = tc_newqdisc(ys, req);
+ if (ret)
+ fprintf(stderr, "tc_newqdisc failed: %s\n", ys->err.msg);
+
+ tc_newqdisc_req_free(req);
+
+ return ret;
+}
+
+static int tc_clsact_del(struct ynl_sock *ys, int ifi)
+{
+ struct tc_delqdisc_req *req;
+ __u16 flags = NLM_F_REQUEST;
+ int ret;
+
+ req = tc_delqdisc_req_alloc();
+ if (!req) {
+ fprintf(stderr, "tc_delqdisc_req_alloc failed\n");
+ return -1;
+ }
+ memset(req, 0, sizeof(*req));
+
+ req->_hdr.tcm_ifindex = ifi;
+ req->_hdr.tcm_parent = TC_H_CLSACT;
+ req->_hdr.tcm_handle = TC_HANDLE;
+ tc_delqdisc_req_set_nlflags(req, flags);
+
+ ret = tc_delqdisc(ys, req);
+ if (ret)
+ fprintf(stderr, "tc_delqdisc failed: %s\n", ys->err.msg);
+
+ tc_delqdisc_req_free(req);
+
+ return ret;
+}
+
+static int tc_filter_config(struct ynl_sock *ys, int ifi)
+{
+ int ret = 0;
+
+ if (tc_filter_add(ys, ifi))
+ return -1;
+
+ ret = tc_filter_show(ys, ifi);
+
+ if (tc_filter_del(ys, ifi))
+ return -1;
+
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ifi, ret = 0;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <interface_name>\n", argv[0]);
+ return 1;
+ }
+ ifi = if_nametoindex(argv[1]);
+ if (!ifi) {
+ perror("if_nametoindex");
+ return 1;
+ }
+
+ ys = ynl_sock_create(&ynl_tc_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ if (tc_clsact_add(ys, ifi)) {
+ ret = 2;
+ goto err_destroy;
+ }
+
+ if (tc_filter_config(ys, ifi))
+ ret = 3;
+
+ if (tc_clsact_del(ys, ifi))
+ ret = 4;
+
+err_destroy:
+ ynl_sock_destroy(ys);
+ return ret;
+}
diff --git a/tools/net/ynl/tests/Makefile b/tools/net/ynl/tests/Makefile
new file mode 100644
index 000000000000..c1df2e001255
--- /dev/null
+++ b/tools/net/ynl/tests/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for YNL tests
+
+TESTS := \
+ test_ynl_cli.sh \
+ test_ynl_ethtool.sh \
+# end of TESTS
+
+all: $(TESTS)
+
+run_tests: # run every test; exit with the status of the last one that failed
+	@ret=0; for test in $(TESTS); do \
+		./$$test || ret=$$?; \
+	done; exit $$ret
+
+install: $(TESTS)
+ @mkdir -p $(DESTDIR)/usr/bin
+ @mkdir -p $(DESTDIR)/usr/share/kselftest
+ @cp ../../../testing/selftests/kselftest/ktap_helpers.sh $(DESTDIR)/usr/share/kselftest/
+ @for test in $(TESTS); do \
+ name=$$(basename $$test .sh); \
+ sed -e 's|^ynl=.*|ynl="ynl"|' \
+ -e 's|^ynl_ethtool=.*|ynl_ethtool="ynl-ethtool"|' \
+ -e 's|KSELFTEST_KTAP_HELPERS=.*|KSELFTEST_KTAP_HELPERS="/usr/share/kselftest/ktap_helpers.sh"|' \
+ $$test > $(DESTDIR)/usr/bin/$$name; \
+ chmod +x $(DESTDIR)/usr/bin/$$name; \
+ done
+
+clean distclean:
+ @# Nothing to clean
+
+.PHONY: all install clean distclean run_tests
diff --git a/tools/net/ynl/tests/config b/tools/net/ynl/tests/config
new file mode 100644
index 000000000000..339f1309c03f
--- /dev/null
+++ b/tools/net/ynl/tests/config
@@ -0,0 +1,6 @@
+CONFIG_DUMMY=m
+CONFIG_INET_DIAG=y
+CONFIG_IPV6=y
+CONFIG_NET_NS=y
+CONFIG_NETDEVSIM=m
+CONFIG_VETH=m
diff --git a/tools/net/ynl/tests/test_ynl_cli.sh b/tools/net/ynl/tests/test_ynl_cli.sh
new file mode 100755
index 000000000000..7c0722a08117
--- /dev/null
+++ b/tools/net/ynl/tests/test_ynl_cli.sh
@@ -0,0 +1,327 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test YNL CLI functionality
+
+# Load KTAP test helpers
+KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests/kselftest/ktap_helpers.sh"
+# shellcheck source=../../../testing/selftests/kselftest/ktap_helpers.sh
+source "$KSELFTEST_KTAP_HELPERS"
+
+# Default ynl path for direct execution, can be overridden by make install
+ynl="../pyynl/cli.py"
+
+readonly NSIM_ID="1338"
+readonly NSIM_DEV_NAME="nsim${NSIM_ID}"
+readonly VETH_A="veth_a"
+readonly VETH_B="veth_b"
+
+testns="ynl-$(mktemp -u XXXXXX)"
+TESTS_NO=0
+
+# Test listing available families
+cli_list_families()
+{
+ if $ynl --list-families &>/dev/null; then
+ ktap_test_pass "YNL CLI list families"
+ else
+ ktap_test_fail "YNL CLI list families"
+ fi
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test netdev family operations (dev-get, queue-get)
+cli_netdev_ops()
+{
+ local dev_output
+ local ifindex
+
+ ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+ dev_output=$(ip netns exec "$testns" $ynl --family netdev \
+ --do dev-get --json "{\"ifindex\": $ifindex}" 2>/dev/null)
+
+ if ! echo "$dev_output" | grep -q "ifindex"; then
+ ktap_test_fail "YNL CLI netdev operations (netdev dev-get output missing ifindex)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl --family netdev \
+ --dump queue-get --json "{\"ifindex\": $ifindex}" &>/dev/null; then
+ ktap_test_fail "YNL CLI netdev operations (failed to get netdev queue info)"
+ return
+ fi
+
+ ktap_test_pass "YNL CLI netdev operations"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test ethtool family operations (rings-get, linkinfo-get)
+cli_ethtool_ops()
+{
+ local rings_output
+ local linkinfo_output
+
+ rings_output=$(ip netns exec "$testns" $ynl --family ethtool \
+ --do rings-get --json "{\"header\": {\"dev-name\": \"$NSIM_DEV_NAME\"}}" 2>/dev/null)
+
+ if ! echo "$rings_output" | grep -q "header"; then
+ ktap_test_fail "YNL CLI ethtool operations (ethtool rings-get output missing header)"
+ return
+ fi
+
+ linkinfo_output=$(ip netns exec "$testns" $ynl --family ethtool \
+ --do linkinfo-get --json "{\"header\": {\"dev-name\": \"$VETH_A\"}}" 2>/dev/null)
+
+ if ! echo "$linkinfo_output" | grep -q "header"; then
+ ktap_test_fail "YNL CLI ethtool operations (ethtool linkinfo-get output missing header)"
+ return
+ fi
+
+ ktap_test_pass "YNL CLI ethtool operations"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-route family operations
+cli_rt_route_ops()
+{
+ local ifindex
+
+ if ! $ynl --list-families 2>/dev/null | grep -q "rt-route"; then
+ ktap_test_skip "YNL CLI rt-route operations (rt-route family not available)"
+ return
+ fi
+
+ ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+ # Add route: 192.0.2.0/24 dev $dev scope link
+ if ! ip netns exec "$testns" $ynl --family rt-route --do newroute --create \
+ --json "{\"dst\": \"192.0.2.0\", \"oif\": $ifindex, \"rtm-dst-len\": 24, \"rtm-family\": 2, \"rtm-scope\": 253, \"rtm-type\": 1, \"rtm-protocol\": 3, \"rtm-table\": 254}" &>/dev/null; then
+ ktap_test_fail "YNL CLI rt-route operations (failed to add route)"
+ return
+ fi
+
+ local route_output
+ route_output=$(ip netns exec "$testns" $ynl --family rt-route --dump getroute 2>/dev/null)
+ if echo "$route_output" | grep -q "192.0.2.0"; then
+ ktap_test_pass "YNL CLI rt-route operations"
+ else
+ ktap_test_fail "YNL CLI rt-route operations (failed to verify route)"
+ fi
+
+ ip netns exec "$testns" $ynl --family rt-route --do delroute \
+ --json "{\"dst\": \"192.0.2.0\", \"oif\": $ifindex, \"rtm-dst-len\": 24, \"rtm-family\": 2, \"rtm-scope\": 253, \"rtm-type\": 1, \"rtm-protocol\": 3, \"rtm-table\": 254}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-addr family operations
+cli_rt_addr_ops()
+{
+ local ifindex
+
+ if ! $ynl --list-families 2>/dev/null | grep -q "rt-addr"; then
+ ktap_test_skip "YNL CLI rt-addr operations (rt-addr family not available)"
+ return
+ fi
+
+ ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+ if ! ip netns exec "$testns" $ynl --family rt-addr --do newaddr \
+ --json "{\"ifa-index\": $ifindex, \"local\": \"192.0.2.100\", \"ifa-prefixlen\": 24, \"ifa-family\": 2}" &>/dev/null; then
+ ktap_test_fail "YNL CLI rt-addr operations (failed to add address)"
+ return
+ fi
+
+ local addr_output
+ addr_output=$(ip netns exec "$testns" $ynl --family rt-addr --dump getaddr 2>/dev/null)
+ if echo "$addr_output" | grep -q "192.0.2.100"; then
+ ktap_test_pass "YNL CLI rt-addr operations"
+ else
+ ktap_test_fail "YNL CLI rt-addr operations (failed to verify address)"
+ fi
+
+ ip netns exec "$testns" $ynl --family rt-addr --do deladdr \
+ --json "{\"ifa-index\": $ifindex, \"local\": \"192.0.2.100\", \"ifa-prefixlen\": 24, \"ifa-family\": 2}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-link family operations
+cli_rt_link_ops()
+{
+ if ! $ynl --list-families 2>/dev/null | grep -q "rt-link"; then
+ ktap_test_skip "YNL CLI rt-link operations (rt-link family not available)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl --family rt-link --do newlink --create \
+ --json "{\"ifname\": \"dummy0\", \"linkinfo\": {\"kind\": \"dummy\"}}" &>/dev/null; then
+ ktap_test_fail "YNL CLI rt-link operations (failed to add link)"
+ return
+ fi
+
+ local link_output
+ link_output=$(ip netns exec "$testns" $ynl --family rt-link --dump getlink 2>/dev/null)
+ if echo "$link_output" | grep -q "$NSIM_DEV_NAME" && echo "$link_output" | grep -q "dummy0"; then
+ ktap_test_pass "YNL CLI rt-link operations"
+ else
+ ktap_test_fail "YNL CLI rt-link operations (failed to verify link)"
+ fi
+
+ ip netns exec "$testns" $ynl --family rt-link --do dellink \
+ --json "{\"ifname\": \"dummy0\"}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-neigh family operations
+cli_rt_neigh_ops()
+{
+	local ifindex neigh_output
+
+	if ! $ynl --list-families 2>/dev/null | grep -q "rt-neigh"; then
+		ktap_test_skip "YNL CLI rt-neigh operations (rt-neigh family not available)"
+		return
+	fi
+
+	ifindex=$(ip netns exec "$testns" cat /sys/class/net/"$NSIM_DEV_NAME"/ifindex 2>/dev/null)
+
+	# Add neighbor: 192.0.2.1 dev nsim1338 lladdr 11:22:33:44:55:66 PERMANENT
+	if ! ip netns exec "$testns" $ynl --family rt-neigh --do newneigh --create \
+		--json "{\"ndm-ifindex\": $ifindex, \"dst\": \"192.0.2.1\", \"lladdr\": \"11:22:33:44:55:66\", \"ndm-family\": 2, \"ndm-state\": 128}" &>/dev/null; then
+		ktap_test_fail "YNL CLI rt-neigh operations (failed to add neighbor)"
+		return # one result per test; nothing was added, so nothing to verify or delete
+	fi
+
+	neigh_output=$(ip netns exec "$testns" $ynl --family rt-neigh --dump getneigh 2>/dev/null)
+	if echo "$neigh_output" | grep -q "192.0.2.1"; then
+		ktap_test_pass "YNL CLI rt-neigh operations"
+	else
+		ktap_test_fail "YNL CLI rt-neigh operations (failed to verify neighbor)"
+	fi
+
+	ip netns exec "$testns" $ynl --family rt-neigh --do delneigh \
+		--json "{\"ndm-ifindex\": $ifindex, \"dst\": \"192.0.2.1\", \"lladdr\": \"11:22:33:44:55:66\", \"ndm-family\": 2}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test rt-rule family operations
+cli_rt_rule_ops()
+{
+ if ! $ynl --list-families 2>/dev/null | grep -q "rt-rule"; then
+ ktap_test_skip "YNL CLI rt-rule operations (rt-rule family not available)"
+ return
+ fi
+
+ # Add rule: from 192.0.2.0/24 lookup 100 none
+ if ! ip netns exec "$testns" $ynl --family rt-rule --do newrule \
+ --json "{\"family\": 2, \"src-len\": 24, \"src\": \"192.0.2.0\", \"table\": 100}" &>/dev/null; then
+ ktap_test_fail "YNL CLI rt-rule operations (failed to add rule)"
+ return
+ fi
+
+ local rule_output
+ rule_output=$(ip netns exec "$testns" $ynl --family rt-rule --dump getrule 2>/dev/null)
+ if echo "$rule_output" | grep -q "192.0.2.0"; then
+ ktap_test_pass "YNL CLI rt-rule operations"
+ else
+ ktap_test_fail "YNL CLI rt-rule operations (failed to verify rule)"
+ fi
+
+ ip netns exec "$testns" $ynl --family rt-rule --do delrule \
+ --json "{\"family\": 2, \"src-len\": 24, \"src\": \"192.0.2.0\", \"table\": 100}" &>/dev/null
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+# Test nlctrl family operations
+cli_nlctrl_ops()
+{
+ local family_output
+
+ if ! family_output=$($ynl --family nlctrl \
+ --do getfamily --json "{\"family-name\": \"netdev\"}" 2>/dev/null); then
+ ktap_test_fail "YNL CLI nlctrl getfamily (failed to get nlctrl family info)"
+ return
+ fi
+
+ if ! echo "$family_output" | grep -q "family-name"; then
+ ktap_test_fail "YNL CLI nlctrl getfamily (nlctrl getfamily output missing family-name)"
+ return
+ fi
+
+ if ! echo "$family_output" | grep -q "family-id"; then
+ ktap_test_fail "YNL CLI nlctrl getfamily (nlctrl getfamily output missing family-id)"
+ return
+ fi
+
+ ktap_test_pass "YNL CLI nlctrl getfamily"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+setup()
+{
+ modprobe netdevsim &> /dev/null
+ if ! [ -f /sys/bus/netdevsim/new_device ]; then
+ ktap_skip_all "netdevsim module not available"
+ exit "$KSFT_SKIP"
+ fi
+
+ if ! ip netns add "$testns" 2>/dev/null; then
+ ktap_skip_all "failed to create test namespace"
+ exit "$KSFT_SKIP"
+ fi
+
+ echo "$NSIM_ID 1" | ip netns exec "$testns" tee /sys/bus/netdevsim/new_device >/dev/null 2>&1 || {
+ ktap_skip_all "failed to create netdevsim device"
+ exit "$KSFT_SKIP"
+ }
+
+ local dev
+ dev=$(ip netns exec "$testns" ls /sys/bus/netdevsim/devices/netdevsim$NSIM_ID/net 2>/dev/null | head -1)
+ if [[ -z "$dev" ]]; then
+ ktap_skip_all "failed to find netdevsim device"
+ exit "$KSFT_SKIP"
+ fi
+
+ ip -netns "$testns" link set dev "$dev" name "$NSIM_DEV_NAME" 2>/dev/null || {
+ ktap_skip_all "failed to rename netdevsim device"
+ exit "$KSFT_SKIP"
+ }
+
+ ip -netns "$testns" link set dev "$NSIM_DEV_NAME" up 2>/dev/null
+
+ if ! ip -n "$testns" link add "$VETH_A" type veth peer name "$VETH_B" 2>/dev/null; then
+ ktap_skip_all "failed to create veth pair"
+ exit "$KSFT_SKIP"
+ fi
+
+ ip -n "$testns" link set "$VETH_A" up 2>/dev/null
+ ip -n "$testns" link set "$VETH_B" up 2>/dev/null
+}
+
+cleanup()
+{
+ ip netns exec "$testns" bash -c "echo $NSIM_ID > /sys/bus/netdevsim/del_device" 2>/dev/null || true
+ ip netns del "$testns" 2>/dev/null || true
+}
+
+# Check if ynl command is available
+if ! command -v $ynl &>/dev/null && [[ ! -x $ynl ]]; then
+ ktap_skip_all "ynl command not found: $ynl"
+ exit "$KSFT_SKIP"
+fi
+
+trap cleanup EXIT
+
+ktap_print_header
+setup
+ktap_set_plan "${TESTS_NO}"
+
+cli_list_families
+cli_netdev_ops
+cli_ethtool_ops
+cli_rt_route_ops
+cli_rt_addr_ops
+cli_rt_link_ops
+cli_rt_neigh_ops
+cli_rt_rule_ops
+cli_nlctrl_ops
+
+ktap_finished
diff --git a/tools/net/ynl/tests/test_ynl_ethtool.sh b/tools/net/ynl/tests/test_ynl_ethtool.sh
new file mode 100755
index 000000000000..b826269017f4
--- /dev/null
+++ b/tools/net/ynl/tests/test_ynl_ethtool.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Test YNL ethtool functionality
+
+# Load KTAP test helpers
+KSELFTEST_KTAP_HELPERS="$(dirname "$(realpath "$0")")/../../../testing/selftests/kselftest/ktap_helpers.sh"
+# shellcheck source=../../../testing/selftests/kselftest/ktap_helpers.sh
+source "$KSELFTEST_KTAP_HELPERS"
+
+# Default ynl-ethtool path for direct execution, can be overridden by make install
+ynl_ethtool="../pyynl/ethtool.py"
+
+readonly NSIM_ID="1337"
+readonly NSIM_DEV_NAME="nsim${NSIM_ID}"
+readonly VETH_A="veth_a"
+readonly VETH_B="veth_b"
+
+testns="ynl-ethtool-$(mktemp -u XXXXXX)"
+TESTS_NO=0
+
+# Uses veth device as netdevsim doesn't support basic ethtool device info
+ethtool_device_info()
+{
+ local info_output
+
+ info_output=$(ip netns exec "$testns" $ynl_ethtool "$VETH_A" 2>/dev/null)
+
+ if ! echo "$info_output" | grep -q "Settings for"; then
+ ktap_test_fail "YNL ethtool device info (device info output missing expected content)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool device info"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_statistics()
+{
+ local stats_output
+
+ stats_output=$(ip netns exec "$testns" $ynl_ethtool --statistics "$NSIM_DEV_NAME" 2>/dev/null)
+
+ if ! echo "$stats_output" | grep -q -E "(NIC statistics|packets|bytes)"; then
+ ktap_test_fail "YNL ethtool statistics (statistics output missing expected content)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool statistics"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_ring_params()
+{
+ local ring_output
+
+ ring_output=$(ip netns exec "$testns" $ynl_ethtool --show-ring "$NSIM_DEV_NAME" 2>/dev/null)
+
+ if ! echo "$ring_output" | grep -q -E "(Ring parameters|RX|TX)"; then
+ ktap_test_fail "YNL ethtool ring parameters (ring parameters output missing expected content)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl_ethtool --set-ring "$NSIM_DEV_NAME" rx 64 2>/dev/null; then
+ ktap_test_fail "YNL ethtool ring parameters (set-ring command failed unexpectedly)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool ring parameters (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_coalesce_params()
+{
+ if ! ip netns exec "$testns" $ynl_ethtool --show-coalesce "$NSIM_DEV_NAME" &>/dev/null; then
+ ktap_test_fail "YNL ethtool coalesce parameters (failed to get coalesce parameters)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl_ethtool --set-coalesce "$NSIM_DEV_NAME" rx-usecs 50 2>/dev/null; then
+ ktap_test_fail "YNL ethtool coalesce parameters (set-coalesce command failed unexpectedly)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool coalesce parameters (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_pause_params()
+{
+ if ! ip netns exec "$testns" $ynl_ethtool --show-pause "$NSIM_DEV_NAME" &>/dev/null; then
+ ktap_test_fail "YNL ethtool pause parameters (failed to get pause parameters)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl_ethtool --set-pause "$NSIM_DEV_NAME" tx 1 rx 1 2>/dev/null; then
+ ktap_test_fail "YNL ethtool pause parameters (set-pause command failed unexpectedly)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool pause parameters (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_features_info()
+{
+	local features_output
+
+	features_output=$(ip netns exec "$testns" $ynl_ethtool --show-features "$NSIM_DEV_NAME" 2>/dev/null)
+
+	if ! echo "$features_output" | grep -q -E "(Features|offload)"; then
+		ktap_test_fail "YNL ethtool features info (features output missing expected content)"
+		return
+	fi
+
+	# Only --show-features is exercised here, so the result name must not claim "set"
+	ktap_test_pass "YNL ethtool features info"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_channels_info()
+{
+ local channels_output
+
+ channels_output=$(ip netns exec "$testns" $ynl_ethtool --show-channels "$NSIM_DEV_NAME" 2>/dev/null)
+
+ if ! echo "$channels_output" | grep -q -E "(Channel|Combined|RX|TX)"; then
+ ktap_test_fail "YNL ethtool channels info (channels output missing expected content)"
+ return
+ fi
+
+ if ! ip netns exec "$testns" $ynl_ethtool --set-channels "$NSIM_DEV_NAME" combined-count 1 2>/dev/null; then
+ ktap_test_fail "YNL ethtool channels info (set-channels command failed unexpectedly)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool channels info (show/set)"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+ethtool_time_stamping()
+{
+ local ts_output
+
+ ts_output=$(ip netns exec "$testns" $ynl_ethtool --show-time-stamping "$NSIM_DEV_NAME" 2>/dev/null)
+
+ if ! echo "$ts_output" | grep -q -E "(Time stamping|timestamping|SOF_TIMESTAMPING)"; then
+ ktap_test_fail "YNL ethtool time stamping (time stamping output missing expected content)"
+ return
+ fi
+
+ ktap_test_pass "YNL ethtool time stamping"
+}
+TESTS_NO=$((TESTS_NO + 1))
+
+setup()
+{
+ modprobe netdevsim &> /dev/null
+ if ! [ -f /sys/bus/netdevsim/new_device ]; then
+ ktap_skip_all "netdevsim module not available"
+ exit "$KSFT_SKIP"
+ fi
+
+ if ! ip netns add "$testns" 2>/dev/null; then
+ ktap_skip_all "failed to create test namespace"
+ exit "$KSFT_SKIP"
+ fi
+
+ echo "$NSIM_ID 1" | ip netns exec "$testns" tee /sys/bus/netdevsim/new_device >/dev/null 2>&1 || {
+ ktap_skip_all "failed to create netdevsim device"
+ exit "$KSFT_SKIP"
+ }
+
+ local dev
+ dev=$(ip netns exec "$testns" ls /sys/bus/netdevsim/devices/netdevsim$NSIM_ID/net 2>/dev/null | head -1)
+ if [[ -z "$dev" ]]; then
+ ktap_skip_all "failed to find netdevsim device"
+ exit "$KSFT_SKIP"
+ fi
+
+ ip -netns "$testns" link set dev "$dev" name "$NSIM_DEV_NAME" 2>/dev/null || {
+ ktap_skip_all "failed to rename netdevsim device"
+ exit "$KSFT_SKIP"
+ }
+
+ ip -netns "$testns" link set dev "$NSIM_DEV_NAME" up 2>/dev/null
+
+ if ! ip -n "$testns" link add "$VETH_A" type veth peer name "$VETH_B" 2>/dev/null; then
+ ktap_skip_all "failed to create veth pair"
+ exit "$KSFT_SKIP"
+ fi
+
+ ip -n "$testns" link set "$VETH_A" up 2>/dev/null
+ ip -n "$testns" link set "$VETH_B" up 2>/dev/null
+}
+
+cleanup()
+{
+ ip netns exec "$testns" bash -c "echo $NSIM_ID > /sys/bus/netdevsim/del_device" 2>/dev/null || true
+ ip netns del "$testns" 2>/dev/null || true
+}
+
+# Check if ynl-ethtool command is available
+if ! command -v $ynl_ethtool &>/dev/null && [[ ! -x $ynl_ethtool ]]; then
+ ktap_skip_all "ynl-ethtool command not found: $ynl_ethtool"
+ exit "$KSFT_SKIP"
+fi
+
+trap cleanup EXIT
+
+ktap_print_header
+setup
+ktap_set_plan "${TESTS_NO}"
+
+ethtool_device_info
+ethtool_statistics
+ethtool_ring_params
+ethtool_coalesce_params
+ethtool_pause_params
+ethtool_features_info
+ethtool_channels_info
+ethtool_time_stamping
+
+ktap_finished
diff --git a/tools/net/ynl/ynltool/.gitignore b/tools/net/ynl/ynltool/.gitignore
new file mode 100644
index 000000000000..690d399c921a
--- /dev/null
+++ b/tools/net/ynl/ynltool/.gitignore
@@ -0,0 +1,2 @@
+ynltool
+*.d
diff --git a/tools/net/ynl/ynltool/Makefile b/tools/net/ynl/ynltool/Makefile
new file mode 100644
index 000000000000..f5b1de32daa5
--- /dev/null
+++ b/tools/net/ynl/ynltool/Makefile
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+include ../Makefile.deps
+
+INSTALL ?= install
+prefix ?= /usr
+
+CC := gcc
+CFLAGS := -Wall -Wextra -Werror -O2
+ifeq ("$(DEBUG)","1")
+  CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
+endif
+CFLAGS += -I../lib -I../generated -I../../../include/uapi/
+
+SRC_VERSION := \
+	$(shell make --no-print-directory -sC ../../../.. kernelversion || \
+		echo "unknown")
+
+CFLAGS += -DSRC_VERSION='"$(SRC_VERSION)"'
+
+SRCS := $(wildcard *.c)
+OBJS := $(patsubst %.c,$(OUTPUT)%.o,$(SRCS))
+
+YNLTOOL := $(OUTPUT)ynltool
+
+include $(wildcard $(OUTPUT)*.d)
+
+all: $(YNLTOOL)
+
+Q = @
+
+$(YNLTOOL): ../libynl.a $(OBJS)
+	$(Q)echo -e "\tLINK $@"
+	$(Q)$(CC) $(CFLAGS) -o $@ $(OBJS) ../libynl.a -lm
+# Objects are built into $(OUTPUT), so the pattern must carry that prefix too
+$(OUTPUT)%.o: %.c ../libynl.a
+	$(Q)echo -e "\tCC $@"
+	$(Q)$(COMPILE.c) -MMD -c -o $@ $<
+
+../libynl.a:
+	$(Q)$(MAKE) -C ../
+
+clean:
+	rm -f $(OUTPUT)*.o $(OUTPUT)*.d *~
+
+distclean: clean
+	rm -f $(YNLTOOL)
+# bindir follows prefix; install under the plain tool name, not $(OUTPUT)ynltool
+bindir ?= $(prefix)/bin
+
+install: $(YNLTOOL)
+	$(INSTALL) -m 0755 $(YNLTOOL) $(DESTDIR)$(bindir)/ynltool
+
+.PHONY: all clean distclean install
+.DEFAULT_GOAL=all
diff --git a/tools/net/ynl/ynltool/json_writer.c b/tools/net/ynl/ynltool/json_writer.c
new file mode 100644
index 000000000000..c8685e592cd3
--- /dev/null
+++ b/tools/net/ynl/ynltool/json_writer.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-2-Clause)
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * Authors: Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <malloc.h>
+#include <inttypes.h>
+#include <stdint.h>
+
+#include "json_writer.h"
+
+struct json_writer {
+ FILE *out;
+ unsigned depth;
+ bool pretty;
+ char sep;
+};
+
+static void jsonw_indent(json_writer_t *self)
+{
+ unsigned i;
+ for (i = 0; i < self->depth; ++i)
+ fputs(" ", self->out);
+}
+
+static void jsonw_eol(json_writer_t *self)
+{
+ if (!self->pretty)
+ return;
+
+ putc('\n', self->out);
+ jsonw_indent(self);
+}
+
+static void jsonw_eor(json_writer_t *self)
+{
+ if (self->sep != '\0')
+ putc(self->sep, self->out);
+ self->sep = ',';
+}
+
+static void jsonw_puts(json_writer_t *self, const char *str)
+{
+ putc('"', self->out);
+ for (; *str; ++str)
+ switch (*str) {
+ case '\t':
+ fputs("\\t", self->out);
+ break;
+ case '\n':
+ fputs("\\n", self->out);
+ break;
+ case '\r':
+ fputs("\\r", self->out);
+ break;
+ case '\f':
+ fputs("\\f", self->out);
+ break;
+ case '\b':
+ fputs("\\b", self->out);
+ break;
+ case '\\':
+ fputs("\\\\", self->out);
+ break;
+ case '"':
+ fputs("\\\"", self->out);
+ break;
+ default:
+ putc(*str, self->out);
+ }
+ putc('"', self->out);
+}
+
+json_writer_t *jsonw_new(FILE *f)
+{
+ json_writer_t *self = malloc(sizeof(*self));
+ if (self) {
+ self->out = f;
+ self->depth = 0;
+ self->pretty = false;
+ self->sep = '\0';
+ }
+ return self;
+}
+
+void jsonw_destroy(json_writer_t **self_p)
+{
+ json_writer_t *self = *self_p;
+
+ assert(self->depth == 0);
+ fputs("\n", self->out);
+ fflush(self->out);
+ free(self);
+ *self_p = NULL;
+}
+
+void jsonw_pretty(json_writer_t *self, bool on)
+{
+ self->pretty = on;
+}
+
+void jsonw_reset(json_writer_t *self)
+{
+ assert(self->depth == 0);
+ self->sep = '\0';
+}
+
+static void jsonw_begin(json_writer_t *self, int c)
+{
+ jsonw_eor(self);
+ putc(c, self->out);
+ ++self->depth;
+ self->sep = '\0';
+}
+
+static void jsonw_end(json_writer_t *self, int c)
+{
+ assert(self->depth > 0);
+
+ --self->depth;
+ if (self->sep != '\0')
+ jsonw_eol(self);
+ putc(c, self->out);
+ self->sep = ',';
+}
+
+void jsonw_name(json_writer_t *self, const char *name)
+{
+ jsonw_eor(self);
+ jsonw_eol(self);
+ self->sep = '\0';
+ jsonw_puts(self, name);
+ putc(':', self->out);
+ if (self->pretty)
+ putc(' ', self->out);
+}
+
+void jsonw_vprintf_enquote(json_writer_t *self, const char *fmt, va_list ap)
+{
+ jsonw_eor(self);
+ putc('"', self->out);
+ vfprintf(self->out, fmt, ap);
+ putc('"', self->out);
+}
+
+void jsonw_printf(json_writer_t *self, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ jsonw_eor(self);
+ vfprintf(self->out, fmt, ap);
+ va_end(ap);
+}
+
+void jsonw_start_object(json_writer_t *self)
+{
+ jsonw_begin(self, '{');
+}
+
+void jsonw_end_object(json_writer_t *self)
+{
+ jsonw_end(self, '}');
+}
+
+void jsonw_start_array(json_writer_t *self)
+{
+ jsonw_begin(self, '[');
+}
+
+void jsonw_end_array(json_writer_t *self)
+{
+ jsonw_end(self, ']');
+}
+
+void jsonw_string(json_writer_t *self, const char *value)
+{
+ jsonw_eor(self);
+ jsonw_puts(self, value);
+}
+
+void jsonw_bool(json_writer_t *self, bool val)
+{
+ jsonw_printf(self, "%s", val ? "true" : "false");
+}
+
+void jsonw_null(json_writer_t *self)
+{
+ jsonw_printf(self, "null");
+}
+
+void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num)
+{
+ jsonw_printf(self, fmt, num);
+}
+
+void jsonw_float(json_writer_t *self, double num)
+{
+ jsonw_printf(self, "%g", num);
+}
+
+void jsonw_hu(json_writer_t *self, unsigned short num)
+{
+ jsonw_printf(self, "%hu", num);
+}
+
+void jsonw_uint(json_writer_t *self, uint64_t num)
+{
+ jsonw_printf(self, "%"PRIu64, num);
+}
+
+void jsonw_lluint(json_writer_t *self, unsigned long long int num)
+{
+ jsonw_printf(self, "%llu", num);
+}
+
+void jsonw_int(json_writer_t *self, int64_t num)
+{
+ jsonw_printf(self, "%"PRId64, num);
+}
+
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val)
+{
+ jsonw_name(self, prop);
+ jsonw_string(self, val);
+}
+
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool val)
+{
+ jsonw_name(self, prop);
+ jsonw_bool(self, val);
+}
+
+void jsonw_float_field(json_writer_t *self, const char *prop, double val)
+{
+ jsonw_name(self, prop);
+ jsonw_float(self, val);
+}
+
+void jsonw_float_field_fmt(json_writer_t *self,
+ const char *prop,
+ const char *fmt,
+ double val)
+{
+ jsonw_name(self, prop);
+ jsonw_float_fmt(self, fmt, val);
+}
+
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num)
+{
+ jsonw_name(self, prop);
+ jsonw_uint(self, num);
+}
+
+void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num)
+{
+ jsonw_name(self, prop);
+ jsonw_hu(self, num);
+}
+
+void jsonw_lluint_field(json_writer_t *self,
+ const char *prop,
+ unsigned long long int num)
+{
+ jsonw_name(self, prop);
+ jsonw_lluint(self, num);
+}
+
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num)
+{
+ jsonw_name(self, prop);
+ jsonw_int(self, num);
+}
+
+void jsonw_null_field(json_writer_t *self, const char *prop)
+{
+ jsonw_name(self, prop);
+ jsonw_null(self);
+}
diff --git a/tools/net/ynl/ynltool/json_writer.h b/tools/net/ynl/ynltool/json_writer.h
new file mode 100644
index 000000000000..0f1e63c88f6a
--- /dev/null
+++ b/tools/net/ynl/ynltool/json_writer.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Simple streaming JSON writer
+ *
+ * This takes care of the annoying bits of JSON syntax like the commas
+ * after elements
+ *
+ * Authors: Stephen Hemminger <stephen@networkplumber.org>
+ */
+
+#ifndef _JSON_WRITER_H_
+#define _JSON_WRITER_H_
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <stdio.h>
+
+/* Opaque class structure */
+typedef struct json_writer json_writer_t;
+
+/* Create a new JSON stream */
+json_writer_t *jsonw_new(FILE *f);
+/* End output to JSON stream */
+void jsonw_destroy(json_writer_t **self_p);
+
+/* Cause output to have pretty whitespace */
+void jsonw_pretty(json_writer_t *self, bool on);
+
+/* Reset separator to create new JSON */
+void jsonw_reset(json_writer_t *self);
+
+/* Add property name */
+void jsonw_name(json_writer_t *self, const char *name);
+
+/* Add value */
+void __attribute__((format(printf, 2, 0))) jsonw_vprintf_enquote(json_writer_t *self,
+ const char *fmt,
+ va_list ap);
+void __attribute__((format(printf, 2, 3))) jsonw_printf(json_writer_t *self,
+ const char *fmt, ...);
+void jsonw_string(json_writer_t *self, const char *value);
+void jsonw_bool(json_writer_t *self, bool value);
+void jsonw_float(json_writer_t *self, double number);
+void jsonw_float_fmt(json_writer_t *self, const char *fmt, double num);
+void jsonw_uint(json_writer_t *self, uint64_t number);
+void jsonw_hu(json_writer_t *self, unsigned short number);
+void jsonw_int(json_writer_t *self, int64_t number);
+void jsonw_null(json_writer_t *self);
+void jsonw_lluint(json_writer_t *self, unsigned long long int num);
+
+/* Useful Combinations of name and value */
+void jsonw_string_field(json_writer_t *self, const char *prop, const char *val);
+void jsonw_bool_field(json_writer_t *self, const char *prop, bool value);
+void jsonw_float_field(json_writer_t *self, const char *prop, double num);
+void jsonw_uint_field(json_writer_t *self, const char *prop, uint64_t num);
+void jsonw_hu_field(json_writer_t *self, const char *prop, unsigned short num);
+void jsonw_int_field(json_writer_t *self, const char *prop, int64_t num);
+void jsonw_null_field(json_writer_t *self, const char *prop);
+void jsonw_lluint_field(json_writer_t *self, const char *prop,
+ unsigned long long int num);
+void jsonw_float_field_fmt(json_writer_t *self, const char *prop,
+ const char *fmt, double val);
+
+/* Collections */
+void jsonw_start_object(json_writer_t *self);
+void jsonw_end_object(json_writer_t *self);
+
+void jsonw_start_array(json_writer_t *self);
+void jsonw_end_array(json_writer_t *self);
+
+/* Override default exception handling */
+typedef void (jsonw_err_handler_fn)(const char *);
+
+#endif /* _JSON_WRITER_H_ */
diff --git a/tools/net/ynl/ynltool/main.c b/tools/net/ynl/ynltool/main.c
new file mode 100644
index 000000000000..5d0f428eed0a
--- /dev/null
+++ b/tools/net/ynl/ynltool/main.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+/* Copyright Meta Platforms, Inc. and affiliates */
+
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "main.h"
+
+const char *bin_name;
+static int last_argc;
+static char **last_argv;
+static int (*last_do_help)(int argc, char **argv);
+json_writer_t *json_wtr;
+bool pretty_output;
+bool json_output;
+
+static void __attribute__((noreturn)) clean_and_exit(int i)
+{
+ if (json_output)
+ jsonw_destroy(&json_wtr);
+
+ exit(i);
+}
+
+void usage(void)
+{
+ last_do_help(last_argc - 1, last_argv + 1);
+
+ clean_and_exit(-1);
+}
+
+static int do_help(int argc __attribute__((unused)),
+ char **argv __attribute__((unused)))
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s [OPTIONS] OBJECT { COMMAND | help }\n"
+ " %s version\n"
+ "\n"
+ " OBJECT := { page-pool | qstats }\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name, bin_name);
+
+ return 0;
+}
+
+static int do_version(int argc __attribute__((unused)),
+		      char **argv __attribute__((unused)))
+{
+	if (json_output) {
+		jsonw_start_object(json_wtr);
+		jsonw_name(json_wtr, "version");
+		jsonw_string(json_wtr, SRC_VERSION); /* quoted, so the JSON stays valid */
+		jsonw_end_object(json_wtr);
+	} else {
+		printf("%s %s\n", bin_name, SRC_VERSION); /* never use it as a format */
+	}
+	return 0;
+}
+
+static const struct cmd commands[] = {
+ { "help", do_help },
+ { "page-pool", do_page_pool },
+ { "qstats", do_qstats },
+ { "version", do_version },
+ { 0 }
+};
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+ int (*help)(int argc, char **argv))
+{
+ unsigned int i;
+
+ last_argc = argc;
+ last_argv = argv;
+ last_do_help = help;
+
+ if (argc < 1 && cmds[0].func)
+ return cmds[0].func(argc, argv);
+
+ for (i = 0; cmds[i].cmd; i++) {
+ if (is_prefix(*argv, cmds[i].cmd)) {
+ if (!cmds[i].func) {
+ p_err("command '%s' is not available", cmds[i].cmd);
+ return -1;
+ }
+ return cmds[i].func(argc - 1, argv + 1);
+ }
+ }
+
+ help(argc - 1, argv + 1);
+
+ return -1;
+}
+
+bool is_prefix(const char *pfx, const char *str)
+{
+ if (!pfx)
+ return false;
+ if (strlen(str) < strlen(pfx))
+ return false;
+
+ return !memcmp(str, pfx, strlen(pfx));
+}
+
+/* Last argument MUST be NULL pointer */
+int detect_common_prefix(const char *arg, ...)
+{
+ unsigned int count = 0;
+ const char *ref;
+ char msg[256];
+ va_list ap;
+
+ snprintf(msg, sizeof(msg), "ambiguous prefix: '%s' could be '", arg);
+ va_start(ap, arg);
+ while ((ref = va_arg(ap, const char *))) {
+ if (!is_prefix(arg, ref))
+ continue;
+ count++;
+ if (count > 1)
+ strncat(msg, "' or '", sizeof(msg) - strlen(msg) - 1);
+ strncat(msg, ref, sizeof(msg) - strlen(msg) - 1);
+ }
+ va_end(ap);
+ strncat(msg, "'", sizeof(msg) - strlen(msg) - 1);
+
+ if (count >= 2) {
+ p_err("%s", msg);
+ return -1;
+ }
+
+ return 0;
+}
+
+void p_err(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ if (json_output) {
+ jsonw_start_object(json_wtr);
+ jsonw_name(json_wtr, "error");
+ jsonw_vprintf_enquote(json_wtr, fmt, ap);
+ jsonw_end_object(json_wtr);
+ } else {
+ fprintf(stderr, "Error: ");
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ }
+ va_end(ap);
+}
+
+void p_info(const char *fmt, ...)
+{
+ va_list ap;
+
+ if (json_output)
+ return;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
+}
+
+int main(int argc, char **argv)
+{
+ static const struct option options[] = {
+ { "json", no_argument, NULL, 'j' },
+ { "help", no_argument, NULL, 'h' },
+ { "pretty", no_argument, NULL, 'p' },
+ { "version", no_argument, NULL, 'V' },
+ { 0 }
+ };
+ bool version_requested = false;
+ int opt, ret;
+
+ setlinebuf(stdout);
+
+ last_do_help = do_help;
+ pretty_output = false;
+ json_output = false;
+ bin_name = "ynltool";
+
+ opterr = 0;
+ while ((opt = getopt_long(argc, argv, "Vhjp",
+ options, NULL)) >= 0) {
+ switch (opt) {
+ case 'V':
+ version_requested = true;
+ break;
+ case 'h':
+ return do_help(argc, argv);
+ case 'p':
+ pretty_output = true;
+ /* fall through */
+ case 'j':
+ if (!json_output) {
+ json_wtr = jsonw_new(stdout);
+ if (!json_wtr) {
+ p_err("failed to create JSON writer");
+ return -1;
+ }
+ json_output = true;
+ }
+ jsonw_pretty(json_wtr, pretty_output);
+ break;
+ default:
+ p_err("unrecognized option '%s'", argv[optind - 1]);
+ if (json_output)
+ clean_and_exit(-1);
+ else
+ usage();
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+ if (argc < 0)
+ usage();
+
+ if (version_requested)
+ ret = do_version(argc, argv);
+ else
+ ret = cmd_select(commands, argc, argv, do_help);
+
+ if (json_output)
+ jsonw_destroy(&json_wtr);
+
+ return ret;
+}
diff --git a/tools/net/ynl/ynltool/main.h b/tools/net/ynl/ynltool/main.h
new file mode 100644
index 000000000000..c7039f9ac55a
--- /dev/null
+++ b/tools/net/ynl/ynltool/main.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (C) 2017-2018 Netronome Systems, Inc. */
+/* Copyright Meta Platforms, Inc. and affiliates */
+
+#ifndef __YNLTOOL_H
+#define __YNLTOOL_H
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "json_writer.h"
+
+#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); })
+#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
+#define BAD_ARG() ({ p_err("what is '%s'?", *argv); -1; })
+#define GET_ARG() ({ argc--; *argv++; })
+#define REQ_ARGS(cnt) \
+ ({ \
+ int _cnt = (cnt); \
+ bool _res; \
+ \
+ if (argc < _cnt) { \
+ p_err("'%s' needs at least %d arguments, %d found", \
+ argv[-1], _cnt, argc); \
+ _res = false; \
+ } else { \
+ _res = true; \
+ } \
+ _res; \
+ })
+
+#define HELP_SPEC_OPTIONS \
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] }"
+
+extern const char *bin_name;
+
+extern json_writer_t *json_wtr;
+extern bool json_output;
+extern bool pretty_output;
+
+void __attribute__((format(printf, 1, 2))) p_err(const char *fmt, ...);
+void __attribute__((format(printf, 1, 2))) p_info(const char *fmt, ...);
+
+bool is_prefix(const char *pfx, const char *str);
+int detect_common_prefix(const char *arg, ...);
+void usage(void) __attribute__((noreturn));
+
+struct cmd {
+ const char *cmd;
+ int (*func)(int argc, char **argv);
+};
+
+int cmd_select(const struct cmd *cmds, int argc, char **argv,
+ int (*help)(int argc, char **argv));
+
+/* subcommands */
+int do_page_pool(int argc, char **argv);
+int do_qstats(int argc, char **argv);
+
+#endif /* __YNLTOOL_H */
diff --git a/tools/net/ynl/ynltool/page-pool.c b/tools/net/ynl/ynltool/page-pool.c
new file mode 100644
index 000000000000..4b24492abab7
--- /dev/null
+++ b/tools/net/ynl/ynltool/page-pool.c
@@ -0,0 +1,461 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <net/if.h>
+
+#include <ynl.h>
+#include "netdev-user.h"
+
+#include "main.h"
+
+struct pp_stat {
+ unsigned int ifc;
+
+ struct {
+ unsigned int cnt;
+ size_t refs, bytes;
+ } live[2];
+
+ size_t alloc_slow, alloc_fast, recycle_ring, recycle_cache;
+};
+
+struct pp_stats_array {
+ unsigned int i, max;
+ struct pp_stat *s;
+};
+
+static struct pp_stat *find_ifc(struct pp_stats_array *a, unsigned int ifindex)
+{
+	unsigned int i;
+
+	for (i = 0; i < a->i; i++)
+		if (a->s[i].ifc == ifindex)
+			return &a->s[i];
+
+	if (++a->i == a->max) {
+		a->max *= 2;
+		a->s = reallocarray(a->s, a->max, sizeof(*a->s));
+		if (!a->s)
+			exit(1); /* out of memory; callers never check for NULL */
+	}
+	a->s[i] = (struct pp_stat){ .ifc = ifindex }; /* grown slots are not zeroed */
+	return &a->s[i];
+}
+
+static void count_pool(struct pp_stat *s, unsigned int l,
+ struct netdev_page_pool_get_rsp *pp)
+{
+ s->live[l].cnt++;
+ if (pp->_present.inflight)
+ s->live[l].refs += pp->inflight;
+ if (pp->_present.inflight_mem)
+ s->live[l].bytes += pp->inflight_mem;
+}
+
+/* We don't know how many pages are sitting in cache and ring
+ * so we will under-count the recycling rate a bit.
+ */
+static void print_json_recycling_stats(struct pp_stat *s)
+{
+ double recycle;
+
+ if (s->alloc_fast + s->alloc_slow) {
+ recycle = (double)(s->recycle_ring + s->recycle_cache) /
+ (s->alloc_fast + s->alloc_slow) * 100;
+ jsonw_float_field(json_wtr, "recycling_pct", recycle);
+ }
+
+ jsonw_name(json_wtr, "alloc");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "slow", s->alloc_slow);
+ jsonw_uint_field(json_wtr, "fast", s->alloc_fast);
+ jsonw_end_object(json_wtr);
+
+ jsonw_name(json_wtr, "recycle");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "ring", s->recycle_ring);
+ jsonw_uint_field(json_wtr, "cache", s->recycle_cache);
+ jsonw_end_object(json_wtr);
+}
+
+static void print_plain_recycling_stats(struct pp_stat *s)
+{
+ double recycle;
+
+ if (s->alloc_fast + s->alloc_slow) {
+ recycle = (double)(s->recycle_ring + s->recycle_cache) /
+ (s->alloc_fast + s->alloc_slow) * 100;
+ printf("recycling: %.1lf%% (alloc: %zu:%zu recycle: %zu:%zu)",
+ recycle, s->alloc_slow, s->alloc_fast,
+ s->recycle_ring, s->recycle_cache);
+ }
+}
+
+static void print_json_stats(struct pp_stats_array *a)
+{
+ jsonw_start_array(json_wtr);
+
+ for (unsigned int i = 0; i < a->i; i++) {
+ char ifname[IF_NAMESIZE];
+ struct pp_stat *s = &a->s[i];
+ const char *name;
+
+ jsonw_start_object(json_wtr);
+
+ if (!s->ifc) {
+ jsonw_string_field(json_wtr, "ifname", "<orphan>");
+ jsonw_uint_field(json_wtr, "ifindex", 0);
+ } else {
+ name = if_indextoname(s->ifc, ifname);
+ if (name)
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_uint_field(json_wtr, "ifindex", s->ifc);
+ }
+
+ jsonw_uint_field(json_wtr, "page_pools", s->live[1].cnt);
+ jsonw_uint_field(json_wtr, "zombies", s->live[0].cnt);
+
+ jsonw_name(json_wtr, "live");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "refs", s->live[1].refs);
+ jsonw_uint_field(json_wtr, "bytes", s->live[1].bytes);
+ jsonw_end_object(json_wtr);
+
+ jsonw_name(json_wtr, "zombie");
+ jsonw_start_object(json_wtr);
+ jsonw_uint_field(json_wtr, "refs", s->live[0].refs);
+ jsonw_uint_field(json_wtr, "bytes", s->live[0].bytes);
+ jsonw_end_object(json_wtr);
+
+ if (s->alloc_fast || s->alloc_slow)
+ print_json_recycling_stats(s);
+
+ jsonw_end_object(json_wtr);
+ }
+
+ jsonw_end_array(json_wtr);
+}
+
+static void print_plain_stats(struct pp_stats_array *a)
+{
+ for (unsigned int i = 0; i < a->i; i++) {
+ char ifname[IF_NAMESIZE];
+ struct pp_stat *s = &a->s[i];
+ const char *name;
+
+ if (!s->ifc) {
+ printf("<orphan>\t");
+ } else {
+ name = if_indextoname(s->ifc, ifname);
+ if (name)
+ printf("%8s", name);
+ printf("[%u]\t", s->ifc);
+ }
+
+ printf("page pools: %u (zombies: %u)\n",
+ s->live[1].cnt, s->live[0].cnt);
+ printf("\t\trefs: %zu bytes: %zu (refs: %zu bytes: %zu)\n",
+ s->live[1].refs, s->live[1].bytes,
+ s->live[0].refs, s->live[0].bytes);
+
+ if (s->alloc_fast || s->alloc_slow) {
+ printf("\t\t");
+ print_plain_recycling_stats(s);
+ printf("\n");
+ }
+ }
+}
+
+static bool
+find_pool_stat_in_list(struct netdev_page_pool_stats_get_list *pp_stats,
+ __u64 pool_id, struct pp_stat *pstat)
+{
+ ynl_dump_foreach(pp_stats, pp) {
+ if (!pp->_present.info || !pp->info._present.id)
+ continue;
+ if (pp->info.id != pool_id)
+ continue;
+
+ memset(pstat, 0, sizeof(*pstat));
+ if (pp->_present.alloc_fast)
+ pstat->alloc_fast = pp->alloc_fast;
+ if (pp->_present.alloc_refill)
+ pstat->alloc_fast += pp->alloc_refill;
+ if (pp->_present.alloc_slow)
+ pstat->alloc_slow = pp->alloc_slow;
+ if (pp->_present.recycle_ring)
+ pstat->recycle_ring = pp->recycle_ring;
+ if (pp->_present.recycle_cached)
+ pstat->recycle_cache = pp->recycle_cached;
+ return true;
+ }
+ return false;
+}
+
+static void
+print_json_pool_list(struct netdev_page_pool_get_list *pools,
+ struct netdev_page_pool_stats_get_list *pp_stats,
+ bool zombies_only)
+{
+ jsonw_start_array(json_wtr);
+
+ ynl_dump_foreach(pools, pp) {
+ char ifname[IF_NAMESIZE];
+ struct pp_stat pstat;
+ const char *name;
+
+ if (zombies_only && !pp->_present.detach_time)
+ continue;
+
+ jsonw_start_object(json_wtr);
+
+ jsonw_uint_field(json_wtr, "id", pp->id);
+
+ if (pp->_present.ifindex) {
+ name = if_indextoname(pp->ifindex, ifname);
+ if (name)
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_uint_field(json_wtr, "ifindex", pp->ifindex);
+ }
+
+ if (pp->_present.napi_id)
+ jsonw_uint_field(json_wtr, "napi_id", pp->napi_id);
+
+ if (pp->_present.inflight)
+ jsonw_uint_field(json_wtr, "refs", pp->inflight);
+
+ if (pp->_present.inflight_mem)
+ jsonw_uint_field(json_wtr, "bytes", pp->inflight_mem);
+
+ if (pp->_present.detach_time)
+ jsonw_uint_field(json_wtr, "detach_time", pp->detach_time);
+
+ if (pp->_present.dmabuf)
+ jsonw_uint_field(json_wtr, "dmabuf", pp->dmabuf);
+
+ if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) &&
+ (pstat.alloc_fast || pstat.alloc_slow))
+ print_json_recycling_stats(&pstat);
+
+ jsonw_end_object(json_wtr);
+ }
+
+ jsonw_end_array(json_wtr);
+}
+
+static void
+print_plain_pool_list(struct netdev_page_pool_get_list *pools,
+ struct netdev_page_pool_stats_get_list *pp_stats,
+ bool zombies_only)
+{
+ ynl_dump_foreach(pools, pp) {
+ char ifname[IF_NAMESIZE];
+ struct pp_stat pstat;
+ const char *name;
+
+ if (zombies_only && !pp->_present.detach_time)
+ continue;
+
+ printf("pool id: %llu", pp->id);
+
+ if (pp->_present.ifindex) {
+ name = if_indextoname(pp->ifindex, ifname);
+ if (name)
+ printf(" dev: %s", name);
+ printf("[%u]", pp->ifindex);
+ }
+
+ if (pp->_present.napi_id)
+ printf(" napi: %llu", pp->napi_id);
+
+ printf("\n");
+
+ if (pp->_present.inflight || pp->_present.inflight_mem) {
+ printf(" inflight:");
+ if (pp->_present.inflight)
+ printf(" %llu pages", pp->inflight);
+ if (pp->_present.inflight_mem)
+ printf(" %llu bytes", pp->inflight_mem);
+ printf("\n");
+ }
+
+ if (pp->_present.detach_time)
+ printf(" detached: %llu\n", pp->detach_time);
+
+ if (pp->_present.dmabuf)
+ printf(" dmabuf: %u\n", pp->dmabuf);
+
+ if (find_pool_stat_in_list(pp_stats, pp->id, &pstat) &&
+ (pstat.alloc_fast || pstat.alloc_slow)) {
+ printf(" ");
+ print_plain_recycling_stats(&pstat);
+ printf("\n");
+ }
+ }
+}
+
+static void aggregate_device_stats(struct pp_stats_array *a,
+ struct netdev_page_pool_get_list *pools,
+ struct netdev_page_pool_stats_get_list *pp_stats)
+{
+ ynl_dump_foreach(pools, pp) {
+ struct pp_stat *s = find_ifc(a, pp->ifindex);
+
+ count_pool(s, 1, pp);
+ if (pp->_present.detach_time)
+ count_pool(s, 0, pp);
+ }
+
+ ynl_dump_foreach(pp_stats, pp) {
+ struct pp_stat *s = find_ifc(a, pp->info.ifindex);
+
+ if (pp->_present.alloc_fast)
+ s->alloc_fast += pp->alloc_fast;
+ if (pp->_present.alloc_refill)
+ s->alloc_fast += pp->alloc_refill;
+ if (pp->_present.alloc_slow)
+ s->alloc_slow += pp->alloc_slow;
+ if (pp->_present.recycle_ring)
+ s->recycle_ring += pp->recycle_ring;
+ if (pp->_present.recycle_cached)
+ s->recycle_cache += pp->recycle_cached;
+ }
+}
+
+static int do_stats(int argc, char **argv)
+{
+ struct netdev_page_pool_stats_get_list *pp_stats;
+ struct netdev_page_pool_get_list *pools;
+ enum {
+ GROUP_BY_DEVICE,
+ GROUP_BY_POOL,
+ } group_by = GROUP_BY_DEVICE;
+ bool zombies_only = false;
+ struct pp_stats_array a = {};
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+ int ret = 0;
+
+ /* Parse options */
+ while (argc > 0) {
+ if (is_prefix(*argv, "group-by")) {
+ NEXT_ARG();
+
+ if (!REQ_ARGS(1))
+ return -1;
+
+ if (is_prefix(*argv, "device")) {
+ group_by = GROUP_BY_DEVICE;
+ } else if (is_prefix(*argv, "pp") ||
+ is_prefix(*argv, "page-pool") ||
+ is_prefix(*argv, "none")) {
+ group_by = GROUP_BY_POOL;
+ } else {
+ p_err("invalid group-by value '%s'", *argv);
+ return -1;
+ }
+ NEXT_ARG();
+ } else if (is_prefix(*argv, "zombies")) {
+ zombies_only = true;
+ group_by = GROUP_BY_POOL;
+ NEXT_ARG();
+ } else {
+ p_err("unknown option '%s'", *argv);
+ return -1;
+ }
+ }
+
+ ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!ys) {
+ p_err("YNL: %s", yerr.msg);
+ return -1;
+ }
+
+ pools = netdev_page_pool_get_dump(ys);
+ if (!pools) {
+ p_err("failed to get page pools: %s", ys->err.msg);
+ ret = -1;
+ goto exit_close;
+ }
+
+ pp_stats = netdev_page_pool_stats_get_dump(ys);
+ if (!pp_stats) {
+ p_err("failed to get page pool stats: %s", ys->err.msg);
+ ret = -1;
+ goto exit_free_pp_list;
+ }
+
+ /* If grouping by pool, print individual pools */
+ if (group_by == GROUP_BY_POOL) {
+ if (json_output)
+ print_json_pool_list(pools, pp_stats, zombies_only);
+ else
+ print_plain_pool_list(pools, pp_stats, zombies_only);
+ } else {
+ /* Aggregated stats mode (group-by device) */
+ a.max = 64;
+ a.s = calloc(a.max, sizeof(*a.s));
+ if (!a.s) {
+ p_err("failed to allocate stats array");
+ ret = -1;
+ goto exit_free_stats_list;
+ }
+
+ aggregate_device_stats(&a, pools, pp_stats);
+
+ if (json_output)
+ print_json_stats(&a);
+ else
+ print_plain_stats(&a);
+
+ free(a.s);
+ }
+
+exit_free_stats_list:
+ netdev_page_pool_stats_get_list_free(pp_stats);
+exit_free_pp_list:
+ netdev_page_pool_get_list_free(pools);
+exit_close:
+ ynl_sock_destroy(ys);
+ return ret;
+}
+
+static int do_help(int argc __attribute__((unused)),
+ char **argv __attribute__((unused)))
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %s page-pool { COMMAND | help }\n"
+ " %s page-pool stats [ OPTIONS ]\n"
+ "\n"
+ " OPTIONS := { group-by { device | page-pool | none } | zombies }\n"
+ "\n"
+ " stats - Display page pool statistics\n"
+ " stats group-by device - Group statistics by network device (default)\n"
+ " stats group-by page-pool | pp | none\n"
+ " - Show individual page pool details (no grouping)\n"
+ " stats zombies - Show only zombie page pools (detached but with\n"
+ " pages in flight). Implies group-by page-pool.\n"
+ "",
+ bin_name, bin_name);
+
+ return 0;
+}
+
+static const struct cmd page_pool_cmds[] = {
+ { "help", do_help },
+ { "stats", do_stats },
+ { 0 }
+};
+
+int do_page_pool(int argc, char **argv)
+{
+ return cmd_select(page_pool_cmds, argc, argv, do_help);
+}
diff --git a/tools/net/ynl/ynltool/qstats.c b/tools/net/ynl/ynltool/qstats.c
new file mode 100644
index 000000000000..31fb45709ffa
--- /dev/null
+++ b/tools/net/ynl/ynltool/qstats.c
@@ -0,0 +1,621 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <net/if.h>
+#include <math.h>
+
+#include <ynl.h>
+#include "netdev-user.h"
+
+#include "main.h"
+
+/*
+ * Scope used by "show": written by the option parser in do_show() and read
+ * by print_one() to decide whether the rx-/tx- name prefix is dropped.
+ */
+static enum netdev_qstats_scope scope; /* default - device */
+
+/*
+ * NOTE(review): nothing else in this file refers to struct queue_balance;
+ * do_balance() keeps its per-group counters in local arrays instead. This
+ * looks like a leftover - confirm before relying on it.
+ */
+struct queue_balance {
+ unsigned int ifindex;
+ enum netdev_queue_type type;
+ unsigned int queue_count;
+ __u64 *rx_packets;
+ __u64 *rx_bytes;
+ __u64 *tx_packets;
+ __u64 *tx_bytes;
+};
+
+static void print_json_qstats(struct netdev_qstats_get_list *qstats)
+{
+ jsonw_start_array(json_wtr);
+
+ ynl_dump_foreach(qstats, qs) {
+ char ifname[IF_NAMESIZE];
+ const char *name;
+
+ jsonw_start_object(json_wtr);
+
+ name = if_indextoname(qs->ifindex, ifname);
+ if (name)
+ jsonw_string_field(json_wtr, "ifname", name);
+ jsonw_uint_field(json_wtr, "ifindex", qs->ifindex);
+
+ if (qs->_present.queue_type)
+ jsonw_string_field(json_wtr, "queue-type",
+ netdev_queue_type_str(qs->queue_type));
+ if (qs->_present.queue_id)
+ jsonw_uint_field(json_wtr, "queue-id", qs->queue_id);
+
+ if (qs->_present.rx_packets || qs->_present.rx_bytes ||
+ qs->_present.rx_alloc_fail || qs->_present.rx_hw_drops ||
+ qs->_present.rx_csum_complete || qs->_present.rx_hw_gro_packets) {
+ jsonw_name(json_wtr, "rx");
+ jsonw_start_object(json_wtr);
+ if (qs->_present.rx_packets)
+ jsonw_uint_field(json_wtr, "packets", qs->rx_packets);
+ if (qs->_present.rx_bytes)
+ jsonw_uint_field(json_wtr, "bytes", qs->rx_bytes);
+ if (qs->_present.rx_alloc_fail)
+ jsonw_uint_field(json_wtr, "alloc-fail", qs->rx_alloc_fail);
+ if (qs->_present.rx_hw_drops)
+ jsonw_uint_field(json_wtr, "hw-drops", qs->rx_hw_drops);
+ if (qs->_present.rx_hw_drop_overruns)
+ jsonw_uint_field(json_wtr, "hw-drop-overruns", qs->rx_hw_drop_overruns);
+ if (qs->_present.rx_hw_drop_ratelimits)
+ jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->rx_hw_drop_ratelimits);
+ if (qs->_present.rx_csum_complete)
+ jsonw_uint_field(json_wtr, "csum-complete", qs->rx_csum_complete);
+ if (qs->_present.rx_csum_unnecessary)
+ jsonw_uint_field(json_wtr, "csum-unnecessary", qs->rx_csum_unnecessary);
+ if (qs->_present.rx_csum_none)
+ jsonw_uint_field(json_wtr, "csum-none", qs->rx_csum_none);
+ if (qs->_present.rx_csum_bad)
+ jsonw_uint_field(json_wtr, "csum-bad", qs->rx_csum_bad);
+ if (qs->_present.rx_hw_gro_packets)
+ jsonw_uint_field(json_wtr, "hw-gro-packets", qs->rx_hw_gro_packets);
+ if (qs->_present.rx_hw_gro_bytes)
+ jsonw_uint_field(json_wtr, "hw-gro-bytes", qs->rx_hw_gro_bytes);
+ if (qs->_present.rx_hw_gro_wire_packets)
+ jsonw_uint_field(json_wtr, "hw-gro-wire-packets", qs->rx_hw_gro_wire_packets);
+ if (qs->_present.rx_hw_gro_wire_bytes)
+ jsonw_uint_field(json_wtr, "hw-gro-wire-bytes", qs->rx_hw_gro_wire_bytes);
+ jsonw_end_object(json_wtr);
+ }
+
+ if (qs->_present.tx_packets || qs->_present.tx_bytes ||
+ qs->_present.tx_hw_drops || qs->_present.tx_csum_none ||
+ qs->_present.tx_hw_gso_packets) {
+ jsonw_name(json_wtr, "tx");
+ jsonw_start_object(json_wtr);
+ if (qs->_present.tx_packets)
+ jsonw_uint_field(json_wtr, "packets", qs->tx_packets);
+ if (qs->_present.tx_bytes)
+ jsonw_uint_field(json_wtr, "bytes", qs->tx_bytes);
+ if (qs->_present.tx_hw_drops)
+ jsonw_uint_field(json_wtr, "hw-drops", qs->tx_hw_drops);
+ if (qs->_present.tx_hw_drop_errors)
+ jsonw_uint_field(json_wtr, "hw-drop-errors", qs->tx_hw_drop_errors);
+ if (qs->_present.tx_hw_drop_ratelimits)
+ jsonw_uint_field(json_wtr, "hw-drop-ratelimits", qs->tx_hw_drop_ratelimits);
+ if (qs->_present.tx_csum_none)
+ jsonw_uint_field(json_wtr, "csum-none", qs->tx_csum_none);
+ if (qs->_present.tx_needs_csum)
+ jsonw_uint_field(json_wtr, "needs-csum", qs->tx_needs_csum);
+ if (qs->_present.tx_hw_gso_packets)
+ jsonw_uint_field(json_wtr, "hw-gso-packets", qs->tx_hw_gso_packets);
+ if (qs->_present.tx_hw_gso_bytes)
+ jsonw_uint_field(json_wtr, "hw-gso-bytes", qs->tx_hw_gso_bytes);
+ if (qs->_present.tx_hw_gso_wire_packets)
+ jsonw_uint_field(json_wtr, "hw-gso-wire-packets", qs->tx_hw_gso_wire_packets);
+ if (qs->_present.tx_hw_gso_wire_bytes)
+ jsonw_uint_field(json_wtr, "hw-gso-wire-bytes", qs->tx_hw_gso_wire_bytes);
+ if (qs->_present.tx_stop)
+ jsonw_uint_field(json_wtr, "stop", qs->tx_stop);
+ if (qs->_present.tx_wake)
+ jsonw_uint_field(json_wtr, "wake", qs->tx_wake);
+ jsonw_end_object(json_wtr);
+ }
+
+ jsonw_end_object(json_wtr);
+ }
+
+ jsonw_end_array(json_wtr);
+}
+
+/*
+ * Print one "name: value" cell of the plain-text table.
+ *
+ * @present: nothing is printed when false
+ * @name: counter name, e.g. "rx-packets"
+ * @val: counter value
+ * @line: column position on the current row, updated in place; the row is
+ *        terminated and the position reset to 0 once it reaches 3
+ */
+static void print_one(bool present, const char *name, unsigned long long val,
+		      int *line)
+{
+	bool has_dir_prefix;
+
+	if (!present)
+		return;
+
+	/* Position 0 means a fresh row: pad it and count the pad as a column */
+	if (*line == 0) {
+		printf(" ");
+		*line = 1;
+	}
+
+	/* Don't waste space on tx- and rx- prefix, it's implied by queue type */
+	has_dir_prefix = !strncmp(name, "rx-", 3) || !strncmp(name, "tx-", 3);
+	if (scope == NETDEV_QSTATS_SCOPE_QUEUE && has_dir_prefix)
+		name += 3;
+
+	printf(" %15s: %15llu", name, val);
+
+	*line += 1;
+	if (*line == 3) {
+		printf("\n");
+		*line = 0;
+	}
+}
+
+static void print_plain_qstats(struct netdev_qstats_get_list *qstats)
+{
+ ynl_dump_foreach(qstats, qs) {
+ char ifname[IF_NAMESIZE];
+ const char *name;
+ int n;
+
+ name = if_indextoname(qs->ifindex, ifname);
+ if (name)
+ printf("%s", name);
+ else
+ printf("ifindex:%u", qs->ifindex);
+
+ if (qs->_present.queue_type && qs->_present.queue_id)
+ printf("\t%s-%-3u",
+ netdev_queue_type_str(qs->queue_type),
+ qs->queue_id);
+ else
+ printf("\t ");
+
+ n = 1;
+
+ /* Basic counters */
+ print_one(qs->_present.rx_packets, "rx-packets", qs->rx_packets, &n);
+ print_one(qs->_present.rx_bytes, "rx-bytes", qs->rx_bytes, &n);
+ print_one(qs->_present.tx_packets, "tx-packets", qs->tx_packets, &n);
+ print_one(qs->_present.tx_bytes, "tx-bytes", qs->tx_bytes, &n);
+
+ /* RX error/drop counters */
+ print_one(qs->_present.rx_alloc_fail, "rx-alloc-fail",
+ qs->rx_alloc_fail, &n);
+ print_one(qs->_present.rx_hw_drops, "rx-hw-drops",
+ qs->rx_hw_drops, &n);
+ print_one(qs->_present.rx_hw_drop_overruns, "rx-hw-drop-overruns",
+ qs->rx_hw_drop_overruns, &n);
+ print_one(qs->_present.rx_hw_drop_ratelimits, "rx-hw-drop-ratelimits",
+ qs->rx_hw_drop_ratelimits, &n);
+
+ /* RX checksum counters */
+ print_one(qs->_present.rx_csum_complete, "rx-csum-complete",
+ qs->rx_csum_complete, &n);
+ print_one(qs->_present.rx_csum_unnecessary, "rx-csum-unnecessary",
+ qs->rx_csum_unnecessary, &n);
+ print_one(qs->_present.rx_csum_none, "rx-csum-none",
+ qs->rx_csum_none, &n);
+ print_one(qs->_present.rx_csum_bad, "rx-csum-bad",
+ qs->rx_csum_bad, &n);
+
+ /* RX GRO counters */
+ print_one(qs->_present.rx_hw_gro_packets, "rx-hw-gro-packets",
+ qs->rx_hw_gro_packets, &n);
+ print_one(qs->_present.rx_hw_gro_bytes, "rx-hw-gro-bytes",
+ qs->rx_hw_gro_bytes, &n);
+ print_one(qs->_present.rx_hw_gro_wire_packets, "rx-hw-gro-wire-packets",
+ qs->rx_hw_gro_wire_packets, &n);
+ print_one(qs->_present.rx_hw_gro_wire_bytes, "rx-hw-gro-wire-bytes",
+ qs->rx_hw_gro_wire_bytes, &n);
+
+ /* TX error/drop counters */
+ print_one(qs->_present.tx_hw_drops, "tx-hw-drops",
+ qs->tx_hw_drops, &n);
+ print_one(qs->_present.tx_hw_drop_errors, "tx-hw-drop-errors",
+ qs->tx_hw_drop_errors, &n);
+ print_one(qs->_present.tx_hw_drop_ratelimits, "tx-hw-drop-ratelimits",
+ qs->tx_hw_drop_ratelimits, &n);
+
+ /* TX checksum counters */
+ print_one(qs->_present.tx_csum_none, "tx-csum-none",
+ qs->tx_csum_none, &n);
+ print_one(qs->_present.tx_needs_csum, "tx-needs-csum",
+ qs->tx_needs_csum, &n);
+
+ /* TX GSO counters */
+ print_one(qs->_present.tx_hw_gso_packets, "tx-hw-gso-packets",
+ qs->tx_hw_gso_packets, &n);
+ print_one(qs->_present.tx_hw_gso_bytes, "tx-hw-gso-bytes",
+ qs->tx_hw_gso_bytes, &n);
+ print_one(qs->_present.tx_hw_gso_wire_packets, "tx-hw-gso-wire-packets",
+ qs->tx_hw_gso_wire_packets, &n);
+ print_one(qs->_present.tx_hw_gso_wire_bytes, "tx-hw-gso-wire-bytes",
+ qs->tx_hw_gso_wire_bytes, &n);
+
+ /* TX queue control */
+ print_one(qs->_present.tx_stop, "tx-stop", qs->tx_stop, &n);
+ print_one(qs->_present.tx_wake, "tx-wake", qs->tx_wake, &n);
+
+ if (n)
+ printf("\n");
+ }
+}
+
+/*
+ * "qstats show": dump queue statistics as returned by the kernel.
+ *
+ * Accepted options are "scope" or "group-by", each followed by "queue" or
+ * "device"; the choice is stored in the file-level 'scope'. Anything else
+ * is rejected. Output is JSON when json_output is set, plain text otherwise.
+ *
+ * Returns 0 on success, -1 on a bad option or on any request failure.
+ */
+static int do_show(int argc, char **argv)
+{
+	struct netdev_qstats_get_list *qstats;
+	struct netdev_qstats_get_req *req;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	int ret = -1;
+
+	/* Parse options */
+	while (argc > 0) {
+		if (!is_prefix(*argv, "scope") &&
+		    !is_prefix(*argv, "group-by")) {
+			p_err("unknown option '%s'", *argv);
+			return -1;
+		}
+		NEXT_ARG();
+
+		if (!REQ_ARGS(1))
+			return -1;
+
+		if (is_prefix(*argv, "queue")) {
+			scope = NETDEV_QSTATS_SCOPE_QUEUE;
+		} else if (is_prefix(*argv, "device")) {
+			scope = 0;
+		} else {
+			p_err("invalid scope value '%s'", *argv);
+			return -1;
+		}
+		NEXT_ARG();
+	}
+
+	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!ys) {
+		p_err("YNL: %s", yerr.msg);
+		return -1;
+	}
+
+	req = netdev_qstats_get_req_alloc();
+	if (!req) {
+		p_err("failed to allocate qstats request");
+		goto exit_close;
+	}
+
+	/* A zero scope is left unset in the request */
+	if (scope)
+		netdev_qstats_get_req_set_scope(req, scope);
+
+	/* The request is released right after the dump, success or not */
+	qstats = netdev_qstats_get_dump(ys, req);
+	netdev_qstats_get_req_free(req);
+	if (!qstats) {
+		p_err("failed to get queue stats: %s", ys->err.msg);
+		goto exit_close;
+	}
+
+	/* Print the stats as returned by the kernel */
+	if (json_output)
+		print_json_qstats(qstats);
+	else
+		print_plain_qstats(qstats);
+
+	netdev_qstats_get_list_free(qstats);
+	ret = 0;
+exit_close:
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Summarize @count samples from @values.
+ *
+ * @mean:   arithmetic mean
+ * @stddev: sample standard deviation (divides by count - 1); 0 when there
+ *          are fewer than two samples
+ * @min, @max: smallest and largest sample
+ *
+ * With no samples, all four outputs are set to 0.
+ */
+static void compute_stats(__u64 *values, unsigned int count,
+			  double *mean, double *stddev, __u64 *min, __u64 *max)
+{
+	double total = 0.0, sq_dev = 0.0;
+	__u64 lo = ~0ULL, hi = 0;
+	unsigned int k;
+
+	if (!count) {
+		*mean = 0;
+		*stddev = 0;
+		*min = 0;
+		*max = 0;
+		return;
+	}
+
+	for (k = 0; k < count; k++) {
+		__u64 v = values[k];
+
+		total += v;
+		if (v < lo)
+			lo = v;
+		if (v > hi)
+			hi = v;
+	}
+	*min = lo;
+	*max = hi;
+	*mean = total / count;
+
+	/* A single sample has no spread */
+	if (count == 1) {
+		*stddev = 0;
+		return;
+	}
+
+	for (k = 0; k < count; k++) {
+		double dev = values[k] - *mean;
+
+		sq_dev += dev * dev;
+	}
+	*stddev = sqrt(sq_dev / (count - 1));
+}
+
+/*
+ * Plain-text report of how evenly one counter is spread over a queue group.
+ *
+ * @name:   counter name; a leading 'r' / 't' marks it as an RX / TX counter
+ * @type:   queue type of the group being reported
+ * @values: per-queue values of the counter
+ * @count:  number of entries in @values
+ *
+ * A counter whose direction does not match @type is skipped without output.
+ * cv is stddev/mean and ns is 2*(max-min)/(max+min), both as percentages;
+ * either is reported as 0 when its denominator is 0.
+ */
+static void print_balance_stats(const char *name, enum netdev_queue_type type,
+				__u64 *values, unsigned int count)
+{
+	double mean, stddev, cv, ns, span;
+	__u64 min, max;
+
+	if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) ||
+	    (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX))
+		return;
+
+	compute_stats(values, count, &mean, &stddev, &min, &max);
+
+	cv = mean > 0 ? (stddev / mean) * 100.0 : 0.0;
+
+	/* Sum in floating point: min + max could wrap around as __u64 */
+	span = (double)max + (double)min;
+	ns = span > 0 ? 2.0 * (double)(max - min) / span * 100 : 0.0;
+
+	printf(" %-12s: cv=%.1f%% ns=%.1f%% stddev=%.0f\n",
+	       name, cv, ns, stddev);
+	/* %llu needs unsigned long long; __u64 is not that type everywhere */
+	printf(" %-12s min=%llu max=%llu mean=%.0f\n",
+	       "", (unsigned long long)min, (unsigned long long)max, mean);
+}
+
+/*
+ * JSON report of how evenly one counter is spread over a queue group.
+ *
+ * @name:   counter name, also used as the JSON key; a leading 'r' / 't'
+ *          marks it as an RX / TX counter
+ * @type:   queue type of the group being reported
+ * @values: per-queue values of the counter
+ * @count:  number of entries in @values
+ *
+ * A counter whose direction does not match @type produces no output.
+ * "coefficient-of-variation" is stddev/mean and "normalized-spread" is
+ * 2*(max-min)/(max+min), both as percentages; either is 0 when its
+ * denominator is 0.
+ */
+static void
+print_balance_stats_json(const char *name, enum netdev_queue_type type,
+			 __u64 *values, unsigned int count)
+{
+	double mean, stddev, cv, ns, span;
+	__u64 min, max;
+
+	if ((name[0] == 'r' && type != NETDEV_QUEUE_TYPE_RX) ||
+	    (name[0] == 't' && type != NETDEV_QUEUE_TYPE_TX))
+		return;
+
+	compute_stats(values, count, &mean, &stddev, &min, &max);
+
+	cv = mean > 0 ? (stddev / mean) * 100.0 : 0.0;
+
+	/* Sum in floating point: min + max could wrap around as __u64 */
+	span = (double)max + (double)min;
+	ns = span > 0 ? 2.0 * (double)(max - min) / span * 100 : 0.0;
+
+	jsonw_name(json_wtr, name);
+	jsonw_start_object(json_wtr);
+	jsonw_uint_field(json_wtr, "queue-count", count);
+	jsonw_uint_field(json_wtr, "min", min);
+	jsonw_uint_field(json_wtr, "max", max);
+	jsonw_float_field(json_wtr, "mean", mean);
+	jsonw_float_field(json_wtr, "stddev", stddev);
+	jsonw_float_field(json_wtr, "coefficient-of-variation", cv);
+	jsonw_float_field(json_wtr, "normalized-spread", ns);
+	jsonw_end_object(json_wtr);
+}
+
+/*
+ * qsort() comparator for the array of reply pointers built in do_balance().
+ *
+ * qsort() passes pointers to the array ELEMENTS. The elements are
+ * themselves pointers to reply entries, so each argument has to be
+ * dereferenced once to reach the entry.
+ *
+ * Orders by ifindex, then queue type, then queue id, which keeps all queues
+ * of one device/type adjacent. Returns <0, 0 or >0 as qsort() expects.
+ */
+static int cmp_ifindex_type(const void *a, const void *b)
+{
+	const struct netdev_qstats_get_rsp *qa =
+		*(const struct netdev_qstats_get_rsp * const *)a;
+	const struct netdev_qstats_get_rsp *qb =
+		*(const struct netdev_qstats_get_rsp * const *)b;
+
+	/*
+	 * Compare rather than subtract: a difference of unsigned values
+	 * converted to int does not reliably carry the sign of the ordering.
+	 */
+	if (qa->ifindex != qb->ifindex)
+		return qa->ifindex < qb->ifindex ? -1 : 1;
+	if (qa->queue_type != qb->queue_type)
+		return qa->queue_type < qb->queue_type ? -1 : 1;
+	if (qa->queue_id != qb->queue_id)
+		return qa->queue_id < qb->queue_id ? -1 : 1;
+	return 0;
+}
+
+/*
+ * "qstats balance": fetch per-queue statistics and, for every
+ * (ifindex, queue type) group, report how evenly packets and bytes are
+ * spread over the queues of that group.
+ *
+ * @argc: number of remaining arguments; anything above 0 is an error
+ * @argv: unused
+ *
+ * Output is a JSON array with one object per group when json_output is set
+ * (an empty array when there is nothing to report), plain text otherwise.
+ * Returns 0 on success and -1 on failure.
+ */
+static int do_balance(int argc, char **argv __attribute__((unused)))
+{
+	struct netdev_qstats_get_list *qstats;
+	struct netdev_qstats_get_req *req;
+	struct netdev_qstats_get_rsp **sorted;
+	struct ynl_error yerr;
+	struct ynl_sock *ys;
+	unsigned int count = 0;
+	unsigned int i, j;
+	int ret = 0;
+
+	if (argc > 0) {
+		p_err("balance command takes no arguments");
+		return -1;
+	}
+
+	ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+	if (!ys) {
+		p_err("YNL: %s", yerr.msg);
+		return -1;
+	}
+
+	req = netdev_qstats_get_req_alloc();
+	if (!req) {
+		p_err("failed to allocate qstats request");
+		ret = -1;
+		goto exit_close;
+	}
+
+	/* Always use queue scope for balance analysis */
+	netdev_qstats_get_req_set_scope(req, NETDEV_QSTATS_SCOPE_QUEUE);
+
+	qstats = netdev_qstats_get_dump(ys, req);
+	netdev_qstats_get_req_free(req);
+	if (!qstats) {
+		p_err("failed to get queue stats: %s", ys->err.msg);
+		ret = -1;
+		goto exit_close;
+	}
+
+	/* Count and sort queues */
+	ynl_dump_foreach(qstats, qs)
+		count++;
+
+	if (count == 0) {
+		if (json_output) {
+			/* Emit a complete, empty array */
+			jsonw_start_array(json_wtr);
+			jsonw_end_array(json_wtr);
+		} else {
+			printf("No queue statistics available\n");
+		}
+		goto exit_free_qstats;
+	}
+
+	/* Sort pointers to the entries; the dump list itself is left as is */
+	sorted = calloc(count, sizeof(*sorted));
+	if (!sorted) {
+		p_err("failed to allocate sorted array");
+		ret = -1;
+		goto exit_free_qstats;
+	}
+
+	i = 0;
+	ynl_dump_foreach(qstats, qs)
+		sorted[i++] = qs;
+
+	qsort(sorted, count, sizeof(*sorted), cmp_ifindex_type);
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+
+	/* Process each device/queue-type combination */
+	i = 0;
+	while (i < count) {
+		__u64 *rx_packets, *rx_bytes, *tx_packets, *tx_bytes;
+		enum netdev_queue_type type = sorted[i]->queue_type;
+		unsigned int ifindex = sorted[i]->ifindex;
+		unsigned int queue_count = 0;
+		char ifname[IF_NAMESIZE];
+		const char *name;
+
+		/* Count queues for this device/type */
+		for (j = i; j < count && sorted[j]->ifindex == ifindex &&
+		     sorted[j]->queue_type == type; j++)
+			queue_count++;
+
+		/*
+		 * Skip if no packets/bytes (inactive queues).
+		 * Only the first queue of the group is inspected here.
+		 */
+		if (!sorted[i]->_present.rx_packets &&
+		    !sorted[i]->_present.rx_bytes &&
+		    !sorted[i]->_present.tx_packets &&
+		    !sorted[i]->_present.tx_bytes)
+			goto next_ifc;
+
+		/* Allocate arrays for statistics */
+		rx_packets = calloc(queue_count, sizeof(*rx_packets));
+		rx_bytes = calloc(queue_count, sizeof(*rx_bytes));
+		tx_packets = calloc(queue_count, sizeof(*tx_packets));
+		tx_bytes = calloc(queue_count, sizeof(*tx_bytes));
+
+		if (!rx_packets || !rx_bytes || !tx_packets || !tx_bytes) {
+			p_err("failed to allocate statistics arrays");
+			free(rx_packets);
+			free(rx_bytes);
+			free(tx_packets);
+			free(tx_bytes);
+			ret = -1;
+			goto exit_free_sorted;
+		}
+
+		/* Collect statistics; a counter that is not reported counts as 0 */
+		for (j = 0; j < queue_count; j++) {
+			rx_packets[j] = sorted[i + j]->_present.rx_packets ?
+					sorted[i + j]->rx_packets : 0;
+			rx_bytes[j] = sorted[i + j]->_present.rx_bytes ?
+				      sorted[i + j]->rx_bytes : 0;
+			tx_packets[j] = sorted[i + j]->_present.tx_packets ?
+					sorted[i + j]->tx_packets : 0;
+			tx_bytes[j] = sorted[i + j]->_present.tx_bytes ?
+				      sorted[i + j]->tx_bytes : 0;
+		}
+
+		name = if_indextoname(ifindex, ifname);
+
+		/*
+		 * All four counters are passed for every group; the print
+		 * helpers drop the ones whose direction does not match @type.
+		 */
+		if (json_output) {
+			jsonw_start_object(json_wtr);
+			if (name)
+				jsonw_string_field(json_wtr, "ifname", name);
+			jsonw_uint_field(json_wtr, "ifindex", ifindex);
+			jsonw_string_field(json_wtr, "queue-type",
+					   netdev_queue_type_str(type));
+
+			print_balance_stats_json("rx-packets", type,
+						 rx_packets, queue_count);
+			print_balance_stats_json("rx-bytes", type,
+						 rx_bytes, queue_count);
+			print_balance_stats_json("tx-packets", type,
+						 tx_packets, queue_count);
+			print_balance_stats_json("tx-bytes", type,
+						 tx_bytes, queue_count);
+
+			jsonw_end_object(json_wtr);
+		} else {
+			if (name)
+				printf("%s", name);
+			else
+				printf("ifindex:%u", ifindex);
+			printf(" %s %u queues:\n",
+			       netdev_queue_type_str(type), queue_count);
+
+			print_balance_stats("rx-packets", type,
+					    rx_packets, queue_count);
+			print_balance_stats("rx-bytes", type,
+					    rx_bytes, queue_count);
+			print_balance_stats("tx-packets", type,
+					    tx_packets, queue_count);
+			print_balance_stats("tx-bytes", type,
+					    tx_bytes, queue_count);
+			printf("\n");
+		}
+
+		free(rx_packets);
+		free(rx_bytes);
+		free(tx_packets);
+		free(tx_bytes);
+
+next_ifc:
+		i += queue_count;
+	}
+
+exit_free_sorted:
+	/*
+	 * Reached on normal completion and from the allocation failure inside
+	 * the loop; both happen after the array was opened, so close it here
+	 * to keep the JSON output well-formed on the error path too.
+	 */
+	if (json_output)
+		jsonw_end_array(json_wtr);
+	free(sorted);
+exit_free_qstats:
+	netdev_qstats_get_list_free(qstats);
+exit_close:
+	ynl_sock_destroy(ys);
+	return ret;
+}
+
+/*
+ * Usage text for the "qstats" command.
+ *
+ * In JSON mode no text is printed; a JSON null is written instead.
+ * Otherwise the usage goes to stderr. Both arguments are ignored and the
+ * function always returns 0.
+ */
+static int do_help(int argc __attribute__((unused)),
+		   char **argv __attribute__((unused)))
+{
+	if (!json_output) {
+		fprintf(stderr,
+			"Usage: %s qstats { COMMAND | help }\n"
+			" %s qstats [ show ] [ OPTIONS ]\n"
+			" %s qstats balance\n"
+			"\n"
+			" OPTIONS := { scope queue | group-by { device | queue } }\n"
+			"\n"
+			" show - Display queue statistics (default)\n"
+			" Statistics are aggregated for the entire device.\n"
+			" show scope queue - Display per-queue statistics\n"
+			" show group-by device - Display device-aggregated statistics (default)\n"
+			" show group-by queue - Display per-queue statistics\n"
+			" balance - Analyze traffic distribution balance.\n"
+			"",
+			bin_name, bin_name, bin_name);
+		return 0;
+	}
+
+	jsonw_null(json_wtr);
+	return 0;
+}
+
+/*
+ * Sub-command table for "qstats", handed to cmd_select() below.
+ * The trailing zeroed entry is presumably the end-of-table marker that
+ * cmd_select() looks for - confirm against its implementation.
+ */
+static const struct cmd qstats_cmds[] = {
+ { "show", do_show },
+ { "balance", do_balance },
+ { "help", do_help },
+ { 0 }
+};
+
+/*
+ * Entry point for the "qstats" command: dispatches through cmd_select()
+ * using the table above, with do_help passed as its last argument, and
+ * returns whatever cmd_select() returns.
+ */
+int do_qstats(int argc, char **argv)
+{
+ return cmd_select(qstats_cmds, argc, argv, do_help);
+}
diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
index 4faa4dd72f35..73d883128511 100644
--- a/tools/objtool/.gitignore
+++ b/tools/objtool/.gitignore
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
+arch/x86/lib/cpu-feature-names.c
arch/x86/lib/inat-tables.c
/objtool
+feature
+FEATURE-DUMP.objtool
fixdep
libsubcmd/
diff --git a/tools/objtool/Build b/tools/objtool/Build
index a3cdf8af6635..600da051af12 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -8,13 +8,17 @@ objtool-y += builtin-check.o
objtool-y += elf.o
objtool-y += objtool.o
-objtool-$(BUILD_ORC) += orc_gen.o
-objtool-$(BUILD_ORC) += orc_dump.o
+objtool-$(BUILD_DISAS) += disas.o
+objtool-$(BUILD_DISAS) += trace.o
+
+objtool-$(BUILD_ORC) += orc_gen.o orc_dump.o
+objtool-$(BUILD_KLP) += builtin-klp.o klp-diff.o klp-post-link.o
objtool-y += libstring.o
objtool-y += libctype.o
objtool-y += str_error_r.o
objtool-y += librbtree.o
+objtool-y += signal.o
$(OUTPUT)libstring.o: ../lib/string.c FORCE
$(call rule_mkdir)
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 8c20361dd100..ad6e1ec706ce 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -2,6 +2,28 @@
include ../scripts/Makefile.include
include ../scripts/Makefile.arch
+ifeq ($(SRCARCH),x86)
+ BUILD_ORC := y
+ ARCH_HAS_KLP := y
+endif
+
+ifeq ($(SRCARCH),loongarch)
+ BUILD_ORC := y
+endif
+
+ifeq ($(ARCH_HAS_KLP),y)
+ HAVE_XXHASH = $(shell printf "$(pound)include <xxhash.h>\nXXH3_state_t *state;int main() {}" | \
+ $(HOSTCC) -xc - -o /dev/null -lxxhash 2> /dev/null && echo y || echo n)
+ ifeq ($(HAVE_XXHASH),y)
+ BUILD_KLP := y
+ LIBXXHASH_CFLAGS := $(shell $(HOSTPKG_CONFIG) libxxhash --cflags 2>/dev/null) \
+ -DBUILD_KLP
+ LIBXXHASH_LIBS := $(shell $(HOSTPKG_CONFIG) libxxhash --libs 2>/dev/null || echo -lxxhash)
+ endif
+endif
+
+export BUILD_ORC BUILD_KLP
+
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -23,6 +45,11 @@ LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lel
all: $(OBJTOOL)
+WARNINGS := -Werror -Wall -Wextra -Wmissing-prototypes \
+ -Wmissing-declarations -Wwrite-strings \
+ -Wno-implicit-fallthrough -Wno-sign-compare \
+ -Wno-unused-parameter
+
INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/include/uapi \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
@@ -30,11 +57,11 @@ INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/objtool/include \
-I$(srctree)/tools/objtool/arch/$(SRCARCH)/include \
-I$(LIBSUBCMD_OUTPUT)/include
-# Note, EXTRA_WARNINGS here was determined for CC and not HOSTCC, it
-# is passed here to match a legacy behavior.
-WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
-OBJTOOL_CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
-OBJTOOL_LDFLAGS := $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
+
+OBJTOOL_CFLAGS := -std=gnu11 -fomit-frame-pointer -O2 -g $(WARNINGS) \
+ $(INCLUDES) $(LIBELF_FLAGS) $(LIBXXHASH_CFLAGS) $(HOSTCFLAGS)
+
+OBJTOOL_LDFLAGS := $(LIBSUBCMD) $(LIBELF_LIBS) $(LIBXXHASH_LIBS) $(HOSTLDFLAGS)
# Allow old libelf to be used:
elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(HOSTCC) $(OBJTOOL_CFLAGS) -x c -E - 2>/dev/null | grep elf_getshdr)
@@ -43,20 +70,32 @@ OBJTOOL_CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
# Always want host compilation.
HOST_OVERRIDES := CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)"
-AWK = awk
-MKDIR = mkdir
+#
+# To support disassembly, objtool needs libopcodes which is provided
+# with libbfd (binutils-dev or binutils-devel package).
+#
+FEATURE_USER = .objtool
+FEATURE_TESTS = libbfd disassembler-init-styled
+FEATURE_DISPLAY =
+include $(srctree)/tools/build/Makefile.feature
+
+ifeq ($(feature-disassembler-init-styled), 1)
+ OBJTOOL_CFLAGS += -DDISASM_INIT_STYLED
+endif
-BUILD_ORC := n
+BUILD_DISAS := n
-ifeq ($(SRCARCH),x86)
- BUILD_ORC := y
+ifeq ($(feature-libbfd),1)
+ BUILD_DISAS := y
+ OBJTOOL_CFLAGS += -DDISAS -DPACKAGE="objtool"
+ OBJTOOL_LDFLAGS += -lopcodes
endif
-ifeq ($(SRCARCH),loongarch)
- BUILD_ORC := y
-endif
+export BUILD_DISAS
+
+AWK = awk
+MKDIR = mkdir
-export BUILD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
@@ -86,7 +125,10 @@ $(LIBSUBCMD)-clean:
clean: $(LIBSUBCMD)-clean
$(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
$(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+ $(Q)$(RM) $(OUTPUT)arch/x86/lib/cpu-feature-names.c $(OUTPUT)fixdep
$(Q)$(RM) $(OUTPUT)arch/x86/lib/inat-tables.c $(OUTPUT)fixdep
+ $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.objtool
+ $(Q)$(RM) -r -- $(OUTPUT)feature
FORCE:
diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c
index b6fdc68053cc..6cd288150f49 100644
--- a/tools/objtool/arch/loongarch/decode.c
+++ b/tools/objtool/arch/loongarch/decode.c
@@ -1,13 +1,25 @@
// SPDX-License-Identifier: GPL-2.0-or-later
#include <string.h>
#include <objtool/check.h>
+#include <objtool/disas.h>
#include <objtool/warn.h>
#include <asm/inst.h>
#include <asm/orc_types.h>
#include <linux/objtool_types.h>
#include <arch/elf.h>
-int arch_ftrace_match(char *name)
+/*
+ * Printable register names for LoongArch; the array is sized CFI_NUM_REGS.
+ * Presumably indexed by CFI register number - confirm against the users.
+ */
+const char *arch_reg_name[CFI_NUM_REGS] = {
+ "zero", "ra", "tp", "sp",
+ "a0", "a1", "a2", "a3",
+ "a4", "a5", "a6", "a7",
+ "t0", "t1", "t2", "t3",
+ "t4", "t5", "t6", "t7",
+ "t8", "u0", "fp", "s0",
+ "s1", "s2", "s3", "s4",
+ "s5", "s6", "s7", "s8"
+};
+
+int arch_ftrace_match(const char *name)
{
return !strcmp(name, "_mcount");
}
@@ -17,9 +29,9 @@ unsigned long arch_jump_destination(struct instruction *insn)
return insn->offset + (insn->immediate << 2);
}
-unsigned long arch_dest_reloc_offset(int addend)
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc)
{
- return addend;
+ return reloc_addend(reloc);
}
bool arch_pc_relative_reloc(struct reloc *reloc)
@@ -278,6 +290,25 @@ static bool decode_insn_reg2i16_fomat(union loongarch_instruction inst,
return true;
}
+/*
+ * Decode instructions in the three-register format.
+ *
+ * Only amswap.w is recognized. Returns true when the opcode is amswap.w
+ * (whatever its operands) and false for any other opcode. insn->type is
+ * set to INSN_BUG only for the exact form "amswap.w $zero, $ra, $zero";
+ * otherwise it is left untouched.
+ */
+static bool decode_insn_reg3_fomat(union loongarch_instruction inst,
+				   struct instruction *insn)
+{
+	bool is_bug_form;
+
+	if (inst.reg3_format.opcode != amswapw_op)
+		return false;
+
+	/* amswap.w $zero, $ra, $zero */
+	is_bug_form = inst.reg3_format.rd == LOONGARCH_GPR_ZERO &&
+		      inst.reg3_format.rk == LOONGARCH_GPR_RA &&
+		      inst.reg3_format.rj == LOONGARCH_GPR_ZERO;
+	if (is_bug_form)
+		insn->type = INSN_BUG;
+
+	return true;
+}
+
int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
unsigned long offset, unsigned int maxlen,
struct instruction *insn)
@@ -309,11 +340,19 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
return 0;
if (decode_insn_reg2i16_fomat(inst, insn))
return 0;
+ if (decode_insn_reg3_fomat(inst, insn))
+ return 0;
- if (inst.word == 0)
+ if (inst.word == 0) {
+ /* andi $zero, $zero, 0x0 */
insn->type = INSN_NOP;
- else if (inst.reg0i15_format.opcode == break_op) {
- /* break */
+ } else if (inst.reg0i15_format.opcode == break_op &&
+ inst.reg0i15_format.immediate == 0x0) {
+ /* break 0x0 */
+ insn->type = INSN_TRAP;
+ } else if (inst.reg0i15_format.opcode == break_op &&
+ inst.reg0i15_format.immediate == 0x1) {
+ /* break 0x1 */
insn->type = INSN_BUG;
} else if (inst.reg2_format.opcode == ertn_op) {
/* ertn */
@@ -387,3 +426,14 @@ unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *tabl
return reloc->sym->offset + reloc_addend(reloc);
}
}
+
+#ifdef DISAS
+
+/*
+ * Set up @dinfo for LoongArch by delegating to disas_info_init() with the
+ * LoongArch BFD architecture and the loongarch32/loongarch64 machine
+ * constants; the final argument is NULL. Returns disas_info_init()'s result.
+ */
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+ return disas_info_init(dinfo, bfd_arch_loongarch,
+ bfd_mach_loongarch32, bfd_mach_loongarch64,
+ NULL);
+}
+
+#endif /* DISAS */
diff --git a/tools/objtool/arch/loongarch/orc.c b/tools/objtool/arch/loongarch/orc.c
index b58c5ff443c9..ffd3a3c858ae 100644
--- a/tools/objtool/arch/loongarch/orc.c
+++ b/tools/objtool/arch/loongarch/orc.c
@@ -5,7 +5,6 @@
#include <objtool/check.h>
#include <objtool/orc.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruction *insn)
{
diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c
index e39f86d97002..aba774109437 100644
--- a/tools/objtool/arch/loongarch/special.c
+++ b/tools/objtool/arch/loongarch/special.c
@@ -27,6 +27,7 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
struct table_info *next_table;
unsigned long tmp_insn_offset;
unsigned long tmp_rodata_offset;
+ bool is_valid_list = false;
rsec = find_section_by_name(file->elf, ".rela.discard.tablejump_annotate");
if (!rsec)
@@ -35,6 +36,12 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
INIT_LIST_HEAD(&table_list);
for_each_reloc(rsec, reloc) {
+ if (reloc->sym->sec->rodata)
+ continue;
+
+ if (strcmp(insn->sec->name, reloc->sym->sec->name))
+ continue;
+
orig_table = malloc(sizeof(struct table_info));
if (!orig_table) {
WARN("malloc failed");
@@ -49,6 +56,22 @@ static void get_rodata_table_size_by_table_annotate(struct objtool_file *file,
if (reloc_idx(reloc) + 1 == sec_num_entries(rsec))
break;
+
+ if (strcmp(insn->sec->name, (reloc + 1)->sym->sec->name)) {
+ list_for_each_entry(orig_table, &table_list, jump_info) {
+ if (orig_table->insn_offset == insn->offset) {
+ is_valid_list = true;
+ break;
+ }
+ }
+
+ if (!is_valid_list) {
+ list_del_init(&table_list);
+ continue;
+ }
+
+ break;
+ }
}
list_for_each_entry(orig_table, &table_list, jump_info) {
@@ -171,3 +194,8 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
return rodata_reloc;
}
+
+/* Stub: returns NULL for every @feature_number, i.e. no name is provided. */
+const char *arch_cpu_feature_name(int feature_number)
+{
+ return NULL;
+}
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index c851c51d4bd3..e534ac1123b3 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -3,20 +3,32 @@
#include <stdio.h>
#include <stdlib.h>
#include <objtool/check.h>
+#include <objtool/disas.h>
#include <objtool/elf.h>
#include <objtool/arch.h>
#include <objtool/warn.h>
#include <objtool/builtin.h>
-#include <objtool/endianness.h>
-int arch_ftrace_match(char *name)
+/*
+ * Printable register names for powerpc; the array is sized CFI_NUM_REGS.
+ * Presumably indexed by CFI register number - confirm against the users.
+ */
+const char *arch_reg_name[CFI_NUM_REGS] = {
+ "r0", "sp", "r2", "r3",
+ "r4", "r5", "r6", "r7",
+ "r8", "r9", "r10", "r11",
+ "r12", "r13", "r14", "r15",
+ "r16", "r17", "r18", "r19",
+ "r20", "r21", "r22", "r23",
+ "r24", "r25", "r26", "r27",
+ "r28", "r29", "r30", "r31",
+ "ra"
+};
+
+int arch_ftrace_match(const char *name)
{
return !strcmp(name, "_mcount");
}
-unsigned long arch_dest_reloc_offset(int addend)
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc)
{
- return addend;
+ return reloc_addend(reloc);
}
bool arch_callee_saved_reg(unsigned char reg)
@@ -128,3 +140,14 @@ unsigned int arch_reloc_size(struct reloc *reloc)
return 8;
}
}
+
+#ifdef DISAS
+
+/*
+ * Set up @dinfo for powerpc by delegating to disas_info_init() with the
+ * powerpc BFD architecture and the ppc/ppc64 machine constants; the final
+ * argument is NULL. Returns disas_info_init()'s result.
+ */
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+ return disas_info_init(dinfo, bfd_arch_powerpc,
+ bfd_mach_ppc, bfd_mach_ppc64,
+ NULL);
+}
+
+#endif /* DISAS */
diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c
index 51610689abf7..8f9bf61ca089 100644
--- a/tools/objtool/arch/powerpc/special.c
+++ b/tools/objtool/arch/powerpc/special.c
@@ -18,3 +18,8 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
{
exit(-1);
}
+
+/* Stub: returns NULL for every @feature_number, i.e. no name is provided. */
+const char *arch_cpu_feature_name(int feature_number)
+{
+ return NULL;
+}
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
index 3dedb2fd8f3a..febee0b8ee0b 100644
--- a/tools/objtool/arch/x86/Build
+++ b/tools/objtool/arch/x86/Build
@@ -1,5 +1,5 @@
-objtool-y += special.o
objtool-y += decode.o
+objtool-y += special.o
objtool-y += orc.o
inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk
@@ -12,3 +12,14 @@ $(OUTPUT)arch/x86/lib/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/lib/inat-tables.c
CFLAGS_decode.o += -I$(OUTPUT)arch/x86/lib
+
+cpu_features = ../arch/x86/include/asm/cpufeatures.h
+cpu_features_script = ../arch/x86/tools/gen-cpu-feature-names-x86.awk
+
+$(OUTPUT)arch/x86/lib/cpu-feature-names.c: $(cpu_features_script) $(cpu_features)
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)$(AWK) -f $(cpu_features_script) $(cpu_features) > $@
+
+$(OUTPUT)arch/x86/special.o: $(OUTPUT)arch/x86/lib/cpu-feature-names.c
+
+CFLAGS_special.o += -I$(OUTPUT)arch/x86/lib
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 98c4713c1b09..f4af82508228 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -16,14 +16,22 @@
#include <asm/orc_types.h>
#include <objtool/check.h>
+#include <objtool/disas.h>
#include <objtool/elf.h>
#include <objtool/arch.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
#include <objtool/builtin.h>
#include <arch/elf.h>
-int arch_ftrace_match(char *name)
+/*
+ * Printable register names for x86-64; the array is sized CFI_NUM_REGS.
+ * Presumably indexed by CFI register number - confirm against the users.
+ */
+const char *arch_reg_name[CFI_NUM_REGS] = {
+ "rax", "rcx", "rdx", "rbx",
+ "rsp", "rbp", "rsi", "rdi",
+ "r8", "r9", "r10", "r11",
+ "r12", "r13", "r14", "r15",
+ "ra"
+};
+
+int arch_ftrace_match(const char *name)
{
return !strcmp(name, "__fentry__");
}
@@ -68,9 +76,65 @@ bool arch_callee_saved_reg(unsigned char reg)
}
}
-unsigned long arch_dest_reloc_offset(int addend)
+/*
+ * Undo the effects of __pa_symbol() if necessary.
+ *
+ * Values that are zero or negative when viewed as s64 are returned
+ * unchanged. Positive values are reduced to their low 31 bits.
+ */
+static unsigned long phys_to_virt(unsigned long pa)
+{
+	s64 signed_pa = pa;
+
+	if (signed_pa <= 0)
+		return signed_pa;
+
+	/*
+	 * Equivalent to "&= ~(0x80000000)": that constant has a 32-bit unsigned
+	 * type, so its complement is 0x7fffffff and the AND clears bit 31 as
+	 * well as every bit above it.
+	 */
+	return signed_pa & 0x7fffffff;
+}
+
+/*
+ * Addend of @reloc as seen from instruction @insn.
+ *
+ * For PC-relative relocations the addend is rebased by the distance from
+ * the relocation offset to the end of the instruction. The result is then
+ * passed through phys_to_virt().
+ */
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc)
+{
+	s64 raw = reloc_addend(reloc);
+
+	if (!arch_pc_relative_reloc(reloc))
+		return phys_to_virt(raw);
+
+	return phys_to_virt(raw + (insn->offset + insn->len -
+				   reloc_offset(reloc)));
+}
+
+/*
+ * Find the instruction in @sec that covers byte @offset.
+ *
+ * Decodes instructions one after another from the start of the section
+ * until one ends past @offset, then reports its start in @insn_off and its
+ * length in @insn_len.
+ *
+ * NOTE(review): the result of insn_decode() is not checked and the loop has
+ * no upper bound; if a decode can leave insn.length at 0, or @offset lies
+ * beyond the section, this would not terminate cleanly - confirm.
+ */
+static void scan_for_insn(struct section *sec, unsigned long offset,
+			  unsigned long *insn_off, unsigned int *insn_len)
+{
+	unsigned long pos;
+	struct insn insn;
+
+	for (pos = 0; ; pos += insn.length) {
+		insn_decode(&insn, sec->data->d_buf + pos, sec_size(sec) - pos,
+			    INSN_MODE_64);
+
+		/* This instruction ends at or before @offset: keep going */
+		if (pos + insn.length <= offset)
+			continue;
+
+		*insn_off = pos;
+		*insn_len = insn.length;
+		return;
+	}
+}
+
+u64 arch_adjusted_addend(struct reloc *reloc)
{
- return addend + 4;
+ unsigned int type = reloc_type(reloc);
+ s64 addend = reloc_addend(reloc);
+ unsigned long insn_off;
+ unsigned int insn_len;
+
+ if (type == R_X86_64_PLT32)
+ return addend + 4;
+
+ if (type != R_X86_64_PC32 || !is_text_sec(reloc->sec->base))
+ return addend;
+
+ scan_for_insn(reloc->sec->base, reloc_offset(reloc),
+ &insn_off, &insn_len);
+
+ return addend + insn_off + insn_len - reloc_offset(reloc);
}
unsigned long arch_jump_destination(struct instruction *insn)
@@ -189,15 +253,6 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
op2 = ins.opcode.bytes[1];
op3 = ins.opcode.bytes[2];
- /*
- * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long.
- */
- if (op1 == 0xea) {
- insn->len = 1;
- insn->type = INSN_BUG;
- return 0;
- }
-
if (ins.rex_prefix.nbytes) {
rex = ins.rex_prefix.bytes[0];
rex_w = X86_REX_W(rex) >> 3;
@@ -503,6 +558,12 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
break;
case 0x90:
+ if (rex_b) /* XCHG %r8, %rax */
+ break;
+
+ if (prefix == 0xf3) /* REP NOP := PAUSE */
+ break;
+
insn->type = INSN_NOP;
break;
@@ -556,13 +617,14 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
} else if (op2 == 0x0b || op2 == 0xb9) {
- /* ud2 */
+ /* ud2, ud1 */
insn->type = INSN_BUG;
- } else if (op2 == 0x0d || op2 == 0x1f) {
+ } else if (op2 == 0x1f) {
- /* nopl/nopw */
- insn->type = INSN_NOP;
+ /* 0f 1f /0 := NOPL */
+ if (modrm_reg == 0)
+ insn->type = INSN_NOP;
} else if (op2 == 0x1e) {
@@ -692,6 +754,10 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
insn->type = INSN_SYSRET;
break;
+ case 0xd6: /* udb */
+ insn->type = INSN_BUG;
+ break;
+
case 0xe0: /* loopne */
case 0xe1: /* loope */
case 0xe2: /* loop */
@@ -880,3 +946,26 @@ unsigned int arch_reloc_size(struct reloc *reloc)
return 8;
}
}
+
+/*
+ * Report whether @reloc is one of the absolute x86-64 relocation types
+ * handled here: R_X86_64_32, R_X86_64_32S or R_X86_64_64.  Every other type
+ * yields false.  @elf is not used by this implementation.
+ */
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
+{
+	unsigned int type = reloc_type(reloc);
+
+	return type == R_X86_64_32 ||
+	       type == R_X86_64_32S ||
+	       type == R_X86_64_64;
+}
+
+#ifdef DISAS
+
+/*
+ * x86 hook for setting up the disassembler state in @dinfo; only built when
+ * DISAS is defined.  Forwards the x86 parameters to disas_info_init() and
+ * returns its result unchanged.
+ * NOTE(review): the two bfd_mach_* values are presumably the 32-bit and
+ * 64-bit machine selections and "att" the syntax option -- confirm against
+ * the disas_info_init() prototype.
+ */
+int arch_disas_info_init(struct disassemble_info *dinfo)
+{
+ return disas_info_init(dinfo, bfd_arch_i386,
+ bfd_mach_i386_i386, bfd_mach_x86_64,
+ "att");
+}
+
+#endif /* DISAS */
diff --git a/tools/objtool/arch/x86/orc.c b/tools/objtool/arch/x86/orc.c
index 7176b9ec5b05..735e150ca6b7 100644
--- a/tools/objtool/arch/x86/orc.c
+++ b/tools/objtool/arch/x86/orc.c
@@ -5,7 +5,6 @@
#include <objtool/check.h>
#include <objtool/orc.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi, struct instruction *insn)
{
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 06ca4a2659a4..e817a3fff449 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -4,6 +4,10 @@
#include <objtool/special.h>
#include <objtool/builtin.h>
#include <objtool/warn.h>
+#include <asm/cpufeatures.h>
+
+/* cpu feature name array generated from cpufeatures.h */
+#include "cpu-feature-names.c"
void arch_handle_alternative(struct special_alt *alt)
{
@@ -89,7 +93,7 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
/* look for a relocation which references .rodata */
text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
insn->offset, insn->len);
- if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
+ if (!text_reloc || !is_sec_sym(text_reloc->sym) ||
!text_reloc->sym->sec->rodata)
return NULL;
@@ -134,3 +138,9 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
*table_size = 0;
return rodata_reloc;
}
+
+/*
+ * Map a CPU feature number to its name from the generated
+ * cpu_feature_names[] table.
+ *
+ * Returns NULL when @feature_number does not index into the table.
+ */
+const char *arch_cpu_feature_name(int feature_number)
+{
+	if (!(feature_number < ARRAY_SIZE(cpu_feature_names)))
+		return NULL;
+
+	return cpu_feature_names[feature_number];
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 80239843e9f0..b780df513715 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -73,34 +73,41 @@ static int parse_hacks(const struct option *opt, const char *str, int unset)
static const struct option check_options[] = {
OPT_GROUP("Actions:"),
+ OPT_BOOLEAN(0, "checksum", &opts.checksum, "generate per-function checksums"),
+ OPT_BOOLEAN(0, "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
+ OPT_STRING_OPTARG('d', "disas", &opts.disas, "function-pattern", "disassemble functions", "*"),
OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks),
- OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
- OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
- OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
- OPT_BOOLEAN(0, "orc", &opts.orc, "generate ORC metadata"),
- OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
- OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
- OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
- OPT_INTEGER(0, "prefix", &opts.prefix, "generate prefix symbols"),
- OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
- OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
- OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
- OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
- OPT_BOOLEAN(0 , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
- OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
+ OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
+ OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
+ OPT_BOOLEAN(0, "noabs", &opts.noabs, "reject absolute references in allocatable sections"),
+ OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
+ OPT_BOOLEAN(0, "orc", &opts.orc, "generate ORC metadata"),
+ OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
+ OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
+ OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
+ OPT_INTEGER(0, "prefix", &opts.prefix, "generate prefix symbols"),
+ OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
+ OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
+ OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
+ OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
+ OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
OPT_GROUP("Options:"),
- OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
- OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
- OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
- OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
- OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
- OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
- OPT_STRING('o', "output", &opts.output, "file", "output file name"),
- OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
- OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
- OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"),
- OPT_BOOLEAN(0, "Werror", &opts.werror, "return error on warnings"),
+ OPT_BOOLEAN(0, "backtrace", &opts.backtrace, "unwind on error"),
+ OPT_BOOLEAN(0, "backup", &opts.backup, "create backup (.orig) file on warning/error"),
+ OPT_STRING(0, "debug-checksum", &opts.debug_checksum, "funcs", "enable checksum debug output"),
+ OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
+ OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
+ OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
+ OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
+ OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
+ OPT_STRING('o', "output", &opts.output, "file", "output file name"),
+ OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
+ OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
+ OPT_STRING(0, "trace", &opts.trace, "func", "trace function validation"),
+ OPT_BOOLEAN('v', "verbose", &opts.verbose, "verbose warnings"),
+ OPT_BOOLEAN(0, "werror", &opts.werror, "return error on warnings"),
+ OPT_BOOLEAN(0, "wide", &opts.wide, "wide output"),
OPT_END(),
};
@@ -158,10 +165,25 @@ static bool opts_valid(void)
return false;
}
- if (opts.hack_jump_label ||
+#ifndef BUILD_KLP
+ if (opts.checksum) {
+ ERROR("--checksum not supported; install xxhash-devel/libxxhash-dev (version >= 0.8) and recompile");
+ return false;
+ }
+#endif
+
+ if (opts.debug_checksum && !opts.checksum) {
+ ERROR("--debug-checksum requires --checksum");
+ return false;
+ }
+
+ if (opts.checksum ||
+ opts.disas ||
+ opts.hack_jump_label ||
opts.hack_noinstr ||
opts.ibt ||
opts.mcount ||
+ opts.noabs ||
opts.noinstr ||
opts.orc ||
opts.retpoline ||
@@ -241,15 +263,12 @@ static void save_argv(int argc, const char **argv)
ERROR_GLIBC("strdup(%s)", argv[i]);
exit(1);
}
- };
+ }
}
-void print_args(void)
+int make_backup(void)
{
- char *backup = NULL;
-
- if (opts.output || opts.dryrun)
- goto print;
+ char *backup;
/*
* Make a backup before kbuild deletes the file so the error
@@ -258,33 +277,32 @@ void print_args(void)
backup = malloc(strlen(objname) + strlen(ORIG_SUFFIX) + 1);
if (!backup) {
ERROR_GLIBC("malloc");
- goto print;
+ return 1;
}
strcpy(backup, objname);
strcat(backup, ORIG_SUFFIX);
- if (copy_file(objname, backup)) {
- backup = NULL;
- goto print;
- }
+ if (copy_file(objname, backup))
+ return 1;
-print:
/*
- * Print the cmdline args to make it easier to recreate. If '--output'
- * wasn't used, add it to the printed args with the backup as input.
+ * Print the cmdline args to make it easier to recreate.
*/
+
fprintf(stderr, "%s", orig_argv[0]);
for (int i = 1; i < orig_argc; i++) {
char *arg = orig_argv[i];
- if (backup && !strcmp(arg, objname))
+ /* Modify the printed args to use the backup */
+ if (!opts.output && !strcmp(arg, objname))
fprintf(stderr, " %s -o %s", backup, objname);
else
fprintf(stderr, " %s", arg);
}
fprintf(stderr, "\n");
+ return 0;
}
int objtool_run(int argc, const char **argv)
@@ -330,5 +348,5 @@ int objtool_run(int argc, const char **argv)
if (!opts.dryrun && file->elf->changed && elf_write(file->elf))
return 1;
- return 0;
+ return elf_close(file->elf);
}
diff --git a/tools/objtool/builtin-klp.c b/tools/objtool/builtin-klp.c
new file mode 100644
index 000000000000..56d5a5b92f72
--- /dev/null
+++ b/tools/objtool/builtin-klp.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <subcmd/parse-options.h>
+#include <string.h>
+#include <stdlib.h>
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
+#include <objtool/klp.h>
+
+/* One "objtool klp" subcommand: its name, help text and entry point. */
+struct subcmd {
+ const char *name; /* compared against argv[0] by cmd_klp() */
+ const char *description; /* printed by cmd_klp_usage() */
+ int (*fn)(int, const char **); /* called with (argc, argv); argv[0] is the subcommand name */
+};
+
+/* Table of the available klp subcommands, searched in order by cmd_klp(). */
+static struct subcmd subcmds[] = {
+ { "diff", "Generate binary diff of two object files", cmd_klp_diff, },
+ { "post-link", "Finalize klp symbols/relocs after module linking", cmd_klp_post_link, },
+};
+
+/*
+ * Print the "objtool klp" usage text, including one line per entry of
+ * subcmds[], to stderr and terminate the process with exit status 1.
+ * This function does not return.
+ */
+static void cmd_klp_usage(void)
+{
+	const struct subcmd *cmd;
+
+	fprintf(stderr, "usage: objtool klp <subcommand> [<options>]\n\n");
+	fprintf(stderr, "Subcommands:\n");
+
+	for (cmd = subcmds; cmd < subcmds + ARRAY_SIZE(subcmds); cmd++)
+		fprintf(stderr, " %s\t%s\n", cmd->name, cmd->description);
+
+	exit(1);
+}
+
+/*
+ * Entry point for "objtool klp": skip the first argument, look the next one
+ * up in subcmds[] and run the matching handler with the remaining arguments
+ * (the subcommand name stays in argv[0]).
+ *
+ * Returns the handler's return value.  With no subcommand, or an unknown
+ * one, the usage text is printed and the process exits.
+ */
+int cmd_klp(int argc, const char **argv)
+{
+	const struct subcmd *cmd;
+
+	argc -= 1;
+	argv += 1;
+
+	/* cmd_klp_usage() exits, so argc is non-zero past this point. */
+	if (argc == 0)
+		cmd_klp_usage();
+
+	for (cmd = subcmds; cmd < subcmds + ARRAY_SIZE(subcmds); cmd++) {
+		if (strcmp(cmd->name, argv[0]) == 0)
+			return cmd->fn(argc, argv);
+	}
+
+	cmd_klp_usage();
+	return 0;
+}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index d14f20ef1db1..3f7999317f4d 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3,6 +3,8 @@
* Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
*/
+#define _GNU_SOURCE /* memmem() */
+#include <fnmatch.h>
#include <string.h>
#include <stdlib.h>
#include <inttypes.h>
@@ -11,10 +13,13 @@
#include <objtool/builtin.h>
#include <objtool/cfi.h>
#include <objtool/arch.h>
+#include <objtool/disas.h>
#include <objtool/check.h>
#include <objtool/special.h>
+#include <objtool/trace.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
+#include <objtool/checksum.h>
+#include <objtool/util.h>
#include <linux/objtool_types.h>
#include <linux/hashtable.h>
@@ -22,11 +27,6 @@
#include <linux/static_call_types.h>
#include <linux/string.h>
-struct alternative {
- struct alternative *next;
- struct instruction *insn;
-};
-
static unsigned long nr_cfi, nr_cfi_reused, nr_cfi_cache;
static struct cfi_init_state initial_func_cfi;
@@ -34,6 +34,10 @@ static struct cfi_state init_cfi;
static struct cfi_state func_cfi;
static struct cfi_state force_undefined_cfi;
+struct disas_context *objtool_disas_ctx;
+
+size_t sym_name_max_len;
+
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset)
{
@@ -106,7 +110,7 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
#define for_each_insn(file, insn) \
for (struct section *__sec, *__fake = (struct section *)1; \
__fake; __fake = NULL) \
- for_each_sec(file, __sec) \
+ for_each_sec(file->elf, __sec) \
sec_for_each_insn(file, __sec, insn)
#define func_for_each_insn(file, func, insn) \
@@ -131,15 +135,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
for (insn = next_insn_same_sec(file, insn); insn; \
insn = next_insn_same_sec(file, insn))
-static inline struct symbol *insn_call_dest(struct instruction *insn)
-{
- if (insn->type == INSN_JUMP_DYNAMIC ||
- insn->type == INSN_CALL_DYNAMIC)
- return NULL;
-
- return insn->_call_dest;
-}
-
static inline struct reloc *insn_jump_table(struct instruction *insn)
{
if (insn->type == INSN_JUMP_DYNAMIC ||
@@ -186,20 +181,6 @@ static bool is_sibling_call(struct instruction *insn)
}
/*
- * Checks if a string ends with another.
- */
-static bool str_ends_with(const char *s, const char *sub)
-{
- const int slen = strlen(s);
- const int sublen = strlen(sub);
-
- if (sublen > slen)
- return 0;
-
- return !memcmp(s + slen - sublen, sub, sublen);
-}
-
-/*
* Checks if a function is a Rust "noreturn" one.
*/
static bool is_rust_noreturn(const struct symbol *func)
@@ -217,6 +198,7 @@ static bool is_rust_noreturn(const struct symbol *func)
* these come from the Rust standard library).
*/
return str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail") ||
+ str_ends_with(func->name, "_4core6option13expect_failed") ||
str_ends_with(func->name, "_4core6option13unwrap_failed") ||
str_ends_with(func->name, "_4core6result13unwrap_failed") ||
str_ends_with(func->name, "_4core9panicking5panic") ||
@@ -261,7 +243,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
if (!func)
return false;
- if (func->bind == STB_GLOBAL || func->bind == STB_WEAK) {
+ if (!is_local_sym(func)) {
if (is_rust_noreturn(func))
return true;
@@ -270,7 +252,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
return true;
}
- if (func->bind == STB_WEAK)
+ if (is_weak_sym(func))
return false;
if (!func->len)
@@ -430,14 +412,13 @@ static int decode_instructions(struct objtool_file *file)
struct symbol *func;
unsigned long offset;
struct instruction *insn;
- int ret;
- for_each_sec(file, sec) {
+ for_each_sec(file->elf, sec) {
struct instruction *insns = NULL;
u8 prev_len = 0;
u8 idx = 0;
- if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ if (!is_text_sec(sec))
continue;
if (strcmp(sec->name, ".altinstr_replacement") &&
@@ -460,9 +441,9 @@ static int decode_instructions(struct objtool_file *file)
if (!strcmp(sec->name, ".init.text") && !opts.module)
sec->init = true;
- for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
+ for (offset = 0; offset < sec_size(sec); offset += insn->len) {
if (!insns || idx == INSN_CHUNK_MAX) {
- insns = calloc(sizeof(*insn), INSN_CHUNK_SIZE);
+ insns = calloc(INSN_CHUNK_SIZE, sizeof(*insn));
if (!insns) {
ERROR_GLIBC("calloc");
return -1;
@@ -479,11 +460,8 @@ static int decode_instructions(struct objtool_file *file)
insn->offset = offset;
insn->prev_len = prev_len;
- ret = arch_decode_instruction(file, sec, offset,
- sec->sh.sh_size - offset,
- insn);
- if (ret)
- return ret;
+ if (arch_decode_instruction(file, sec, offset, sec_size(sec) - offset, insn))
+ return -1;
prev_len = insn->len;
@@ -500,12 +478,12 @@ static int decode_instructions(struct objtool_file *file)
}
sec_for_each_sym(sec, func) {
- if (func->type != STT_NOTYPE && func->type != STT_FUNC)
+ if (!is_notype_sym(func) && !is_func_sym(func))
continue;
- if (func->offset == sec->sh.sh_size) {
+ if (func->offset == sec_size(sec)) {
/* Heuristic: likely an "end" symbol */
- if (func->type == STT_NOTYPE)
+ if (is_notype_sym(func))
continue;
ERROR("%s(): STT_FUNC at end of section", func->name);
return -1;
@@ -521,7 +499,7 @@ static int decode_instructions(struct objtool_file *file)
sym_for_each_insn(file, func, insn) {
insn->sym = func;
- if (func->type == STT_FUNC &&
+ if (is_func_sym(func) &&
insn->type == INSN_ENDBR &&
list_empty(&insn->call_node)) {
if (insn->offset == func->offset) {
@@ -565,7 +543,7 @@ static int add_pv_ops(struct objtool_file *file, const char *symname)
idx = (reloc_offset(reloc) - sym->offset) / sizeof(unsigned long);
func = reloc->sym;
- if (func->type == STT_SECTION)
+ if (is_sec_sym(func))
func = find_symbol_by_offset(reloc->sym->sec,
reloc_addend(reloc));
if (!func) {
@@ -599,7 +577,7 @@ static int init_pv_ops(struct objtool_file *file)
};
const char *pv_ops;
struct symbol *sym;
- int idx, nr, ret;
+ int idx, nr;
if (!opts.noinstr)
return 0;
@@ -611,7 +589,7 @@ static int init_pv_ops(struct objtool_file *file)
return 0;
nr = sym->len / sizeof(unsigned long);
- file->pv_ops = calloc(sizeof(struct pv_state), nr);
+ file->pv_ops = calloc(nr, sizeof(struct pv_state));
if (!file->pv_ops) {
ERROR_GLIBC("calloc");
return -1;
@@ -621,14 +599,27 @@ static int init_pv_ops(struct objtool_file *file)
INIT_LIST_HEAD(&file->pv_ops[idx].targets);
for (idx = 0; (pv_ops = pv_ops_tables[idx]); idx++) {
- ret = add_pv_ops(file, pv_ops);
- if (ret)
- return ret;
+ if (add_pv_ops(file, pv_ops))
+ return -1;
}
return 0;
}
+/*
+ * Report whether the object being checked is a livepatch module: the
+ * --module option must be set and the .modinfo section must contain the
+ * bytes "livepatch=Y" immediately preceded by a NUL byte.
+ */
+static bool is_livepatch_module(struct objtool_file *file)
+{
+	/* sizeof() also counts the implicit terminator, which is not matched. */
+	static const char tag[] = "\0livepatch=Y";
+	struct section *modinfo;
+
+	if (!opts.module)
+		return false;
+
+	modinfo = find_section_by_name(file->elf, ".modinfo");
+
+	return modinfo &&
+	       memmem(modinfo->data->d_buf, sec_size(modinfo),
+		      tag, sizeof(tag) - 1);
+}
+
static int create_static_call_sections(struct objtool_file *file)
{
struct static_call_site *site;
@@ -640,8 +631,14 @@ static int create_static_call_sections(struct objtool_file *file)
sec = find_section_by_name(file->elf, ".static_call_sites");
if (sec) {
- INIT_LIST_HEAD(&file->static_call_list);
- WARN("file already has .static_call_sites section, skipping");
+ /*
+ * Livepatch modules may have already extracted the static call
+ * site entries to take advantage of vmlinux static call
+ * privileges.
+ */
+ if (!file->klp)
+ WARN("file already has .static_call_sites section, skipping");
+
return 0;
}
@@ -685,7 +682,7 @@ static int create_static_call_sections(struct objtool_file *file)
key_sym = find_symbol_by_name(file->elf, tmp);
if (!key_sym) {
- if (!opts.module) {
+ if (!opts.module || file->klp) {
ERROR("static_call: can't find static_call_key symbol: %s", tmp);
return -1;
}
@@ -828,7 +825,7 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
struct symbol *sym = insn->sym;
*site = 0;
- if (opts.module && sym && sym->type == STT_FUNC &&
+ if (opts.module && sym && is_func_sym(sym) &&
insn->offset == sym->offset &&
(!strcmp(sym->name, "init_module") ||
!strcmp(sym->name, "cleanup_module"))) {
@@ -856,14 +853,13 @@ static int create_cfi_sections(struct objtool_file *file)
sec = find_section_by_name(file->elf, ".cfi_sites");
if (sec) {
- INIT_LIST_HEAD(&file->call_list);
WARN("file already has .cfi_sites section, skipping");
return 0;
}
idx = 0;
- for_each_sym(file, sym) {
- if (sym->type != STT_FUNC)
+ for_each_sym(file->elf, sym) {
+ if (!is_func_sym(sym))
continue;
if (strncmp(sym->name, "__cfi_", 6))
@@ -878,8 +874,8 @@ static int create_cfi_sections(struct objtool_file *file)
return -1;
idx = 0;
- for_each_sym(file, sym) {
- if (sym->type != STT_FUNC)
+ for_each_sym(file->elf, sym) {
+ if (!is_func_sym(sym))
continue;
if (strncmp(sym->name, "__cfi_", 6))
@@ -905,8 +901,13 @@ static int create_mcount_loc_sections(struct objtool_file *file)
sec = find_section_by_name(file->elf, "__mcount_loc");
if (sec) {
- INIT_LIST_HEAD(&file->mcount_loc_list);
- WARN("file already has __mcount_loc section, skipping");
+ /*
+ * Livepatch modules have already extracted their __mcount_loc
+ * entries to cover the !CONFIG_FTRACE_MCOUNT_USE_OBJTOOL case.
+ */
+ if (!file->klp)
+ WARN("file already has __mcount_loc section, skipping");
+
return 0;
}
@@ -950,7 +951,6 @@ static int create_direct_call_sections(struct objtool_file *file)
sec = find_section_by_name(file->elf, ".call_sites");
if (sec) {
- INIT_LIST_HEAD(&file->call_list);
WARN("file already has .call_sites section, skipping");
return 0;
}
@@ -981,6 +981,59 @@ static int create_direct_call_sections(struct objtool_file *file)
return 0;
}
+#ifdef BUILD_KLP
+/*
+ * Emit the ".discard.sym_checksum" section: one struct sym_checksum entry,
+ * plus one relocation against the symbol, for every symbol whose
+ * csum.checksum is non-zero.
+ *
+ * Returns 0 on success, including when the section already exists or no
+ * symbol has a checksum, and -1 if creating the section or a relocation
+ * fails.
+ */
+static int create_sym_checksum_section(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *sym;
+ unsigned int idx = 0;
+ struct sym_checksum *checksum;
+ size_t entsize = sizeof(struct sym_checksum);
+
+ /* Nothing to do if the section is already there; stay quiet on dry runs. */
+ sec = find_section_by_name(file->elf, ".discard.sym_checksum");
+ if (sec) {
+ if (!opts.dryrun)
+ WARN("file already has .discard.sym_checksum section, skipping");
+
+ return 0;
+ }
+
+ /* First pass: count the symbols that carry a checksum. */
+ for_each_sym(file->elf, sym)
+ if (sym->csum.checksum)
+ idx++;
+
+ if (!idx)
+ return 0;
+
+ /*
+ * NOTE(review): the two idx arguments are presumably the entry count and
+ * the reloc count -- confirm against elf_create_section_pair().
+ */
+ sec = elf_create_section_pair(file->elf, ".discard.sym_checksum", entsize,
+ idx, idx);
+ if (!sec)
+ return -1;
+
+ /* Second pass: fill entry idx and its relocation for each such symbol. */
+ idx = 0;
+ for_each_sym(file->elf, sym) {
+ if (!sym->csum.checksum)
+ continue;
+
+ if (!elf_init_reloc(file->elf, sec->rsec, idx, idx * entsize,
+ sym, 0, R_TEXT64))
+ return -1;
+
+ checksum = (struct sym_checksum *)sec->data->d_buf + idx;
+ checksum->addr = 0; /* reloc */
+ checksum->checksum = sym->csum.checksum;
+
+ mark_sec_changed(file->elf, sec, true);
+
+ idx++;
+ }
+
+ return 0;
+}
+#else
+/* Stub used when BUILD_KLP is not defined: always fails with -EINVAL. */
+static int create_sym_checksum_section(struct objtool_file *file) { return -EINVAL; }
+#endif
+
/*
* Warnings shouldn't be reported for ignored functions.
*/
@@ -1432,9 +1485,14 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn,
}
static bool is_first_func_insn(struct objtool_file *file,
- struct instruction *insn, struct symbol *sym)
+ struct instruction *insn)
{
- if (insn->offset == sym->offset)
+ struct symbol *func = insn_func(insn);
+
+ if (!func)
+ return false;
+
+ if (insn->offset == func->offset)
return true;
/* Allow direct CALL/JMP past ENDBR */
@@ -1442,7 +1500,7 @@ static bool is_first_func_insn(struct objtool_file *file,
struct instruction *prev = prev_insn_same_sym(file, insn);
if (prev && prev->type == INSN_ENDBR &&
- insn->offset == sym->offset + prev->len)
+ insn->offset == func->offset + prev->len)
return true;
}
@@ -1450,44 +1508,22 @@ static bool is_first_func_insn(struct objtool_file *file,
}
/*
- * A sibling call is a tail-call to another symbol -- to differentiate from a
- * recursive tail-call which is to the same symbol.
- */
-static bool jump_is_sibling_call(struct objtool_file *file,
- struct instruction *from, struct instruction *to)
-{
- struct symbol *fs = from->sym;
- struct symbol *ts = to->sym;
-
- /* Not a sibling call if from/to a symbol hole */
- if (!fs || !ts)
- return false;
-
- /* Not a sibling call if not targeting the start of a symbol. */
- if (!is_first_func_insn(file, to, ts))
- return false;
-
- /* Disallow sibling calls into STT_NOTYPE */
- if (ts->type == STT_NOTYPE)
- return false;
-
- /* Must not be self to be a sibling */
- return fs->pfunc != ts->pfunc;
-}
-
-/*
* Find the destination instructions for all jumps.
*/
static int add_jump_destinations(struct objtool_file *file)
{
- struct instruction *insn, *jump_dest;
+ struct instruction *insn;
struct reloc *reloc;
- struct section *dest_sec;
- unsigned long dest_off;
- int ret;
for_each_insn(file, insn) {
struct symbol *func = insn_func(insn);
+ struct instruction *dest_insn;
+ struct section *dest_sec;
+ struct symbol *dest_sym;
+ unsigned long dest_off;
+
+ if (!is_static_jump(insn))
+ continue;
if (insn->jump_dest) {
/*
@@ -1496,53 +1532,53 @@ static int add_jump_destinations(struct objtool_file *file)
*/
continue;
}
- if (!is_static_jump(insn))
- continue;
reloc = insn_reloc(file, insn);
if (!reloc) {
dest_sec = insn->sec;
dest_off = arch_jump_destination(insn);
- } else if (reloc->sym->type == STT_SECTION) {
- dest_sec = reloc->sym->sec;
- dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
- } else if (reloc->sym->retpoline_thunk) {
- ret = add_retpoline_call(file, insn);
- if (ret)
- return ret;
- continue;
- } else if (reloc->sym->return_thunk) {
- add_return_call(file, insn, true);
- continue;
- } else if (func) {
- /*
- * External sibling call or internal sibling call with
- * STT_FUNC reloc.
- */
- ret = add_call_dest(file, insn, reloc->sym, true);
- if (ret)
- return ret;
- continue;
- } else if (reloc->sym->sec->idx) {
- dest_sec = reloc->sym->sec;
- dest_off = reloc->sym->sym.st_value +
- arch_dest_reloc_offset(reloc_addend(reloc));
+ dest_sym = dest_sec->sym;
} else {
- /* non-func asm code jumping to another file */
- continue;
+ dest_sym = reloc->sym;
+ if (is_undef_sym(dest_sym)) {
+ if (dest_sym->retpoline_thunk) {
+ if (add_retpoline_call(file, insn))
+ return -1;
+ continue;
+ }
+
+ if (dest_sym->return_thunk) {
+ add_return_call(file, insn, true);
+ continue;
+ }
+
+ /* External symbol */
+ if (func) {
+ /* External sibling call */
+ if (add_call_dest(file, insn, dest_sym, true))
+ return -1;
+ continue;
+ }
+
+ /* Non-func asm code jumping to external symbol */
+ continue;
+ }
+
+ dest_sec = dest_sym->sec;
+ dest_off = dest_sym->offset + arch_insn_adjusted_addend(insn, reloc);
}
- jump_dest = find_insn(file, dest_sec, dest_off);
- if (!jump_dest) {
+ dest_insn = find_insn(file, dest_sec, dest_off);
+ if (!dest_insn) {
struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
/*
- * This is a special case for retbleed_untrain_ret().
- * It jumps to __x86_return_thunk(), but objtool
- * can't find the thunk's starting RET
- * instruction, because the RET is also in the
- * middle of another instruction. Objtool only
- * knows about the outer instruction.
+ * retbleed_untrain_ret() jumps to
+ * __x86_return_thunk(), but objtool can't find
+ * the thunk's starting RET instruction,
+ * because the RET is also in the middle of
+ * another instruction. Objtool only knows
+ * about the outer instruction.
*/
if (sym && sym->embedded_insn) {
add_return_call(file, insn, false);
@@ -1550,76 +1586,52 @@ static int add_jump_destinations(struct objtool_file *file)
}
/*
- * GCOV/KCOV dead code can jump to the end of the
- * function/section.
+ * GCOV/KCOV dead code can jump to the end of
+ * the function/section.
*/
if (file->ignore_unreachables && func &&
dest_sec == insn->sec &&
dest_off == func->offset + func->len)
continue;
- ERROR_INSN(insn, "can't find jump dest instruction at %s+0x%lx",
- dest_sec->name, dest_off);
+ ERROR_INSN(insn, "can't find jump dest instruction at %s",
+ offstr(dest_sec, dest_off));
return -1;
}
- /*
- * An intra-TU jump in retpoline.o might not have a relocation
- * for its jump dest, in which case the above
- * add_{retpoline,return}_call() didn't happen.
- */
- if (jump_dest->sym && jump_dest->offset == jump_dest->sym->offset) {
- if (jump_dest->sym->retpoline_thunk) {
- ret = add_retpoline_call(file, insn);
- if (ret)
- return ret;
- continue;
- }
- if (jump_dest->sym->return_thunk) {
- add_return_call(file, insn, true);
- continue;
- }
+ if (!dest_sym || is_sec_sym(dest_sym)) {
+ dest_sym = dest_insn->sym;
+ if (!dest_sym)
+ goto set_jump_dest;
}
- /*
- * Cross-function jump.
- */
- if (func && insn_func(jump_dest) && func != insn_func(jump_dest)) {
+ if (dest_sym->retpoline_thunk && dest_insn->offset == dest_sym->offset) {
+ if (add_retpoline_call(file, insn))
+ return -1;
+ continue;
+ }
- /*
- * For GCC 8+, create parent/child links for any cold
- * subfunctions. This is _mostly_ redundant with a
- * similar initialization in read_symbols().
- *
- * If a function has aliases, we want the *first* such
- * function in the symbol table to be the subfunction's
- * parent. In that case we overwrite the
- * initialization done in read_symbols().
- *
- * However this code can't completely replace the
- * read_symbols() code because this doesn't detect the
- * case where the parent function's only reference to a
- * subfunction is through a jump table.
- */
- if (!strstr(func->name, ".cold") &&
- strstr(insn_func(jump_dest)->name, ".cold")) {
- func->cfunc = insn_func(jump_dest);
- insn_func(jump_dest)->pfunc = func;
- }
+ if (dest_sym->return_thunk && dest_insn->offset == dest_sym->offset) {
+ add_return_call(file, insn, true);
+ continue;
}
- if (jump_is_sibling_call(file, insn, jump_dest)) {
- /*
- * Internal sibling call without reloc or with
- * STT_SECTION reloc.
- */
- ret = add_call_dest(file, insn, insn_func(jump_dest), true);
- if (ret)
- return ret;
+ if (!insn->sym || insn->sym->pfunc == dest_sym->pfunc)
+ goto set_jump_dest;
+
+ /*
+ * Internal cross-function jump.
+ */
+
+ if (is_first_func_insn(file, dest_insn)) {
+ /* Internal sibling call */
+ if (add_call_dest(file, insn, dest_sym, true))
+ return -1;
continue;
}
- insn->jump_dest = jump_dest;
+set_jump_dest:
+ insn->jump_dest = dest_insn;
}
return 0;
@@ -1645,7 +1657,6 @@ static int add_call_destinations(struct objtool_file *file)
unsigned long dest_off;
struct symbol *dest;
struct reloc *reloc;
- int ret;
for_each_insn(file, insn) {
struct symbol *func = insn_func(insn);
@@ -1657,9 +1668,8 @@ static int add_call_destinations(struct objtool_file *file)
dest_off = arch_jump_destination(insn);
dest = find_call_destination(insn->sec, dest_off);
- ret = add_call_dest(file, insn, dest, false);
- if (ret)
- return ret;
+ if (add_call_dest(file, insn, dest, false))
+ return -1;
if (func && func->ignore)
continue;
@@ -1669,13 +1679,13 @@ static int add_call_destinations(struct objtool_file *file)
return -1;
}
- if (func && insn_call_dest(insn)->type != STT_FUNC) {
+ if (func && !is_func_sym(insn_call_dest(insn))) {
ERROR_INSN(insn, "unsupported call to non-function");
return -1;
}
- } else if (reloc->sym->type == STT_SECTION) {
- dest_off = arch_dest_reloc_offset(reloc_addend(reloc));
+ } else if (is_sec_sym(reloc->sym)) {
+ dest_off = arch_insn_adjusted_addend(insn, reloc);
dest = find_call_destination(reloc->sym->sec, dest_off);
if (!dest) {
ERROR_INSN(insn, "can't find call dest symbol at %s+0x%lx",
@@ -1683,19 +1693,16 @@ static int add_call_destinations(struct objtool_file *file)
return -1;
}
- ret = add_call_dest(file, insn, dest, false);
- if (ret)
- return ret;
+ if (add_call_dest(file, insn, dest, false))
+ return -1;
} else if (reloc->sym->retpoline_thunk) {
- ret = add_retpoline_call(file, insn);
- if (ret)
- return ret;
+ if (add_retpoline_call(file, insn))
+ return -1;
} else {
- ret = add_call_dest(file, insn, reloc->sym, false);
- if (ret)
- return ret;
+ if (add_call_dest(file, insn, reloc->sym, false))
+ return -1;
}
}
@@ -1744,6 +1751,7 @@ static int handle_group_alt(struct objtool_file *file,
orig_alt_group->last_insn = last_orig_insn;
orig_alt_group->nop = NULL;
orig_alt_group->ignore = orig_insn->ignore_alts;
+ orig_alt_group->feature = 0;
} else {
if (orig_alt_group->last_insn->offset + orig_alt_group->last_insn->len -
orig_alt_group->first_insn->offset != special_alt->orig_len) {
@@ -1783,6 +1791,7 @@ static int handle_group_alt(struct objtool_file *file,
nop->type = INSN_NOP;
nop->sym = orig_insn->sym;
nop->alt_group = new_alt_group;
+ nop->fake = 1;
}
if (!special_alt->new_len) {
@@ -1847,6 +1856,7 @@ end:
new_alt_group->nop = nop;
new_alt_group->ignore = (*new_insn)->ignore_alts;
new_alt_group->cfi = orig_alt_group->cfi;
+ new_alt_group->feature = special_alt->feature;
return 0;
}
@@ -1911,8 +1921,9 @@ static int add_special_section_alts(struct objtool_file *file)
struct list_head special_alts;
struct instruction *orig_insn, *new_insn;
struct special_alt *special_alt, *tmp;
+ enum alternative_type alt_type;
struct alternative *alt;
- int ret;
+ struct alternative *a;
if (special_get_alts(file->elf, &special_alts))
return -1;
@@ -1944,16 +1955,18 @@ static int add_special_section_alts(struct objtool_file *file)
continue;
}
- ret = handle_group_alt(file, special_alt, orig_insn,
- &new_insn);
- if (ret)
- return ret;
+ if (handle_group_alt(file, special_alt, orig_insn, &new_insn))
+ return -1;
+
+ alt_type = ALT_TYPE_INSTRUCTIONS;
} else if (special_alt->jump_or_nop) {
- ret = handle_jump_alt(file, special_alt, orig_insn,
- &new_insn);
- if (ret)
- return ret;
+ if (handle_jump_alt(file, special_alt, orig_insn, &new_insn))
+ return -1;
+
+ alt_type = ALT_TYPE_JUMP_TABLE;
+ } else {
+ alt_type = ALT_TYPE_EX_TABLE;
}
alt = calloc(1, sizeof(*alt));
@@ -1963,8 +1976,20 @@ static int add_special_section_alts(struct objtool_file *file)
}
alt->insn = new_insn;
- alt->next = orig_insn->alts;
- orig_insn->alts = alt;
+ alt->type = alt_type;
+ alt->next = NULL;
+
+ /*
+ * Store alternatives in the same order they have been
+ * defined.
+ */
+ if (!orig_insn->alts) {
+ orig_insn->alts = alt;
+ } else {
+ for (a = orig_insn->alts; a->next; a = a->next)
+ ;
+ a->next = alt;
+ }
list_del(&special_alt->list);
free(special_alt);
@@ -2141,15 +2166,13 @@ static int add_func_jump_tables(struct objtool_file *file,
struct symbol *func)
{
struct instruction *insn;
- int ret;
func_for_each_insn(file, func, insn) {
if (!insn_jump_table(insn))
continue;
- ret = add_jump_table(file, insn);
- if (ret)
- return ret;
+ if (add_jump_table(file, insn))
+ return -1;
}
return 0;
@@ -2163,19 +2186,17 @@ static int add_func_jump_tables(struct objtool_file *file,
static int add_jump_table_alts(struct objtool_file *file)
{
struct symbol *func;
- int ret;
if (!file->rodata)
return 0;
- for_each_sym(file, func) {
- if (func->type != STT_FUNC)
+ for_each_sym(file->elf, func) {
+ if (!is_func_sym(func) || func->alias != func)
continue;
mark_func_jump_tables(file, func);
- ret = add_func_jump_tables(file, func);
- if (ret)
- return ret;
+ if (add_func_jump_tables(file, func))
+ return -1;
}
return 0;
@@ -2209,14 +2230,14 @@ static int read_unwind_hints(struct objtool_file *file)
return -1;
}
- if (sec->sh.sh_size % sizeof(struct unwind_hint)) {
+ if (sec_size(sec) % sizeof(struct unwind_hint)) {
ERROR("struct unwind_hint size mismatch");
return -1;
}
file->hints = true;
- for (i = 0; i < sec->sh.sh_size / sizeof(struct unwind_hint); i++) {
+ for (i = 0; i < sec_size(sec) / sizeof(struct unwind_hint); i++) {
hint = (struct unwind_hint *)sec->data->d_buf + i;
reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint));
@@ -2225,14 +2246,7 @@ static int read_unwind_hints(struct objtool_file *file)
return -1;
}
- if (reloc->sym->type == STT_SECTION) {
- offset = reloc_addend(reloc);
- } else if (reloc->sym->local_label) {
- offset = reloc->sym->offset;
- } else {
- ERROR("unexpected relocation symbol type in %s", sec->rsec->name);
- return -1;
- }
+ offset = reloc->sym->offset + reloc_addend(reloc);
insn = find_insn(file, reloc->sym->sec, offset);
if (!insn) {
@@ -2261,7 +2275,7 @@ static int read_unwind_hints(struct objtool_file *file)
if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
- if (sym && sym->bind == STB_GLOBAL) {
+ if (sym && is_global_sym(sym)) {
if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
ERROR_INSN(insn, "UNWIND_HINT_IRET_REGS without ENDBR");
return -1;
@@ -2299,7 +2313,7 @@ static int read_annotate(struct objtool_file *file,
struct instruction *insn;
struct reloc *reloc;
uint64_t offset;
- int type, ret;
+ int type;
sec = find_section_by_name(file->elf, ".discard.annotate_insn");
if (!sec)
@@ -2317,10 +2331,13 @@ static int read_annotate(struct objtool_file *file,
sec->sh.sh_entsize = 8;
}
- for_each_reloc(sec->rsec, reloc) {
- type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4);
- type = bswap_if_needed(file->elf, type);
+ if (sec_num_entries(sec) != sec_num_entries(sec->rsec)) {
+ ERROR("bad .discard.annotate_insn section: missing relocs");
+ return -1;
+ }
+ for_each_reloc(sec->rsec, reloc) {
+ type = annotype(file->elf, sec, reloc);
offset = reloc->sym->offset + reloc_addend(reloc);
insn = find_insn(file, reloc->sym->sec, offset);
@@ -2329,9 +2346,8 @@ static int read_annotate(struct objtool_file *file,
return -1;
}
- ret = func(file, type, insn);
- if (ret < 0)
- return ret;
+ if (func(file, type, insn))
+ return -1;
}
return 0;
@@ -2392,6 +2408,8 @@ static int __annotate_ifc(struct objtool_file *file, int type, struct instructio
static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn)
{
+ struct symbol *sym;
+
switch (type) {
case ANNOTYPE_NOENDBR:
/* early */
@@ -2433,6 +2451,15 @@ static int __annotate_late(struct objtool_file *file, int type, struct instructi
insn->dead_end = false;
break;
+ case ANNOTYPE_NOCFI:
+ sym = insn->sym;
+ if (!sym) {
+ ERROR_INSN(insn, "dodgy NOCFI annotation");
+ return -1;
+ }
+ insn->sym->nocfi = 1;
+ break;
+
default:
ERROR_INSN(insn, "Unknown annotation type: %d", type);
return -1;
@@ -2453,28 +2480,19 @@ static bool is_profiling_func(const char *name)
if (!strncmp(name, "__sanitizer_cov_", 16))
return true;
- /*
- * Some compilers currently do not remove __tsan_func_entry/exit nor
- * __tsan_atomic_signal_fence (used for barrier instrumentation) with
- * the __no_sanitize_thread attribute, remove them. Once the kernel's
- * minimum Clang version is 14.0, this can be removed.
- */
- if (!strncmp(name, "__tsan_func_", 12) ||
- !strcmp(name, "__tsan_atomic_signal_fence"))
- return true;
-
return false;
}
static int classify_symbols(struct objtool_file *file)
{
struct symbol *func;
+ size_t len;
- for_each_sym(file, func) {
- if (func->type == STT_NOTYPE && strstarts(func->name, ".L"))
+ for_each_sym(file->elf, func) {
+ if (is_notype_sym(func) && strstarts(func->name, ".L"))
func->local_label = true;
- if (func->bind != STB_GLOBAL)
+ if (!is_global_sym(func))
continue;
if (!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
@@ -2495,6 +2513,10 @@ static int classify_symbols(struct objtool_file *file)
if (is_profiling_func(func->name))
func->profiling_func = true;
+
+ len = strlen(func->name);
+ if (len > sym_name_max_len)
+ sym_name_max_len = len;
}
return 0;
@@ -2515,7 +2537,7 @@ static void mark_rodata(struct objtool_file *file)
*
* .rodata.str1.* sections are ignored; they don't contain jump tables.
*/
- for_each_sec(file, sec) {
+ for_each_sec(file->elf, sec) {
if ((!strncmp(sec->name, ".rodata", 7) &&
!strstr(sec->name, ".str1.")) ||
!strncmp(sec->name, ".data.rel.ro", 12)) {
@@ -2527,78 +2549,115 @@ static void mark_rodata(struct objtool_file *file)
file->rodata = found;
}
+static void mark_holes(struct objtool_file *file)
+{
+ struct instruction *insn;
+ bool in_hole = false;
+
+ if (!opts.link)
+ return;
+
+ /*
+ * Whole archive runs might encounter dead code from weak symbols.
+ * This is where the linker will have dropped the weak symbol in
+ * favour of a regular symbol, but leaves the code in place.
+ */
+ for_each_insn(file, insn) {
+ if (insn->sym || !find_symbol_hole_containing(insn->sec, insn->offset)) {
+ in_hole = false;
+ continue;
+ }
+
+ /* Skip function padding and pfx code */
+ if (!in_hole && insn->type == INSN_NOP)
+ continue;
+
+ in_hole = true;
+ insn->hole = 1;
+
+ /*
+ * If this hole jumps to a .cold function, ignore that function.
+ */
+ if (insn->jump_dest) {
+ struct symbol *dest_func = insn_func(insn->jump_dest);
+
+ if (dest_func && dest_func->cold)
+ dest_func->ignore = true;
+ }
+ }
+}
+
+static bool validate_branch_enabled(void)
+{
+ return opts.stackval ||
+ opts.orc ||
+ opts.uaccess ||
+ opts.checksum;
+}
+
static int decode_sections(struct objtool_file *file)
{
- int ret;
+ file->klp = is_livepatch_module(file);
mark_rodata(file);
- ret = init_pv_ops(file);
- if (ret)
- return ret;
+ if (init_pv_ops(file))
+ return -1;
/*
* Must be before add_{jump_call}_destination.
*/
- ret = classify_symbols(file);
- if (ret)
- return ret;
+ if (classify_symbols(file))
+ return -1;
- ret = decode_instructions(file);
- if (ret)
- return ret;
+ if (decode_instructions(file))
+ return -1;
- ret = add_ignores(file);
- if (ret)
- return ret;
+ if (add_ignores(file))
+ return -1;
add_uaccess_safe(file);
- ret = read_annotate(file, __annotate_early);
- if (ret)
- return ret;
+ if (read_annotate(file, __annotate_early))
+ return -1;
/*
* Must be before add_jump_destinations(), which depends on 'func'
* being set for alternatives, to enable proper sibling call detection.
*/
- if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) {
- ret = add_special_section_alts(file);
- if (ret)
- return ret;
+ if (validate_branch_enabled() || opts.noinstr || opts.hack_jump_label || opts.disas) {
+ if (add_special_section_alts(file))
+ return -1;
}
- ret = add_jump_destinations(file);
- if (ret)
- return ret;
+ if (add_jump_destinations(file))
+ return -1;
/*
* Must be before add_call_destination(); it changes INSN_CALL to
* INSN_JUMP.
*/
- ret = read_annotate(file, __annotate_ifc);
- if (ret)
- return ret;
+ if (read_annotate(file, __annotate_ifc))
+ return -1;
- ret = add_call_destinations(file);
- if (ret)
- return ret;
+ if (add_call_destinations(file))
+ return -1;
- ret = add_jump_table_alts(file);
- if (ret)
- return ret;
+ if (add_jump_table_alts(file))
+ return -1;
- ret = read_unwind_hints(file);
- if (ret)
- return ret;
+ if (read_unwind_hints(file))
+ return -1;
+
+ /* Must be after add_jump_destinations(); mark_holes() reads insn->jump_dest */
+ mark_holes(file);
/*
* Must be after add_call_destinations() such that it can override
* dead_end_function() marks.
*/
- ret = read_annotate(file, __annotate_late);
- if (ret)
- return ret;
+ if (read_annotate(file, __annotate_late))
+ return -1;
return 0;
}
@@ -3223,18 +3282,19 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
return 0;
}
-static int handle_insn_ops(struct instruction *insn,
- struct instruction *next_insn,
- struct insn_state *state)
+static int noinline handle_insn_ops(struct instruction *insn,
+ struct instruction *next_insn,
+ struct insn_state *state)
{
+ struct insn_state prev_state __maybe_unused = *state;
struct stack_op *op;
- int ret;
+ int ret = 0;
for (op = insn->stack_ops; op; op = op->next) {
ret = update_cfi_state(insn, next_insn, &state->cfi, op);
if (ret)
- return ret;
+ goto done;
if (!opts.uaccess || !insn->alt_group)
continue;
@@ -3244,7 +3304,8 @@ static int handle_insn_ops(struct instruction *insn,
state->uaccess_stack = 1;
} else if (state->uaccess_stack >> 31) {
WARN_INSN(insn, "PUSHF stack exhausted");
- return 1;
+ ret = 1;
+ goto done;
}
state->uaccess_stack <<= 1;
state->uaccess_stack |= state->uaccess;
@@ -3260,7 +3321,10 @@ static int handle_insn_ops(struct instruction *insn,
}
}
- return 0;
+done:
+ TRACE_INSN_STATE(insn, &prev_state, state);
+
+ return ret;
}
static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
@@ -3352,7 +3416,7 @@ static bool pv_call_dest(struct objtool_file *file, struct instruction *insn)
if (!reloc || strcmp(reloc->sym->name, "pv_ops"))
return false;
- idx = (arch_dest_reloc_offset(reloc_addend(reloc)) / sizeof(void *));
+ idx = arch_insn_adjusted_addend(insn, reloc) / sizeof(void *);
if (file->pv_ops[idx].clean)
return true;
@@ -3514,9 +3578,14 @@ static bool skip_alt_group(struct instruction *insn)
{
struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL;
+ if (!insn->alt_group)
+ return false;
+
/* ANNOTATE_IGNORE_ALTERNATIVE */
- if (insn->alt_group && insn->alt_group->ignore)
+ if (insn->alt_group->ignore) {
+ TRACE_ALT(insn, "alt group ignored");
return true;
+ }
/*
* For NOP patched with CLAC/STAC, only follow the latter to avoid
@@ -3538,256 +3607,398 @@ static bool skip_alt_group(struct instruction *insn)
return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC;
}
-/*
- * Follow the branch starting at the given instruction, and recursively follow
- * any other branches (jumps). Meanwhile, track the frame pointer state at
- * each instruction and validate all the rules described in
- * tools/objtool/Documentation/objtool.txt.
- */
-static int validate_branch(struct objtool_file *file, struct symbol *func,
- struct instruction *insn, struct insn_state state)
+static int checksum_debug_init(struct objtool_file *file)
{
- struct alternative *alt;
- struct instruction *next_insn, *prev_insn = NULL;
- struct section *sec;
- u8 visited;
- int ret;
+ char *dup, *s;
- if (func && func->ignore)
+ if (!opts.debug_checksum)
return 0;
- sec = insn->sec;
+ dup = strdup(opts.debug_checksum);
+ if (!dup) {
+ ERROR_GLIBC("strdup");
+ return -1;
+ }
- while (1) {
- next_insn = next_insn_to_validate(file, insn);
+ s = dup;
+ while (*s) {
+ struct symbol *func;
+ char *comma;
- if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
- /* Ignore KCFI type preambles, which always fall through */
- if (!strncmp(func->name, "__cfi_", 6) ||
- !strncmp(func->name, "__pfx_", 6))
- return 0;
+ comma = strchr(s, ',');
+ if (comma)
+ *comma = '\0';
- if (file->ignore_unreachables)
- return 0;
+ func = find_symbol_by_name(file->elf, s);
+ if (!func || !is_func_sym(func))
+ WARN("--debug-checksum: can't find '%s'", s);
+ else
+ func->debug_checksum = 1;
- WARN("%s() falls through to next function %s()",
- func->name, insn_func(insn)->name);
- func->warned = 1;
+ if (!comma)
+ break;
- return 1;
- }
+ s = comma + 1;
+ }
- visited = VISITED_BRANCH << state.uaccess;
- if (insn->visited & VISITED_BRANCH_MASK) {
- if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
- return 1;
+ free(dup);
+ return 0;
+}
- if (insn->visited & visited)
- return 0;
- } else {
- nr_insns_visited++;
- }
+static void checksum_update_insn(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn)
+{
+ struct reloc *reloc = insn_reloc(file, insn);
+ unsigned long offset;
+ struct symbol *sym;
- if (state.noinstr)
- state.instr += insn->instr;
+ if (insn->fake)
+ return;
- if (insn->hint) {
- if (insn->restore) {
- struct instruction *save_insn, *i;
+ checksum_update(func, insn, insn->sec->data->d_buf + insn->offset, insn->len);
- i = insn;
- save_insn = NULL;
+ if (!reloc) {
+ struct symbol *call_dest = insn_call_dest(insn);
- sym_for_each_insn_continue_reverse(file, func, i) {
- if (i->save) {
- save_insn = i;
- break;
- }
- }
+ if (call_dest)
+ checksum_update(func, insn, call_dest->demangled_name,
+ strlen(call_dest->demangled_name));
+ return;
+ }
- if (!save_insn) {
- WARN_INSN(insn, "no corresponding CFI save for CFI restore");
- return 1;
+ sym = reloc->sym;
+ offset = arch_insn_adjusted_addend(insn, reloc);
+
+ if (is_string_sec(sym->sec)) {
+ char *str;
+
+ str = sym->sec->data->d_buf + sym->offset + offset;
+ checksum_update(func, insn, str, strlen(str));
+ return;
+ }
+
+ if (is_sec_sym(sym)) {
+ sym = find_symbol_containing(reloc->sym->sec, offset);
+ if (!sym)
+ return;
+
+ offset -= sym->offset;
+ }
+
+ checksum_update(func, insn, sym->demangled_name, strlen(sym->demangled_name));
+ checksum_update(func, insn, &offset, sizeof(offset));
+}
+
+static int validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state state);
+static int do_validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state state);
+
+static int validate_insn(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state *statep,
+ struct instruction *prev_insn, struct instruction *next_insn,
+ bool *dead_end)
+{
+ char *alt_name __maybe_unused = NULL;
+ struct alternative *alt;
+ u8 visited;
+ int ret;
+
+ /*
+ * Any returns before the end of this function are effectively dead
+ * ends, i.e. validate_branch() has reached the end of the branch.
+ */
+ *dead_end = true;
+
+ visited = VISITED_BRANCH << statep->uaccess;
+ if (insn->visited & VISITED_BRANCH_MASK) {
+ if (!insn->hint && !insn_cfi_match(insn, &statep->cfi))
+ return 1;
+
+ if (insn->visited & visited) {
+ TRACE_INSN(insn, "already visited");
+ return 0;
+ }
+ } else {
+ nr_insns_visited++;
+ }
+
+ if (statep->noinstr)
+ statep->instr += insn->instr;
+
+ if (insn->hint) {
+ if (insn->restore) {
+ struct instruction *save_insn, *i;
+
+ i = insn;
+ save_insn = NULL;
+
+ sym_for_each_insn_continue_reverse(file, func, i) {
+ if (i->save) {
+ save_insn = i;
+ break;
}
+ }
- if (!save_insn->visited) {
- /*
- * If the restore hint insn is at the
- * beginning of a basic block and was
- * branched to from elsewhere, and the
- * save insn hasn't been visited yet,
- * defer following this branch for now.
- * It will be seen later via the
- * straight-line path.
- */
- if (!prev_insn)
- return 0;
+ if (!save_insn) {
+ WARN_INSN(insn, "no corresponding CFI save for CFI restore");
+ return 1;
+ }
- WARN_INSN(insn, "objtool isn't smart enough to handle this CFI save/restore combo");
- return 1;
+ if (!save_insn->visited) {
+ /*
+ * If the restore hint insn is at the
+ * beginning of a basic block and was
+ * branched to from elsewhere, and the
+ * save insn hasn't been visited yet,
+ * defer following this branch for now.
+ * It will be seen later via the
+ * straight-line path.
+ */
+ if (!prev_insn) {
+ TRACE_INSN(insn, "defer restore");
+ return 0;
}
- insn->cfi = save_insn->cfi;
- nr_cfi_reused++;
+ WARN_INSN(insn, "objtool isn't smart enough to handle this CFI save/restore combo");
+ return 1;
}
- state.cfi = *insn->cfi;
+ insn->cfi = save_insn->cfi;
+ nr_cfi_reused++;
+ }
+
+ statep->cfi = *insn->cfi;
+ } else {
+ /* XXX track if we actually changed statep->cfi */
+
+ if (prev_insn && !cficmp(prev_insn->cfi, &statep->cfi)) {
+ insn->cfi = prev_insn->cfi;
+ nr_cfi_reused++;
} else {
- /* XXX track if we actually changed state.cfi */
+ insn->cfi = cfi_hash_find_or_add(&statep->cfi);
+ }
+ }
- if (prev_insn && !cficmp(prev_insn->cfi, &state.cfi)) {
- insn->cfi = prev_insn->cfi;
- nr_cfi_reused++;
- } else {
- insn->cfi = cfi_hash_find_or_add(&state.cfi);
+ insn->visited |= visited;
+
+ if (propagate_alt_cfi(file, insn))
+ return 1;
+
+ if (insn->alts) {
+ for (alt = insn->alts; alt; alt = alt->next) {
+ TRACE_ALT_BEGIN(insn, alt, alt_name);
+ ret = validate_branch(file, func, alt->insn, *statep);
+ TRACE_ALT_END(insn, alt, alt_name);
+ if (ret) {
+ BT_INSN(insn, "(alt)");
+ return ret;
}
}
+ TRACE_ALT_INFO_NOADDR(insn, "/ ", "DEFAULT");
+ }
- insn->visited |= visited;
+ if (skip_alt_group(insn))
+ return 0;
- if (propagate_alt_cfi(file, insn))
+ if (handle_insn_ops(insn, next_insn, statep))
+ return 1;
+
+ switch (insn->type) {
+
+ case INSN_RETURN:
+ TRACE_INSN(insn, "return");
+ return validate_return(func, insn, statep);
+
+ case INSN_CALL:
+ case INSN_CALL_DYNAMIC:
+ if (insn->type == INSN_CALL)
+ TRACE_INSN(insn, "call");
+ else
+ TRACE_INSN(insn, "indirect call");
+
+ ret = validate_call(file, insn, statep);
+ if (ret)
+ return ret;
+
+ if (opts.stackval && func && !is_special_call(insn) &&
+ !has_valid_stack_frame(statep)) {
+ WARN_INSN(insn, "call without frame pointer save/setup");
return 1;
+ }
- if (insn->alts) {
- for (alt = insn->alts; alt; alt = alt->next) {
- ret = validate_branch(file, func, alt->insn, state);
- if (ret) {
- BT_INSN(insn, "(alt)");
- return ret;
- }
+ break;
+
+ case INSN_JUMP_CONDITIONAL:
+ case INSN_JUMP_UNCONDITIONAL:
+ if (is_sibling_call(insn)) {
+ TRACE_INSN(insn, "sibling call");
+ ret = validate_sibling_call(file, insn, statep);
+ if (ret)
+ return ret;
+
+ } else if (insn->jump_dest) {
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ TRACE_INSN(insn, "unconditional jump");
+ else
+ TRACE_INSN(insn, "jump taken");
+
+ ret = validate_branch(file, func, insn->jump_dest, *statep);
+ if (ret) {
+ BT_INSN(insn, "(branch)");
+ return ret;
}
}
- if (skip_alt_group(insn))
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
return 0;
- if (handle_insn_ops(insn, next_insn, &state))
- return 1;
-
- switch (insn->type) {
-
- case INSN_RETURN:
- return validate_return(func, insn, &state);
+ TRACE_INSN(insn, "jump not taken");
+ break;
- case INSN_CALL:
- case INSN_CALL_DYNAMIC:
- ret = validate_call(file, insn, &state);
+ case INSN_JUMP_DYNAMIC:
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ TRACE_INSN(insn, "indirect jump");
+ if (is_sibling_call(insn)) {
+ ret = validate_sibling_call(file, insn, statep);
if (ret)
return ret;
+ }
- if (opts.stackval && func && !is_special_call(insn) &&
- !has_valid_stack_frame(&state)) {
- WARN_INSN(insn, "call without frame pointer save/setup");
- return 1;
- }
+ if (insn->type == INSN_JUMP_DYNAMIC)
+ return 0;
- break;
+ break;
- case INSN_JUMP_CONDITIONAL:
- case INSN_JUMP_UNCONDITIONAL:
- if (is_sibling_call(insn)) {
- ret = validate_sibling_call(file, insn, &state);
- if (ret)
- return ret;
+ case INSN_SYSCALL:
+ TRACE_INSN(insn, "syscall");
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_INSN(insn, "unsupported instruction in callable function");
+ return 1;
+ }
- } else if (insn->jump_dest) {
- ret = validate_branch(file, func,
- insn->jump_dest, state);
- if (ret) {
- BT_INSN(insn, "(branch)");
- return ret;
- }
- }
+ break;
- if (insn->type == INSN_JUMP_UNCONDITIONAL)
- return 0;
+ case INSN_SYSRET:
+ TRACE_INSN(insn, "sysret");
+ if (func && (!next_insn || !next_insn->hint)) {
+ WARN_INSN(insn, "unsupported instruction in callable function");
+ return 1;
+ }
+ return 0;
+
+ case INSN_STAC:
+ TRACE_INSN(insn, "stac");
+ if (!opts.uaccess)
break;
- case INSN_JUMP_DYNAMIC:
- case INSN_JUMP_DYNAMIC_CONDITIONAL:
- if (is_sibling_call(insn)) {
- ret = validate_sibling_call(file, insn, &state);
- if (ret)
- return ret;
- }
+ if (statep->uaccess) {
+ WARN_INSN(insn, "recursive UACCESS enable");
+ return 1;
+ }
- if (insn->type == INSN_JUMP_DYNAMIC)
- return 0;
+ statep->uaccess = true;
+ break;
+ case INSN_CLAC:
+ TRACE_INSN(insn, "clac");
+ if (!opts.uaccess)
break;
- case INSN_SYSCALL:
- if (func && (!next_insn || !next_insn->hint)) {
- WARN_INSN(insn, "unsupported instruction in callable function");
- return 1;
- }
+ if (!statep->uaccess && func) {
+ WARN_INSN(insn, "redundant UACCESS disable");
+ return 1;
+ }
- break;
+ if (func_uaccess_safe(func) && !statep->uaccess_stack) {
+ WARN_INSN(insn, "UACCESS-safe disables UACCESS");
+ return 1;
+ }
- case INSN_SYSRET:
- if (func && (!next_insn || !next_insn->hint)) {
- WARN_INSN(insn, "unsupported instruction in callable function");
- return 1;
- }
+ statep->uaccess = false;
+ break;
- return 0;
+ case INSN_STD:
+ TRACE_INSN(insn, "std");
+ if (statep->df) {
+ WARN_INSN(insn, "recursive STD");
+ return 1;
+ }
- case INSN_STAC:
- if (!opts.uaccess)
- break;
+ statep->df = true;
+ break;
- if (state.uaccess) {
- WARN_INSN(insn, "recursive UACCESS enable");
- return 1;
- }
+ case INSN_CLD:
+ TRACE_INSN(insn, "cld");
+ if (!statep->df && func) {
+ WARN_INSN(insn, "redundant CLD");
+ return 1;
+ }
- state.uaccess = true;
- break;
+ statep->df = false;
+ break;
- case INSN_CLAC:
- if (!opts.uaccess)
- break;
+ default:
+ break;
+ }
- if (!state.uaccess && func) {
- WARN_INSN(insn, "redundant UACCESS disable");
- return 1;
- }
+ if (insn->dead_end)
+ TRACE_INSN(insn, "dead end");
- if (func_uaccess_safe(func) && !state.uaccess_stack) {
- WARN_INSN(insn, "UACCESS-safe disables UACCESS");
- return 1;
- }
+ *dead_end = insn->dead_end;
+ return 0;
+}
- state.uaccess = false;
- break;
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps). Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/objtool.txt.
+ */
+static int do_validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state state)
+{
+ struct instruction *next_insn, *prev_insn = NULL;
+ bool dead_end;
+ int ret;
- case INSN_STD:
- if (state.df) {
- WARN_INSN(insn, "recursive STD");
- return 1;
- }
+ if (func && func->ignore)
+ return 0;
- state.df = true;
- break;
+ do {
+ insn->trace = 0;
+ next_insn = next_insn_to_validate(file, insn);
- case INSN_CLD:
- if (!state.df && func) {
- WARN_INSN(insn, "redundant CLD");
- return 1;
- }
+ if (opts.checksum && func && insn->sec)
+ checksum_update_insn(file, func, insn);
- state.df = false;
- break;
+ if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
+ /* Ignore KCFI type preambles, which always fall through */
+ if (is_prefix_func(func))
+ return 0;
- default:
- break;
+ if (file->ignore_unreachables)
+ return 0;
+
+ WARN("%s() falls through to next function %s()",
+ func->name, insn_func(insn)->name);
+ func->warned = 1;
+
+ return 1;
}
- if (insn->dead_end)
- return 0;
+ ret = validate_insn(file, func, insn, &state, prev_insn, next_insn,
+ &dead_end);
+
+ if (!insn->trace) {
+ if (ret)
+ TRACE_INSN(insn, "warning (%d)", ret);
+ else
+ TRACE_INSN(insn, NULL);
+ }
- if (!next_insn) {
+ if (!dead_end && !next_insn) {
if (state.cfi.cfa.base == CFI_UNDEFINED)
return 0;
if (file->ignore_unreachables)
@@ -3795,15 +4006,28 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
WARN("%s%sunexpected end of section %s",
func ? func->name : "", func ? "(): " : "",
- sec->name);
+ insn->sec->name);
return 1;
}
prev_insn = insn;
insn = next_insn;
- }
- return 0;
+ } while (!dead_end);
+
+ return ret;
+}
+
+static int validate_branch(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct insn_state state)
+{
+ int ret;
+
+ trace_depth_inc();
+ ret = do_validate_branch(file, func, insn, state);
+ trace_depth_dec();
+
+ return ret;
}
static int validate_unwind_hint(struct objtool_file *file,
@@ -3811,7 +4035,13 @@ static int validate_unwind_hint(struct objtool_file *file,
struct insn_state *state)
{
if (insn->hint && !insn->visited) {
- int ret = validate_branch(file, insn_func(insn), insn, *state);
+ struct symbol *func = insn_func(insn);
+ int ret;
+
+ if (opts.checksum)
+ checksum_init(func);
+
+ ret = validate_branch(file, func, insn, *state);
if (ret)
BT_INSN(insn, "<=== (hint)");
return ret;
@@ -4002,6 +4232,37 @@ static int validate_retpoline(struct objtool_file *file)
warnings++;
}
+ if (!opts.cfi)
+ return warnings;
+
+ /*
+ * kCFI call sites look like:
+ *
+ * movl $(-0x12345678), %r10d
+ * addl -4(%r11), %r10d
+ * jz 1f
+ * ud2
+ * 1: cs call __x86_indirect_thunk_r11
+ *
+ * Verify all indirect calls are kCFI adorned by checking that the
+ * preceding instruction is the UD2. Notably, doing __nocfi calls to
+ * regular (cfi) functions is broken.
+ */
+ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+ struct symbol *sym = insn->sym;
+
+ if (sym && (sym->type == STT_NOTYPE ||
+ sym->type == STT_FUNC) && !sym->nocfi) {
+ struct instruction *prev =
+ prev_insn_same_sym(file, insn);
+
+ if (!prev || prev->type != INSN_BUG) {
+ WARN_INSN(insn, "no-cfi indirect call!");
+ warnings++;
+ }
+ }
+ }
+
return warnings;
}
@@ -4024,7 +4285,8 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
struct instruction *prev_insn;
int i;
- if (insn->type == INSN_NOP || insn->type == INSN_TRAP || (func && func->ignore))
+ if (insn->type == INSN_NOP || insn->type == INSN_TRAP ||
+ insn->hole || (func && func->ignore))
return true;
/*
@@ -4035,47 +4297,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
!strcmp(insn->sec->name, ".altinstr_aux"))
return true;
- /*
- * Whole archive runs might encounter dead code from weak symbols.
- * This is where the linker will have dropped the weak symbol in
- * favour of a regular symbol, but leaves the code in place.
- *
- * In this case we'll find a piece of code (whole function) that is not
- * covered by a !section symbol. Ignore them.
- */
- if (opts.link && !func) {
- int size = find_symbol_hole_containing(insn->sec, insn->offset);
- unsigned long end = insn->offset + size;
-
- if (!size) /* not a hole */
- return false;
-
- if (size < 0) /* hole until the end */
- return true;
-
- sec_for_each_insn_continue(file, insn) {
- /*
- * If we reach a visited instruction at or before the
- * end of the hole, ignore the unreachable.
- */
- if (insn->visited)
- return true;
-
- if (insn->offset >= end)
- break;
-
- /*
- * If this hole jumps to a .cold function, mark it ignore too.
- */
- if (insn->jump_dest && insn_func(insn->jump_dest) &&
- strstr(insn_func(insn->jump_dest)->name, ".cold")) {
- insn_func(insn->jump_dest)->ignore = true;
- }
- }
-
- return false;
- }
-
if (!func)
return false;
@@ -4127,14 +4348,54 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
return false;
}
-static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
+/*
+ * For FineIBT or kCFI, a certain number of bytes preceding the function may be
+ * NOPs. Those NOPs may be rewritten at runtime and executed, so give them a
+ * proper function name: __pfx_<func>.
+ *
+ * The NOPs may not exist for the following cases:
+ *
+ * - compiler cloned functions (*.cold, *.part.0, etc.)
+ * - asm functions created with inline asm or without SYM_FUNC_START()
+ *
+ * Also, the function may already have a prefix from a previous objtool run
+ * (livepatch extracted functions, or manually running objtool multiple times).
+ *
+ * So return 0 if the NOPs are missing or the function already has a prefix
+ * symbol.
+ */
+static int create_prefix_symbol(struct objtool_file *file, struct symbol *func)
{
struct instruction *insn, *prev;
+ char name[SYM_NAME_LEN];
struct cfi_state *cfi;
+ if (!is_func_sym(func) || is_prefix_func(func) ||
+ func->cold || func->static_call_tramp)
+ return 0;
+
+ if ((strlen(func->name) + sizeof("__pfx_") > SYM_NAME_LEN)) {
+ WARN("%s: symbol name too long, can't create __pfx_ symbol",
+ func->name);
+ return 0;
+ }
+
+ if (snprintf_check(name, SYM_NAME_LEN, "__pfx_%s", func->name))
+ return -1;
+
+ if (file->klp) {
+ struct symbol *pfx;
+
+ pfx = find_symbol_by_offset(func->sec, func->offset - opts.prefix);
+ if (pfx && is_prefix_func(pfx) && !strcmp(pfx->name, name))
+ return 0;
+ }
+
insn = find_insn(file, func->sec, func->offset);
- if (!insn)
+ if (!insn) {
+ WARN("%s: can't find starting instruction", func->name);
return -1;
+ }
for (prev = prev_insn_same_sec(file, insn);
prev;
@@ -4142,22 +4403,27 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
u64 offset;
if (prev->type != INSN_NOP)
- return -1;
+ return 0;
offset = func->offset - prev->offset;
if (offset > opts.prefix)
- return -1;
+ return 0;
if (offset < opts.prefix)
continue;
- elf_create_prefix_symbol(file->elf, func, opts.prefix);
+ if (!elf_create_symbol(file->elf, name, func->sec,
+ GELF_ST_BIND(func->sym.st_info),
+ GELF_ST_TYPE(func->sym.st_info),
+ prev->offset, opts.prefix))
+ return -1;
+
break;
}
if (!prev)
- return -1;
+ return 0;
if (!insn->cfi) {
/*
@@ -4175,20 +4441,18 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func)
return 0;
}
-static int add_prefix_symbols(struct objtool_file *file)
+static int create_prefix_symbols(struct objtool_file *file)
{
struct section *sec;
struct symbol *func;
- for_each_sec(file, sec) {
- if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ for_each_sec(file->elf, sec) {
+ if (!is_text_sec(sec))
continue;
sec_for_each_sym(sec, func) {
- if (func->type != STT_FUNC)
- continue;
-
- add_prefix_symbol(file, func);
+ if (create_prefix_symbol(file, func))
+ return -1;
}
}
@@ -4199,6 +4463,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
struct symbol *sym, struct insn_state *state)
{
struct instruction *insn;
+ struct symbol *func;
int ret;
if (!sym->len) {
@@ -4216,9 +4481,26 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
if (opts.uaccess)
state->uaccess = sym->uaccess_safe;
- ret = validate_branch(file, insn_func(insn), insn, *state);
+ func = insn_func(insn);
+
+ if (opts.checksum)
+ checksum_init(func);
+
+ if (opts.trace && !fnmatch(opts.trace, sym->name, 0)) {
+ trace_enable();
+ TRACE("%s: validation begin\n", sym->name);
+ }
+
+ ret = validate_branch(file, func, insn, *state);
if (ret)
BT_INSN(insn, "<=== (sym)");
+
+ TRACE("%s: validation %s\n\n", sym->name, ret ? "failed" : "end");
+ trace_disable();
+
+ if (opts.checksum)
+ checksum_finish(func);
+
return ret;
}
@@ -4229,7 +4511,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
int warnings = 0;
sec_for_each_sym(sec, func) {
- if (func->type != STT_FUNC)
+ if (!is_func_sym(func))
continue;
init_insn_state(file, &state, sec);
@@ -4272,8 +4554,8 @@ static int validate_functions(struct objtool_file *file)
struct section *sec;
int warnings = 0;
- for_each_sec(file, sec) {
- if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ for_each_sec(file->elf, sec) {
+ if (!is_text_sec(sec))
continue;
warnings += validate_section(file, sec);
@@ -4400,12 +4682,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn
reloc_offset(reloc) + 1,
(insn->offset + insn->len) - (reloc_offset(reloc) + 1))) {
- off = reloc->sym->offset;
- if (reloc_type(reloc) == R_X86_64_PC32 ||
- reloc_type(reloc) == R_X86_64_PLT32)
- off += arch_dest_reloc_offset(reloc_addend(reloc));
- else
- off += reloc_addend(reloc);
+ off = reloc->sym->offset + arch_insn_adjusted_addend(insn, reloc);
dest = find_insn(file, reloc->sym->sec, off);
if (!dest)
@@ -4456,10 +4733,10 @@ static int validate_ibt(struct objtool_file *file)
for_each_insn(file, insn)
warnings += validate_ibt_insn(file, insn);
- for_each_sec(file, sec) {
+ for_each_sec(file->elf, sec) {
/* Already done by validate_ibt_insn() */
- if (sec->sh.sh_flags & SHF_EXECINSTR)
+ if (is_text_sec(sec))
continue;
if (!sec->rsec)
@@ -4474,8 +4751,8 @@ static int validate_ibt(struct objtool_file *file)
!strncmp(sec->name, ".debug", 6) ||
!strcmp(sec->name, ".altinstructions") ||
!strcmp(sec->name, ".ibt_endbr_seal") ||
+ !strcmp(sec->name, ".kcfi_traps") ||
!strcmp(sec->name, ".orc_unwind_ip") ||
- !strcmp(sec->name, ".parainstructions") ||
!strcmp(sec->name, ".retpoline_sites") ||
!strcmp(sec->name, ".smp_locks") ||
!strcmp(sec->name, ".static_call_sites") ||
@@ -4484,12 +4761,14 @@ static int validate_ibt(struct objtool_file *file)
!strcmp(sec->name, "__bug_table") ||
!strcmp(sec->name, "__ex_table") ||
!strcmp(sec->name, "__jump_table") ||
+ !strcmp(sec->name, "__klp_funcs") ||
!strcmp(sec->name, "__mcount_loc") ||
- !strcmp(sec->name, ".kcfi_traps") ||
!strcmp(sec->name, ".llvm.call-graph-profile") ||
!strcmp(sec->name, ".llvm_bb_addr_map") ||
!strcmp(sec->name, "__tracepoints") ||
- strstr(sec->name, "__patchable_function_entries"))
+ !strcmp(sec->name, ".return_sites") ||
+ !strcmp(sec->name, ".call_sites") ||
+ !strcmp(sec->name, "__patchable_function_entries"))
continue;
for_each_reloc(sec->rsec, reloc)
@@ -4563,85 +4842,45 @@ static int validate_reachable_instructions(struct objtool_file *file)
return warnings;
}
-/* 'funcs' is a space-separated list of function names */
-static void disas_funcs(const char *funcs)
+__weak bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc)
{
- const char *objdump_str, *cross_compile;
- int size, ret;
- char *cmd;
-
- cross_compile = getenv("CROSS_COMPILE");
- if (!cross_compile)
- cross_compile = "";
-
- objdump_str = "%sobjdump -wdr %s | gawk -M -v _funcs='%s' '"
- "BEGIN { split(_funcs, funcs); }"
- "/^$/ { func_match = 0; }"
- "/<.*>:/ { "
- "f = gensub(/.*<(.*)>:/, \"\\\\1\", 1);"
- "for (i in funcs) {"
- "if (funcs[i] == f) {"
- "func_match = 1;"
- "base = strtonum(\"0x\" $1);"
- "break;"
- "}"
- "}"
- "}"
- "{"
- "if (func_match) {"
- "addr = strtonum(\"0x\" $1);"
- "printf(\"%%04x \", addr - base);"
- "print;"
- "}"
- "}' 1>&2";
-
- /* fake snprintf() to calculate the size */
- size = snprintf(NULL, 0, objdump_str, cross_compile, objname, funcs) + 1;
- if (size <= 0) {
- WARN("objdump string size calculation failed");
- return;
- }
-
- cmd = malloc(size);
+ unsigned int type = reloc_type(reloc);
+ size_t sz = elf_addr_size(elf);
- /* real snprintf() */
- snprintf(cmd, size, objdump_str, cross_compile, objname, funcs);
- ret = system(cmd);
- if (ret) {
- WARN("disassembly failed: %d", ret);
- return;
- }
+ return (sz == 8) ? (type == R_ABS64) : (type == R_ABS32);
}
-static void disas_warned_funcs(struct objtool_file *file)
+static int check_abs_references(struct objtool_file *file)
{
- struct symbol *sym;
- char *funcs = NULL, *tmp;
-
- for_each_sym(file, sym) {
- if (sym->warned) {
- if (!funcs) {
- funcs = malloc(strlen(sym->name) + 1);
- if (!funcs) {
- ERROR_GLIBC("malloc");
- return;
- }
- strcpy(funcs, sym->name);
- } else {
- tmp = malloc(strlen(funcs) + strlen(sym->name) + 2);
- if (!tmp) {
- ERROR_GLIBC("malloc");
- return;
- }
- sprintf(tmp, "%s %s", funcs, sym->name);
- free(funcs);
- funcs = tmp;
+ struct section *sec;
+ struct reloc *reloc;
+ int ret = 0;
+
+ for_each_sec(file->elf, sec) {
+ /* absolute references in non-loadable sections are fine */
+ if (!(sec->sh.sh_flags & SHF_ALLOC))
+ continue;
+
+ /* section must have an associated .rela section */
+ if (!sec->rsec)
+ continue;
+
+ /*
+ * Special case for compiler generated metadata that is not
+ * consumed until after boot.
+ */
+ if (!strcmp(sec->name, "__patchable_function_entries"))
+ continue;
+
+ for_each_reloc(sec->rsec, reloc) {
+ if (arch_absolute_reloc(file->elf, reloc)) {
+ WARN("section %s has absolute relocation at offset 0x%llx",
+ sec->name, (unsigned long long)reloc_offset(reloc));
+ ret++;
}
}
}
-
- if (funcs)
- disas_funcs(funcs);
+ return ret;
}
struct insn_chunk {
@@ -4672,10 +4911,35 @@ static void free_insns(struct objtool_file *file)
free(chunk->addr);
}
+/*
+ * Disassemble @insn using the global disassembly context and return the
+ * resulting text. An empty string is returned when no context is set.
+ */
+const char *objtool_disas_insn(struct instruction *insn)
+{
+	struct disas_context *ctx = objtool_disas_ctx;
+
+	if (ctx) {
+		disas_insn(ctx, insn);
+		return disas_result(ctx);
+	}
+
+	return "";
+}
+
int check(struct objtool_file *file)
{
+ struct disas_context *disas_ctx = NULL;
int ret = 0, warnings = 0;
+ /*
+ * Create a disassembly context if we might disassemble any
+ * instruction or function.
+ */
+ if (opts.verbose || opts.backtrace || opts.trace || opts.disas) {
+ disas_ctx = disas_context_create(file);
+ if (!disas_ctx) {
+ opts.disas = false;
+ opts.trace = false;
+ }
+ objtool_disas_ctx = disas_ctx;
+ }
+
arch_initial_func_cfi_state(&initial_func_cfi);
init_cfi_state(&init_cfi);
init_cfi_state(&func_cfi);
@@ -4691,6 +4955,10 @@ int check(struct objtool_file *file)
cfi_hash_add(&init_cfi);
cfi_hash_add(&func_cfi);
+ ret = checksum_debug_init(file);
+ if (ret)
+ goto out;
+
ret = decode_sections(file);
if (ret)
goto out;
@@ -4701,7 +4969,7 @@ int check(struct objtool_file *file)
if (opts.retpoline)
warnings += validate_retpoline(file);
- if (opts.stackval || opts.orc || opts.uaccess) {
+ if (validate_branch_enabled()) {
int w = 0;
w += validate_functions(file);
@@ -4766,7 +5034,7 @@ int check(struct objtool_file *file)
}
if (opts.prefix) {
- ret = add_prefix_symbols(file);
+ ret = create_prefix_symbols(file);
if (ret)
goto out;
}
@@ -4777,14 +5045,21 @@ int check(struct objtool_file *file)
goto out;
}
+ if (opts.noabs)
+ warnings += check_abs_references(file);
+
+ if (opts.checksum) {
+ ret = create_sym_checksum_section(file);
+ if (ret)
+ goto out;
+ }
+
if (opts.orc && nr_insns) {
ret = orc_create(file);
if (ret)
goto out;
}
- free_insns(file);
-
if (opts.stats) {
printf("nr_insns_visited: %ld\n", nr_insns_visited);
printf("nr_cfi: %ld\n", nr_cfi);
@@ -4793,18 +5068,32 @@ int check(struct objtool_file *file)
}
out:
- if (!ret && !warnings)
- return 0;
+ if (ret || warnings) {
+ if (opts.werror && warnings)
+ ret = 1;
- if (opts.werror && warnings)
- ret = 1;
+ if (opts.verbose) {
+ if (opts.werror && warnings)
+ WARN("%d warning(s) upgraded to errors", warnings);
+ disas_warned_funcs(disas_ctx);
+ }
+ }
- if (opts.verbose) {
- if (opts.werror && warnings)
- WARN("%d warning(s) upgraded to errors", warnings);
- print_args();
- disas_warned_funcs(file);
+ if (opts.disas)
+ disas_funcs(disas_ctx);
+
+ if (disas_ctx) {
+ disas_context_destroy(disas_ctx);
+ objtool_disas_ctx = NULL;
}
+ free_insns(file);
+
+ if (!ret && !warnings)
+ return 0;
+
+ if (opts.backup && make_backup())
+ return 1;
+
return ret;
}
diff --git a/tools/objtool/disas.c b/tools/objtool/disas.c
new file mode 100644
index 000000000000..2b5059f55e40
--- /dev/null
+++ b/tools/objtool/disas.c
@@ -0,0 +1,1248 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2015-2017 Josh Poimboeuf <jpoimboe@redhat.com>
+ */
+
+#define _GNU_SOURCE
+#include <fnmatch.h>
+
+#include <objtool/arch.h>
+#include <objtool/check.h>
+#include <objtool/disas.h>
+#include <objtool/special.h>
+#include <objtool/warn.h>
+
+#include <bfd.h>
+#include <linux/string.h>
+#include <tools/dis-asm-compat.h>
+
+/*
+ * Size of the buffer for storing the result of disassembling
+ * a single instruction.
+ */
+#define DISAS_RESULT_SIZE 1024
+
+struct disas_context { /* state used to disassemble instructions of one object file */
+ struct objtool_file *file; /* file given to disas_context_create() */
+ struct instruction *insn; /* instruction currently being disassembled */
+ bool alt_applied; /* insn is an alternative shown at the original code location */
+ char result[DISAS_RESULT_SIZE]; /* text of the last disassembled instruction */
+ disassembler_ftype disassembler; /* function that disassembles one instruction */
+ struct disassemble_info info; /* settings and callbacks handed to the disassembler */
+};
+
+/*
+ * Maximum number of alternatives
+ */
+#define DISAS_ALT_MAX 5
+
+/*
+ * Maximum number of instructions per alternative
+ */
+#define DISAS_ALT_INSN_MAX 50
+
+/*
+ * Information to disassemble an alternative.
+ *
+ * Instructions are stored as already-formatted strings in insn[], in the
+ * order they appear in the alternative. insn_idx is the print cursor used
+ * when alternatives are printed side-by-side; it is set to -1 once every
+ * instruction of this alternative has been printed.
+ */
+struct disas_alt {
+ struct instruction *orig_insn; /* original instruction */
+ struct alternative *alt; /* alternative or NULL if default code */
+ char *name; /* allocated name for this alternative */
+ int width; /* formatting width: longest of name and instruction strings */
+ struct {
+ char *str; /* allocated instruction string */
+ int offset; /* instruction offset from the start of the alternative */
+ int nops; /* number of NOP bytes, 0 if not a NOP */
+ } insn[DISAS_ALT_INSN_MAX]; /* alternative instructions */
+ int insn_idx; /* index of the next instruction to print, -1 when done */
+};
+
+#define DALT_DEFAULT(dalt) (!(dalt)->alt) /* true when dalt describes the default code */
+#define DALT_INSN(dalt) (DALT_DEFAULT(dalt) ? (dalt)->orig_insn : (dalt)->alt->insn) /* orig_insn for the default, alt->insn otherwise */
+#define DALT_GROUP(dalt) (DALT_INSN(dalt)->alt_group) /* alt_group of that instruction, may be NULL */
+#define DALT_ALTID(dalt) ((dalt)->orig_insn->offset) /* offset of the original instruction, used to identify the alternative */
+
+#define ALT_FLAGS_SHIFT 16 /* flag bits start at this bit position */
+#define ALT_FLAG_NOT (1 << 0) /* alternative used when feature not enabled ('!' prefix) */
+#define ALT_FLAG_DIRECT_CALL (1 << 1) /* direct call alternative ('+' prefix) */
+#define ALT_FEATURE_MASK ((1 << ALT_FLAGS_SHIFT) - 1) /* bits below the flags: feature number */
+
+/* Return the feature number held in the bits of @ft_flags below the flags. */
+static int alt_feature(unsigned int ft_flags)
+{
+	unsigned int feature = ft_flags & ALT_FEATURE_MASK;
+
+	return feature;
+}
+
+/* Return the flag bits of @ft_flags, i.e. everything above the feature number. */
+static int alt_flags(unsigned int ft_flags)
+{
+	unsigned int flags = ft_flags >> ALT_FLAGS_SHIFT;
+
+	return flags;
+}
+
+/*
+ * printf-style formatting into a newly allocated string, built on
+ * vasprintf(). The caller owns the returned string and must free() it.
+ * Returns NULL if the string could not be created.
+ */
+static char *strfmt(const char *fmt, ...)
+{
+	char *out;
+	va_list args;
+	int ret;
+
+	va_start(args, fmt);
+	ret = vasprintf(&out, fmt, args);
+	va_end(args);
+
+	if (ret == -1)
+		return NULL;
+
+	return out;
+}
+
+/*
+ * Write "name" or, when @offset is not zero, "name+0x<offset>" into @str.
+ * No bound is checked: @str must be large enough for the result.
+ * Returns the number of characters written.
+ */
+static int sprint_name(char *str, const char *name, unsigned long offset)
+{
+	if (!offset)
+		return sprintf(str, "%s", name);
+
+	return sprintf(str, "%s+0x%lx", name, offset);
+}
+
+/* Emit formatted text through the output callback and stream set in @dinfo. */
+#define DINFO_FPRINTF(dinfo, ...)					\
+	((dinfo)->fprintf_func((dinfo)->stream, __VA_ARGS__))
+
+/*
+ * Append formatted text to the result buffer of the disassembly context.
+ *
+ * Returns 0 on success. Returns -1, after a warning attached to the
+ * current instruction, when the buffer is already full or when the new
+ * text does not fit in the remaining space.
+ */
+static int disas_result_fprintf(struct disas_context *dctx,
+				const char *fmt, va_list ap)
+{
+	char *buf = dctx->result;
+	int used, room, written;
+
+	used = strlen(buf);
+	if (used >= DISAS_RESULT_SIZE - 1) {
+		WARN_FUNC(dctx->insn->sec, dctx->insn->offset,
+			  "disassembly buffer is full");
+		return -1;
+	}
+
+	/* room includes the byte needed for the terminating NUL */
+	room = DISAS_RESULT_SIZE - used;
+	written = vsnprintf(buf + used, room, fmt, ap);
+	if (written < 0 || written >= room) {
+		WARN_FUNC(dctx->insn->sec, dctx->insn->offset,
+			  "disassembly buffer is truncated");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * fprintf-like output callback for the disassembler. @stream is the
+ * disassembly context; the text is appended to its result buffer.
+ */
+static int disas_fprintf(void *stream, const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);
+	ret = disas_result_fprintf(stream, fmt, ap);
+	va_end(ap);
+
+	return ret;
+}
+
+/*
+ * Styled output callback, for init_disassemble_info_compat(). The style
+ * is not used: the text is appended to the result buffer exactly as
+ * disas_fprintf() does.
+ */
+static int disas_fprintf_styled(void *stream,
+				enum disassembler_style style,
+				const char *fmt, ...)
+{
+	va_list ap;
+	int ret;
+
+	va_start(ap, fmt);
+	ret = disas_result_fprintf(stream, fmt, ap);
+	va_end(ap);
+
+	return ret;
+}
+
+/*
+ * Print @addr followed by its symbolic form.
+ *
+ * With a symbol, the form is "<sym+0xoff>" (or "<sym>" when the address is
+ * the start of the symbol). Without one, the string produced by offstr()
+ * for the section offset is used.
+ *
+ * The symbol name is passed straight to the output callback instead of
+ * being staged with an unbounded sprintf() in a fixed-size stack buffer,
+ * so a very long symbol name cannot overflow the stack.
+ */
+static void disas_print_addr_sym(struct section *sec, struct symbol *sym,
+				 bfd_vma addr, struct disassemble_info *dinfo)
+{
+	unsigned long offset;
+	char *str;
+
+	if (sym) {
+		offset = addr - sym->offset;
+		if (offset)
+			DINFO_FPRINTF(dinfo, "0x%lx <%s+0x%lx>", addr,
+				      sym->name, offset);
+		else
+			DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, sym->name);
+		return;
+	}
+
+	str = offstr(sec, addr);
+	DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, str);
+	free(str);
+}
+
+/*
+ * Print an address that points inside an alternative as the matching
+ * address in the original code.
+ *
+ * This only applies while an alternative is disassembled at the location
+ * of the original instruction (alt_applied is true). It happens, for
+ * example, when there is a branch inside an alternative: the reference is
+ * then rebased onto the original instruction flow.
+ *
+ * Returns true if the address was printed, false if it still has to be
+ * resolved by the caller.
+ */
+static bool disas_print_addr_alt(bfd_vma addr, struct disassemble_info *dinfo)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct instruction *orig_first;
+	struct alt_group *grp;
+	unsigned long delta;
+
+	if (!dctx->alt_applied)
+		return false;
+
+	grp = dctx->insn->alt_group;
+	if (!grp || !grp->orig_group)
+		return false;
+
+	/* only addresses inside the alternative are rebased */
+	if (addr < grp->first_insn->offset || addr > grp->last_insn->offset)
+		return false;
+
+	orig_first = grp->orig_group->first_insn;
+	delta = addr - grp->first_insn->offset;
+
+	disas_print_addr_sym(orig_first->sec, orig_first->sym,
+			     orig_first->offset + delta, dinfo);
+
+	return true;
+}
+
+/*
+ * Print an address for which no relocation is expected. The address is
+ * shown relative to the symbol of the current instruction when it lies
+ * inside that symbol, otherwise relative to the instruction's section.
+ */
+static void disas_print_addr_noreloc(bfd_vma addr,
+				     struct disassemble_info *dinfo)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct instruction *insn = dctx->insn;
+	struct symbol *func = insn->sym;
+
+	if (disas_print_addr_alt(addr, dinfo))
+		return;
+
+	/* drop the symbol if the address is outside of it */
+	if (func && (addr < func->offset || addr >= func->offset + func->len))
+		func = NULL;
+
+	disas_print_addr_sym(insn->sec, func, addr, dinfo);
+}
+
+/*
+ * Print an address that is expected to be covered by a relocation of the
+ * current instruction, using the relocation symbol and adjusted addend.
+ *
+ * The "symbol+offset" string is built in a buffer sized from the symbol
+ * name, so a very long name cannot overflow a fixed-size stack buffer. If
+ * that buffer cannot be allocated, the bare symbol name is printed.
+ */
+static void disas_print_addr_reloc(bfd_vma addr, struct disassemble_info *dinfo)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct instruction *insn = dctx->insn;
+	unsigned long offset;
+	struct reloc *reloc;
+	char *str;
+
+	reloc = find_reloc_by_dest_range(dctx->file->elf, insn->sec,
+					 insn->offset, insn->len);
+	if (!reloc) {
+		/*
+		 * There is no relocation for this instruction although
+		 * the address to resolve points to the next instruction.
+		 * So this is an effective reference to the next IP, for
+		 * example: "lea 0x0(%rip),%rdi". The kernel can reference
+		 * the next IP with _THIS_IP_ macro.
+		 */
+		DINFO_FPRINTF(dinfo, "0x%lx <_THIS_IP_>", addr);
+		return;
+	}
+
+	offset = arch_insn_adjusted_addend(insn, reloc);
+
+	/*
+	 * If the relocation symbol is a section name (for example ".bss")
+	 * then we try to further resolve the name.
+	 */
+	if (reloc->sym->type == STT_SECTION) {
+		str = offstr(reloc->sym->sec, reloc->sym->offset + offset);
+		DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, str);
+		free(str);
+		return;
+	}
+
+	/* room for the name, "+0x", every hex digit of offset and the NUL */
+	str = malloc(strlen(reloc->sym->name) + sizeof("+0x") +
+		     2 * sizeof(offset));
+	if (!str) {
+		DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, reloc->sym->name);
+		return;
+	}
+
+	sprint_name(str, reloc->sym->name, offset);
+	DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, str);
+	free(str);
+}
+
+/*
+ * Address printing callback for the disassembler: print @addr together
+ * with a "<symbol>+<offset>" style resolution of it.
+ */
+static void disas_print_address(bfd_vma addr, struct disassemble_info *dinfo)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	struct instruction *insn = dctx->insn;
+	struct instruction *target = insn->jump_dest;
+	struct symbol *callee;
+	bool next_ip;
+
+	/*
+	 * When the instruction is a call/jump with a known destination,
+	 * that destination is likely the address being looked up, so it
+	 * is checked first.
+	 */
+	if (target && target->sym && target->offset == addr) {
+		if (disas_print_addr_alt(addr, dinfo))
+			return;
+		disas_print_addr_sym(target->sec, target->sym, addr, dinfo);
+		return;
+	}
+
+	/*
+	 * An address pointing to the next instruction probably means there
+	 * is a relocation. This can be a false positive when the current
+	 * instruction really references the address of the next
+	 * instruction; disas_print_addr_reloc() handles that case.
+	 */
+	next_ip = (addr == insn->offset + insn->len);
+
+	/*
+	 * The call destination offset is either the address being looked
+	 * up, or 0 if there is a relocation.
+	 */
+	callee = insn_call_dest(insn);
+	if (callee && (callee->offset == addr || (!callee->offset && next_ip))) {
+		DINFO_FPRINTF(dinfo, "0x%lx <%s>", addr, callee->name);
+		return;
+	}
+
+	if (next_ip)
+		disas_print_addr_reloc(addr, dinfo);
+	else
+		disas_print_addr_noreloc(addr, dinfo);
+}
+
+/*
+ * Fill in the arch, mach and options of a disassemble info. The mach is
+ * @mach32 or @mach64 depending on the ELF class of the file attached to
+ * the disassembly context.
+ *
+ * Returns 0 on success, -1 if the ELF class is neither 32-bit nor 64-bit.
+ */
+int disas_info_init(struct disassemble_info *dinfo,
+		    int arch, int mach32, int mach64,
+		    const char *options)
+{
+	struct disas_context *dctx = dinfo->application_data;
+	unsigned char elf_class = dctx->file->elf->ehdr.e_ident[EI_CLASS];
+
+	dinfo->arch = arch;
+
+	if (elf_class == ELFCLASS32)
+		dinfo->mach = mach32;
+	else if (elf_class == ELFCLASS64)
+		dinfo->mach = mach64;
+	else
+		return -1;
+
+	dinfo->disassembler_options = options;
+
+	return 0;
+}
+
+/*
+ * Allocate and initialize a disassembly context for @file.
+ *
+ * Returns the new context, or NULL (after a warning) if the allocation
+ * fails or if the disassembler cannot be set up for the file. The context
+ * is released with disas_context_destroy().
+ */
+struct disas_context *disas_context_create(struct objtool_file *file)
+{
+	struct disas_context *dctx;
+	struct disassemble_info *dinfo;
+	int err;
+
+	/*
+	 * Zero the whole context so that the result buffer reads as an
+	 * empty string, and insn as NULL, until a first instruction has
+	 * been disassembled.
+	 */
+	dctx = calloc(1, sizeof(*dctx));
+	if (!dctx) {
+		WARN("failed to allocate disassembly context");
+		return NULL;
+	}
+
+	dctx->file = file;
+	dinfo = &dctx->info;
+
+	init_disassemble_info_compat(dinfo, dctx,
+				     disas_fprintf, disas_fprintf_styled);
+
+	dinfo->read_memory_func = buffer_read_memory;
+	dinfo->print_address_func = disas_print_address;
+	dinfo->application_data = dctx;
+
+	/*
+	 * bfd_openr() is not used to avoid doing ELF data processing
+	 * and caching that has already been done. Here, we just need
+	 * to identify the target file so we call an arch specific
+	 * function to fill some disassemble info (arch, mach).
+	 */
+
+	dinfo->arch = bfd_arch_unknown;
+	dinfo->mach = 0;
+
+	err = arch_disas_info_init(dinfo);
+	if (err || dinfo->arch == bfd_arch_unknown || dinfo->mach == 0) {
+		WARN("failed to init disassembly arch");
+		goto error;
+	}
+
+	dinfo->endian = (file->elf->ehdr.e_ident[EI_DATA] == ELFDATA2MSB) ?
+		BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE;
+
+	disassemble_init_for_target(dinfo);
+
+	dctx->disassembler = disassembler(dinfo->arch,
+					  dinfo->endian == BFD_ENDIAN_BIG,
+					  dinfo->mach, NULL);
+	if (!dctx->disassembler) {
+		WARN("failed to create disassembler function");
+		goto error;
+	}
+
+	return dctx;
+
+error:
+	free(dctx);
+	return NULL;
+}
+
+void disas_context_destroy(struct disas_context *dctx)
+{
+ free(dctx); /* releases the context from disas_context_create(); free(NULL) is a no-op */
+}
+
+char *disas_result(struct disas_context *dctx)
+{
+ return dctx->result; /* buffer inside dctx, reset each time an instruction is disassembled */
+}
+
+#define DISAS_INSN_OFFSET_SPACE 10 /* added to sym_name_max_len to size the location column */
+#define DISAS_INSN_SPACE 60 /* column budget for instruction text, reduced by 2 per depth level */
+
+#define DISAS_PRINSN(dctx, insn, depth) \
+ disas_print_insn(stdout, dctx, insn, depth, "\n") /* print insn followed by a newline only */
+
+/*
+ * Print a message in the instruction flow. If sec is not NULL then the
+ * address at the section offset is printed in addition to the message,
+ * otherwise only the message is printed, padded to line up with lines
+ * that do carry an address.
+ *
+ * A negative depth narrows the address column by that many characters and
+ * draws no flow bars. Returns the number of characters printed.
+ */
+static int disas_vprint(FILE *stream, struct section *sec, unsigned long offset,
+			int depth, const char *format, va_list ap)
+{
+	int width = sym_name_max_len + DISAS_INSN_OFFSET_SPACE;
+	int printed = 0;
+	char *location;
+	int level;
+
+	if (depth < 0) {
+		width += depth;
+		depth = 0;
+	}
+
+	if (!sec) {
+		width += DISAS_INSN_OFFSET_SPACE + 1;
+		printed += fprintf(stream, "%-*s", width, "");
+	} else {
+		location = offstr(sec, offset);
+		printed += fprintf(stream, "%6lx: %-*s ", offset, width, location);
+		free(location);
+	}
+
+	/* print vertical bars to show the code flow */
+	for (level = 0; level < depth; level++)
+		printed += fprintf(stream, "| ");
+
+	if (format)
+		printed += vfprintf(stream, format, ap);
+
+	return printed;
+}
+
+/*
+ * Variadic front end to disas_vprint(): print a message in the instruction
+ * flow at the given section offset. Returns the number of characters
+ * printed.
+ */
+static int disas_print(FILE *stream, struct section *sec, unsigned long offset,
+		       int depth, const char *format, ...)
+{
+	va_list ap;
+	int printed;
+
+	va_start(ap, format);
+	printed = disas_vprint(stream, sec, offset, depth, format, ap);
+	va_end(ap);
+
+	return printed;
+}
+
+/*
+ * Print a message in the instruction flow. If insn is not NULL then
+ * the instruction address is printed in addition to the message,
+ * otherwise only the message is printed. In all cases, the instruction
+ * itself is not printed.
+ */
+void disas_print_info(FILE *stream, struct instruction *insn, int depth,
+		      const char *format, ...)
+{
+	struct section *sec = insn ? insn->sec : NULL;
+	unsigned long off = insn ? insn->offset : 0;
+	va_list ap;
+
+	va_start(ap, format);
+	disas_vprint(stream, sec, off, depth, format, ap);
+	va_end(ap);
+}
+
+/*
+ * Print an instruction address (offset and function), the instruction
+ * itself and an optional message.
+ *
+ * A NULL format prints nothing after the instruction, a format of "\n"
+ * only ends the line, and any other format is printed after a " - "
+ * separator.
+ */
+void disas_print_insn(FILE *stream, struct disas_context *dctx,
+		      struct instruction *insn, int depth,
+		      const char *format, ...)
+{
+	char nop_text[32];
+	const char *text;
+	int indent, width;
+	va_list ap;
+
+	/*
+	 * Alternative can insert a fake nop, sometimes with no
+	 * associated section so nothing to disassemble.
+	 */
+	if (!insn->sec && insn->type == INSN_NOP) {
+		snprintf(nop_text, sizeof(nop_text), "<fake nop> (%d bytes)",
+			 insn->len);
+		text = nop_text;
+	} else {
+		disas_insn(dctx, insn);
+		text = disas_result(dctx);
+	}
+
+	/* print the instruction */
+	indent = (depth + 1) * 2;
+	width = indent < DISAS_INSN_SPACE ? DISAS_INSN_SPACE - indent : 1;
+	disas_print_info(stream, insn, depth, "%-*s", width, text);
+
+	/* print message if any */
+	if (!format)
+		return;
+
+	if (!strcmp(format, "\n")) {
+		fprintf(stream, "\n");
+		return;
+	}
+
+	fprintf(stream, " - ");
+	va_start(ap, format);
+	vfprintf(stream, format, ap);
+	va_end(ap);
+}
+
+/*
+ * Disassemble a single instruction into the result buffer of the context.
+ * Return the size of the instruction.
+ *
+ * If alt_applied is true then insn should be an instruction from an
+ * alternative (i.e. insn->alt_group != NULL), and it is disassembled
+ * at the location of the original code it is replacing. When the
+ * instruction references any address inside the alternative then
+ * these references will be re-adjusted to replace the original code.
+ */
+static size_t disas_insn_common(struct disas_context *dctx,
+				struct instruction *insn,
+				bool alt_applied)
+{
+	struct disassemble_info *dinfo = &dctx->info;
+	struct section *sec;
+
+	/* start a fresh result for this instruction */
+	dctx->result[0] = '\0';
+	dctx->insn = insn;
+	dctx->alt_applied = alt_applied;
+
+	/* NOPs are summarized by their length, without reading the section */
+	if (insn->type == INSN_NOP) {
+		DINFO_FPRINTF(dinfo, "nop%d", insn->len);
+		return insn->len;
+	}
+
+	/*
+	 * Set the disassembler buffer to read data from the section
+	 * containing the instruction to disassemble.
+	 */
+	sec = insn->sec;
+	dinfo->buffer = sec->data->d_buf;
+	dinfo->buffer_vma = 0;
+	dinfo->buffer_length = sec->sh.sh_size;
+
+	return dctx->disassembler(insn->offset, dinfo);
+}
+
+size_t disas_insn(struct disas_context *dctx, struct instruction *insn)
+{
+ return disas_insn_common(dctx, insn, false); /* disassemble in place, no alternative address adjustment */
+}
+
+static size_t disas_insn_alt(struct disas_context *dctx,
+ struct instruction *insn)
+{
+ return disas_insn_common(dctx, insn, true); /* disassemble as an alternative applied over the original code */
+}
+
+/*
+ * Return the instruction following @insn in the same section, or NULL
+ * when @insn is the last instruction or the nop of the alternative group.
+ */
+static struct instruction *next_insn_same_alt(struct objtool_file *file,
+					      struct alt_group *alt_grp,
+					      struct instruction *insn)
+{
+	bool at_end = (insn == alt_grp->last_insn || insn == alt_grp->nop);
+
+	return at_end ? NULL : next_insn_same_sec(file, insn);
+}
+
+#define alt_for_each_insn(file, alt_grp, insn) \
+ for (insn = alt_grp->first_insn; \
+ insn; \
+ insn = next_insn_same_alt(file, alt_grp, insn)) /* visits first_insn, then successors until next_insn_same_alt() returns NULL */
+
+/*
+ * Provide a name for the type of alternatives present at the
+ * specified instruction.
+ *
+ * An instruction can have alternatives with different types, for
+ * example alternative instructions and an exception table. In that
+ * case the name for the alternative instructions type is used.
+ *
+ * Return NULL if the instruction has no alternative.
+ */
+const char *disas_alt_type_name(struct instruction *insn)
+{
+	const char *name = NULL;
+	struct alternative *alt;
+
+	for (alt = insn->alts; alt; alt = alt->next) {
+		/* alternative instructions take precedence over any other type */
+		if (alt->type == ALT_TYPE_INSTRUCTIONS)
+			return "alternative";
+
+		if (alt->type == ALT_TYPE_EX_TABLE)
+			name = "ex_table";
+		else if (alt->type == ALT_TYPE_JUMP_TABLE)
+			name = "jump_table";
+		else
+			name = "unknown";
+	}
+
+	return name;
+}
+
+/*
+ * Provide a name for an alternative.
+ *
+ * The name is a newly allocated string, or NULL if it could not be
+ * created, if the alternative type is not handled, or if an instructions
+ * alternative has no alternative group.
+ */
+char *disas_alt_name(struct alternative *alt)
+{
+	char pfx[4] = { 0 };
+	char *p = pfx;
+	const char *feature_name;
+	int feature;
+	int flags;
+	int num;
+
+	if (alt->type == ALT_TYPE_EX_TABLE)
+		return strdup("EXCEPTION");
+
+	if (alt->type == ALT_TYPE_JUMP_TABLE)
+		return strdup("JUMP");
+
+	if (alt->type != ALT_TYPE_INSTRUCTIONS)
+		return NULL;
+
+	/*
+	 * This is a non-default group alternative. Create a name
+	 * based on the feature and flags associated with this
+	 * alternative. Use either the feature name (if it is available)
+	 * or the feature number. And add a prefix to show the flags
+	 * used.
+	 *
+	 * Prefix flags characters:
+	 *
+	 * '!' alternative used when feature not enabled
+	 * '+' direct call alternative
+	 * '?' unknown flag
+	 */
+	if (!alt->insn->alt_group)
+		return NULL;
+
+	feature = alt->insn->alt_group->feature;
+	num = alt_feature(feature);
+	flags = alt_flags(feature);
+
+	if (flags & ~(ALT_FLAG_NOT | ALT_FLAG_DIRECT_CALL))
+		*p++ = '?';
+	if (flags & ALT_FLAG_DIRECT_CALL)
+		*p++ = '+';
+	if (flags & ALT_FLAG_NOT)
+		*p++ = '!';
+
+	feature_name = arch_cpu_feature_name(num);
+	if (feature_name)
+		return strfmt("%s%s", pfx, feature_name);
+
+	return strfmt("%sFEATURE 0x%X", pfx, num);
+}
+
+/*
+ * Initialize an alternative. The default alternative should be initialized
+ * with alt=NULL, in which case it is named "DEFAULT".
+ *
+ * Returns 0 on success, -1 if no name could be created.
+ */
+static int disas_alt_init(struct disas_alt *dalt,
+			  struct instruction *orig_insn,
+			  struct alternative *alt)
+{
+	dalt->orig_insn = orig_insn;
+	dalt->alt = alt;
+	dalt->insn_idx = 0;
+
+	if (alt)
+		dalt->name = disas_alt_name(alt);
+	else
+		dalt->name = strdup("DEFAULT");
+
+	if (!dalt->name)
+		return -1;
+
+	/* the column is at least as wide as the name */
+	dalt->width = strlen(dalt->name);
+
+	return 0;
+}
+
+/*
+ * Store an instruction string, with its offset and NOP size, at the given
+ * index of an alternative and widen the alternative's column if needed.
+ *
+ * Returns 0 on success. Returns -1, after a warning, if the index is out
+ * of range; in that case insn_str is not stored.
+ */
+static int disas_alt_add_insn(struct disas_alt *dalt, int index, char *insn_str,
+			      int offset, int nops)
+{
+	int len;
+
+	if (index >= DISAS_ALT_INSN_MAX) {
+		WARN("Alternative %lx.%s has more instructions than supported",
+		     DALT_ALTID(dalt), dalt->name);
+		return -1;
+	}
+
+	dalt->insn[index].str = insn_str;
+	dalt->insn[index].offset = offset;
+	dalt->insn[index].nops = nops;
+
+	len = strlen(insn_str);
+	if (dalt->width < len)
+		dalt->width = len;
+
+	return 0;
+}
+
+/*
+ * Build the single instruction shown for a jump alternative: a jump to the
+ * alternative's instruction when the original instruction is a NOP,
+ * otherwise a NOP with the length of the original instruction.
+ *
+ * Returns 1 (one instruction), or -1 if the string cannot be allocated.
+ */
+static int disas_alt_jump(struct disas_alt *dalt)
+{
+	struct instruction *orig = dalt->orig_insn;
+	struct instruction *dest = dalt->alt->insn;
+	char suffix[2] = { 0 };
+	int nops = 0;
+	char *str;
+
+	if (orig->type != INSN_NOP) {
+		str = strfmt("nop%d", orig->len);
+		nops = orig->len;
+	} else {
+		/* a 5-byte original is shown with a 'q' suffix on the jump */
+		if (orig->len == 5)
+			suffix[0] = 'q';
+		str = strfmt("jmp%-3s %lx <%s+0x%lx>", suffix,
+			     dest->offset, dest->sym->name,
+			     dest->offset - dest->sym->offset);
+	}
+
+	if (!str)
+		return -1;
+
+	disas_alt_add_insn(dalt, 0, str, 0, nops);
+
+	return 1;
+}
+
+/*
+ * Disassemble an exception table alternative. It is shown as a single
+ * "resume at" entry giving the location of the alternative's instruction.
+ *
+ * Returns 1 (one entry), or -1 if the string cannot be allocated.
+ */
+static int disas_alt_extable(struct disas_alt *dalt)
+{
+	struct instruction *resume = dalt->alt->insn;
+	char *text;
+
+	text = strfmt("resume at 0x%lx <%s+0x%lx>",
+		      resume->offset, resume->sym->name,
+		      resume->offset - resume->sym->offset);
+	if (!text)
+		return -1;
+
+	disas_alt_add_insn(dalt, 0, text, 0, 0);
+
+	return 1;
+}
+
+/*
+ * Disassemble an alternative and store instructions in the disas_alt
+ * structure. Return the number of instructions stored, or -1 if an
+ * instruction string cannot be allocated.
+ *
+ * If the alternative has more instructions than can be stored, the
+ * remaining ones are dropped (disas_alt_add_insn() warns about it) and
+ * the number of instructions actually stored is returned.
+ */
+static int disas_alt_group(struct disas_context *dctx, struct disas_alt *dalt)
+{
+	struct objtool_file *file;
+	struct instruction *insn;
+	int offset;
+	char *str;
+	int count;
+	int nops;
+	int err;
+
+	file = dctx->file;
+	count = 0;
+	offset = 0;
+	nops = 0;
+
+	alt_for_each_insn(file, DALT_GROUP(dalt), insn) {
+
+		disas_insn_alt(dctx, insn);
+		str = strdup(disas_result(dctx));
+		if (!str)
+			return -1;
+
+		nops = insn->type == INSN_NOP ? insn->len : 0;
+		err = disas_alt_add_insn(dalt, count, str, offset, nops);
+		if (err) {
+			/* the string was not stored, so release it here */
+			free(str);
+			break;
+		}
+		offset += insn->len;
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Disassemble the default alternative. Returns the number of instructions
+ * stored, or -1 on error.
+ */
+static int disas_alt_default(struct disas_context *dctx, struct disas_alt *dalt)
+{
+	struct instruction *orig;
+	int nops = 0;
+	char *text;
+
+	if (DALT_GROUP(dalt))
+		return disas_alt_group(dctx, dalt);
+
+	/*
+	 * Default alternative with no alt_group: this is the default
+	 * code associated with either a jump table or an exception
+	 * table and no other instruction alternatives. In that case
+	 * the default alternative is made of a single instruction.
+	 */
+	orig = dalt->orig_insn;
+	disas_insn(dctx, orig);
+	text = strdup(disas_result(dctx));
+	if (!text)
+		return -1;
+
+	if (orig->type == INSN_NOP)
+		nops = orig->len;
+
+	if (disas_alt_add_insn(dalt, 0, text, 0, nops))
+		return -1;
+
+	return 1;
+}
+
+/*
+ * For each alternative, if there is an instruction at the specified
+ * offset then print this instruction, otherwise print a blank entry.
+ * The offset is an offset from the start of the alternative.
+ *
+ * This prints one row of the side-by-side output: one "| ..." cell per
+ * alternative, padded to the alternative's width, then a newline. Each
+ * alternative's insn_idx is used as a cursor on its next instruction to
+ * print; it is advanced when that instruction is printed and set to -1
+ * once insn_count entries have been consumed.
+ *
+ * Return the offset for the next instructions to print, or -1 if all
+ * instructions have been printed.
+ */
+static int disas_alt_print_insn(struct disas_alt *dalts, int alt_count,
+ int insn_count, int offset)
+{
+ struct disas_alt *dalt;
+ int offset_next;
+ char *str;
+ int i, j;
+
+ offset_next = -1;
+
+ for (i = 0; i < alt_count; i++) {
+ dalt = &dalts[i];
+ j = dalt->insn_idx;
+ if (j == -1) { /* this alternative is done: blank cell */
+ printf("| %-*s ", dalt->width, "");
+ continue;
+ }
+
+ if (dalt->insn[j].offset == offset) {
+ str = dalt->insn[j].str;
+ printf("| %-*s ", dalt->width, str ?: ""); /* a NULL string prints as a blank cell */
+ if (++j < insn_count) {
+ dalt->insn_idx = j;
+ } else {
+ dalt->insn_idx = -1;
+ continue;
+ }
+ } else {
+ printf("| %-*s ", dalt->width, ""); /* nothing at this offset: blank cell */
+ }
+
+ /* keep the smallest strictly positive offset among the pending instructions */
+ if (dalt->insn[j].offset > 0 &&
+ (offset_next == -1 ||
+ (dalt->insn[j].offset < offset_next)))
+ offset_next = dalt->insn[j].offset;
+ }
+ printf("\n");
+
+ return offset_next;
+}
+
+/*
+ * Print all alternatives side-by-side: a header row with the name of each
+ * alternative, then one row per instruction offset.
+ */
+static void disas_alt_print_wide(char *alt_name, struct disas_alt *dalts, int alt_count,
+				 int insn_count)
+{
+	struct instruction *orig_insn = dalts[0].orig_insn;
+	size_t title_len = strlen(alt_name);
+	int cur, next;
+	int a;
+
+	/*
+	 * Print a header with the name of each alternative. The first
+	 * column is titled with alt_name and widened to fit it.
+	 */
+	disas_print_info(stdout, orig_insn, -2, NULL);
+
+	if (title_len > dalts[0].width)
+		dalts[0].width = title_len;
+	printf("| %-*s ", dalts[0].width, alt_name);
+
+	for (a = 1; a < alt_count; a++)
+		printf("| %-*s ", dalts[a].width, dalts[a].name);
+
+	printf("\n");
+
+	/*
+	 * Print instructions for each alternative, one row per offset,
+	 * for as long as the next offset to print keeps increasing.
+	 */
+	next = 0;
+	do {
+		cur = next;
+		disas_print(stdout, orig_insn->sec, orig_insn->offset + cur,
+			    -2, NULL);
+		next = disas_alt_print_insn(dalts, alt_count, insn_count, cur);
+	} while (next > cur);
+}
+
+/*
+ * Print all alternatives one above the other.
+ */
+static void disas_alt_print_compact(char *alt_name, struct disas_alt *dalts,
+				    int alt_count, int insn_count)
+{
+	struct instruction *orig_insn = dalts[0].orig_insn;
+	struct disas_alt *dalt;
+	int widest = 0;
+	int pad;
+	int a, n;
+
+	pad = disas_print(stdout, orig_insn->sec, orig_insn->offset, 0, NULL);
+	printf("%s\n", alt_name);
+
+	/*
+	 * If all alternatives have a single instruction then print each
+	 * alternative on a single line. Otherwise, print alternatives
+	 * one above the other with a clear separation.
+	 */
+
+	if (insn_count == 1) {
+		for (a = 0; a < alt_count; a++) {
+			if (widest < dalts[a].width)
+				widest = dalts[a].width;
+		}
+
+		for (a = 0; a < alt_count; a++) {
+			dalt = &dalts[a];
+			printf("%*s= %-*s (if %s)\n", pad, "", widest,
+			       dalt->insn[0].str, dalt->name);
+		}
+
+		return;
+	}
+
+	for (a = 0; a < alt_count; a++) {
+		dalt = &dalts[a];
+		printf("%*s= %s\n", pad, "", dalt->name);
+
+		/* stop at the first entry of this alternative with no string */
+		for (n = 0; n < insn_count && dalt->insn[n].str; n++) {
+			disas_print(stdout, orig_insn->sec,
+				    orig_insn->offset + dalt->insn[n].offset, 0,
+				    "| %s\n", dalt->insn[n].str);
+		}
+		printf("%*s|\n", pad, "");
+	}
+}
+
+/*
+ * Trim NOPs in alternatives. This replaces trailing NOPs in alternatives
+ * with a single indication of the number of bytes covered with NOPs.
+ *
+ * Return the maximum number of instructions in all alternatives after
+ * trailing NOPs have been trimmed.
+ */
+static int disas_alt_trim_nops(struct disas_alt *dalts, int alt_count,
+ int insn_count)
+{
+ struct disas_alt *dalt;
+ int nops_count;
+ const char *s;
+ int offset;
+ int count;
+ int nops;
+ int i, j;
+
+ count = 0;
+ for (i = 0; i < alt_count; i++) {
+ offset = 0;
+ nops = 0;
+ nops_count = 0;
+ dalt = &dalts[i];
+ for (j = insn_count - 1; j >= 0; j--) { /* walk back from the last slot */
+ if (!dalt->insn[j].str || !dalt->insn[j].nops)
+ break;
+ offset = dalt->insn[j].offset; /* ends as the offset of the lowest-index NOP trimmed */
+ free(dalt->insn[j].str);
+ dalt->insn[j].offset = 0;
+ dalt->insn[j].str = NULL;
+ nops += dalt->insn[j].nops;
+ nops_count++;
+ }
+ /* NOTE(review): an empty last slot also stops the scan at once, leaving j at insn_count - 1 - confirm intended. */
+ /*
+ * All trailing NOPs have been removed. If there was a single
+ * NOP instruction then re-add it. If there was a block of
+ * NOPs then indicate the number of bytes that the block
+ * covers (nop*<number-of-bytes>).
+ */
+ if (nops_count) {
+ s = nops_count == 1 ? "" : "*";
+ dalt->insn[j + 1].str = strfmt("nop%s%d", s, nops);
+ dalt->insn[j + 1].offset = offset;
+ dalt->insn[j + 1].nops = nops;
+ j++;
+ }
+ /* j: slot where the backward scan stopped, or the re-added NOP entry. */
+ if (j > count)
+ count = j;
+ }
+
+ return count + 1;
+}
+
+/*
+ * Disassemble an alternative.
+ *
+ * Return the last instruction in the default alternative so that
+ * disassembly can continue with the next instruction. Return NULL
+ * on error.
+ */
+static void *disas_alt(struct disas_context *dctx,
+		       struct instruction *orig_insn)
+{
+	struct disas_alt dalts[DISAS_ALT_MAX] = { 0 };
+	struct instruction *last_insn = NULL;
+	struct alternative *alt;
+	struct disas_alt *dalt;
+	int insn_count = 0;
+	int alt_count = 0;
+	char *alt_name;
+	int count;
+	int i, j;
+	int err;
+
+	alt_name = strfmt("<%s.%lx>", disas_alt_type_name(orig_insn),
+			  orig_insn->offset);
+	if (!alt_name) {
+		WARN("Failed to define name for alternative at instruction 0x%lx",
+		     orig_insn->offset);
+		goto done;
+	}
+
+	/*
+	 * Initialize and disassemble the default alternative.
+	 */
+	err = disas_alt_init(&dalts[0], orig_insn, NULL);
+	if (err) {
+		WARN("%s: failed to initialize default alternative", alt_name);
+		goto done;
+	}
+	alt_count = 1;
+
+	insn_count = disas_alt_default(dctx, &dalts[0]);
+	if (insn_count < 0) {
+		WARN("%s: failed to disassemble default alternative", alt_name);
+		goto done;
+	}
+
+	/*
+	 * Initialize and disassemble all other alternatives. alt_count only
+	 * counts initialized entries, so that "done" can free them on error.
+	 */
+	for (alt = orig_insn->alts; alt; alt = alt->next) {
+		if (alt_count >= DISAS_ALT_MAX) {
+			WARN("%s has more alternatives than supported", alt_name);
+			break;
+		}
+
+		dalt = &dalts[alt_count];
+		err = disas_alt_init(dalt, orig_insn, alt);
+		if (err) {
+			WARN("%s: failed to disassemble alternative", alt_name);
+			goto done;
+		}
+		alt_count++;
+
+		count = -1;
+		switch (dalt->alt->type) {
+		case ALT_TYPE_INSTRUCTIONS:
+			count = disas_alt_group(dctx, dalt);
+			break;
+		case ALT_TYPE_EX_TABLE:
+			count = disas_alt_extable(dalt);
+			break;
+		case ALT_TYPE_JUMP_TABLE:
+			count = disas_alt_jump(dalt);
+			break;
+		}
+		if (count < 0) {
+			WARN("%s: failed to disassemble alternative %s",
+			     alt_name, dalt->name);
+			goto done;
+		}
+
+		insn_count = count > insn_count ? count : insn_count;
+	}
+
+	/*
+	 * Print default and non-default alternatives.
+	 */
+
+	insn_count = disas_alt_trim_nops(dalts, alt_count, insn_count);
+
+	if (opts.wide)
+		disas_alt_print_wide(alt_name, dalts, alt_count, insn_count);
+	else
+		disas_alt_print_compact(alt_name, dalts, alt_count, insn_count);
+
+	last_insn = orig_insn->alt_group ? orig_insn->alt_group->last_insn :
+					   orig_insn;
+
+done:
+	for (i = 0; i < alt_count; i++) {
+		free(dalts[i].name);
+		for (j = 0; j < insn_count; j++)
+			free(dalts[i].insn[j].str);
+	}
+
+	free(alt_name);
+
+	return last_insn;
+}
+
+/*
+ * Disassemble a function: print its name, then each of its instructions,
+ * printing alternatives in full where an instruction has some.
+ */
+static void disas_func(struct disas_context *dctx, struct symbol *func)
+{
+	struct instruction *alt_end;
+	struct instruction *insn;
+
+	printf("%s:\n", func->name);
+	sym_for_each_insn(dctx->file, func, insn) {
+		alt_end = insn->alts ? disas_alt(dctx, insn) : NULL;
+		if (alt_end) {
+			/*
+			 * The alternative was printed in full; let the
+			 * iteration resume after its last instruction.
+			 */
+			insn = alt_end;
+			continue;
+		}
+
+		/*
+		 * Plain instruction, or an alternative which could not be
+		 * disassembled: then only the default alternative is
+		 * printed, starting with this instruction.
+		 */
+		DISAS_PRINSN(dctx, insn, 0);
+	}
+	printf("\n");
+}
+
+/*
+ * Disassemble every function flagged as warned. Does nothing when no
+ * disassembly context is available.
+ */
+void disas_warned_funcs(struct disas_context *dctx)
+{
+	struct symbol *func;
+
+	if (dctx) {
+		for_each_sym(dctx->file->elf, func) {
+			if (func->warned)
+				disas_func(dctx, func);
+		}
+	}
+}
+
+/* Disassemble each symbol of an executable section whose name matches opts.disas ("*" = all). */
+void disas_funcs(struct disas_context *dctx)
+{
+	bool disas_all = !strcmp(opts.disas, "*");
+	struct section *sec;
+	struct symbol *sym;
+
+	/* Tolerate a missing context, like disas_warned_funcs() does. */
+	if (!dctx)
+		return;
+
+	for_each_sec(dctx->file->elf, sec) {
+		if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+			continue;
+
+		sec_for_each_sym(sec, sym) {
+			/* With verbose, warned functions were already disassembled. */
+			if (opts.verbose && sym->warned)
+				continue;
+
+			if (disas_all || fnmatch(opts.disas, sym->name, 0) == 0)
+				disas_func(dctx, sym);
+		}
+	}
+}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index ca5d77db692a..6a8ed9c62323 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -16,12 +16,17 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
+#include <libgen.h>
+#include <ctype.h>
#include <linux/interval_tree_generic.h>
#include <objtool/builtin.h>
-
#include <objtool/elf.h>
#include <objtool/warn.h>
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) /* mask-based: only rounds correctly when align_to is a power of two */
+#define ALIGN_UP_POW2(x) (1U << ((8 * sizeof(x)) - __builtin_clz((x) - 1U))) /* NOTE(review): __builtin_clz(0) is undefined, so x <= 1 looks unsafe - confirm callers */
+#define MAX(a, b) ((a) > (b) ? (a) : (b)) /* the selected argument is evaluated twice */
+
static inline u32 str_hash(const char *str)
{
return jhash(str, strlen(str), 0);
@@ -92,11 +97,12 @@ static inline unsigned long __sym_start(struct symbol *s)
static inline unsigned long __sym_last(struct symbol *s)
{
- return s->offset + s->len - 1;
+ return s->offset + (s->len ? s->len - 1 : 0); /* a zero-length symbol ends at its own offset */
}
INTERVAL_TREE_DEFINE(struct symbol, node, unsigned long, __subtree_last,
- __sym_start, __sym_last, static, __sym)
+ __sym_start, __sym_last, static inline __maybe_unused,
+ __sym)
#define __sym_for_each(_iter, _tree, _start, _end) \
for (_iter = __sym_iter_first((_tree), (_start), (_end)); \
@@ -108,7 +114,7 @@ struct symbol_hole {
};
/*
- * Find !section symbol where @offset is after it.
+ * Find the last symbol before @offset.
*/
static int symbol_hole_by_offset(const void *key, const struct rb_node *node)
{
@@ -119,8 +125,7 @@ static int symbol_hole_by_offset(const void *key, const struct rb_node *node)
return -1;
if (sh->key >= s->offset + s->len) {
- if (s->type != STT_SECTION)
- sh->sym = s;
+ sh->sym = s;
return 1;
}
@@ -167,11 +172,11 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
{
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
- struct symbol *iter;
+ struct symbol *sym;
- __sym_for_each(iter, tree, offset, offset) {
- if (iter->offset == offset && iter->type != STT_SECTION)
- return iter;
+ __sym_for_each(sym, tree, offset, offset) {
+ if (sym->offset == offset && !is_sec_sym(sym))
+ return sym->alias;
}
return NULL;
@@ -180,11 +185,11 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
{
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
- struct symbol *iter;
+ struct symbol *func;
- __sym_for_each(iter, tree, offset, offset) {
- if (iter->offset == offset && iter->type == STT_FUNC)
- return iter;
+ __sym_for_each(func, tree, offset, offset) {
+ if (func->offset == offset && is_func_sym(func))
+ return func->alias;
}
return NULL;
@@ -193,14 +198,29 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
{
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
- struct symbol *iter;
+ struct symbol *sym = NULL, *tmp;
- __sym_for_each(iter, tree, offset, offset) {
- if (iter->type != STT_SECTION)
- return iter;
+ __sym_for_each(tmp, tree, offset, offset) {
+ if (tmp->len) { /* zero-length symbols are never picked */
+ if (!sym) {
+ sym = tmp;
+ continue;
+ }
+ /* Only reconsider the pick when tmp covers a different range. */
+ if (sym->offset != tmp->offset || sym->len != tmp->len) {
+ /*
+ * In the rare case of overlapping symbols,
+ * pick the smaller one.
+ *
+ * TODO: outlaw overlapping symbols
+ */
+ if (tmp->len < sym->len)
+ sym = tmp;
+ }
+ }
+ }
- return NULL;
+ return sym ? sym->alias : NULL;
}
/*
@@ -246,11 +266,11 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset)
struct symbol *find_func_containing(struct section *sec, unsigned long offset)
{
struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
- struct symbol *iter;
+ struct symbol *func;
- __sym_for_each(iter, tree, offset, offset) {
- if (iter->type == STT_FUNC)
- return iter;
+ __sym_for_each(func, tree, offset, offset) {
+ if (is_func_sym(func))
+ return func->alias;
}
return NULL;
@@ -268,6 +288,35 @@ struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
return NULL;
}
+/* Look up the STB_LOCAL symbol called @name whose file symbol is @file */
+static struct symbol *find_local_symbol_by_file_and_name(const struct elf *elf,
+							 struct symbol *file,
+							 const char *name)
+{
+	struct symbol *cand;
+
+	elf_hash_for_each_possible(symbol_name, cand, name_hash, str_hash(name)) {
+		bool same_scope = cand->bind == STB_LOCAL && cand->file == file;
+
+		if (same_scope && !strcmp(cand->name, name))
+			return cand;
+	}
+
+	return NULL;
+}
+
+/* Find a non-local symbol named @name; NULL if there is none */
+struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *name)
+{
+	struct symbol *cand;
+
+	elf_hash_for_each_possible(symbol_name, cand, name_hash, str_hash(name)) {
+		if (strcmp(cand->name, name) == 0 && !is_local_sym(cand))
+			return cand;
+	}
+	return NULL;
+}
+
struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len)
{
@@ -358,14 +407,14 @@ static int read_sections(struct elf *elf)
return -1;
}
- if (sec->sh.sh_size != 0 && !is_dwarf_section(sec)) {
+ if (sec_size(sec) != 0 && !is_dwarf_section(sec)) {
sec->data = elf_getdata(s, NULL);
if (!sec->data) {
ERROR_ELF("elf_getdata");
return -1;
}
if (sec->data->d_off != 0 ||
- sec->data->d_size != sec->sh.sh_size) {
+ sec->data->d_size != sec_size(sec)) {
ERROR("unexpected data attributes for %s", sec->name);
return -1;
}
@@ -393,7 +442,38 @@ static int read_sections(struct elf *elf)
return 0;
}
-static void elf_add_symbol(struct elf *elf, struct symbol *sym)
+/*
+ * Return @sym's name without its trailing digits and dots if @sym is a local
+ * function/object named "__UNIQUE_ID_*" or containing '.'; else sym->name.
+ * The stripped name is a strdup() copy. Returns NULL if strdup() fails.
+ */
+static const char *demangle_name(struct symbol *sym)
+{
+	char *str;
+
+	if (!is_local_sym(sym))
+		return sym->name;
+	if (!is_func_sym(sym) && !is_object_sym(sym))
+		return sym->name;
+	if (!strstarts(sym->name, "__UNIQUE_ID_") && !strchr(sym->name, '.'))
+		return sym->name;
+
+	str = strdup(sym->name);
+	if (!str) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+	for (int i = strlen(str) - 1; i >= 0; i--) {
+		/* <ctype.h> functions need an unsigned char value */
+		if (!isdigit((unsigned char)str[i]) && str[i] != '.') {
+			str[i + 1] = '\0';
+			break;
+		}
+	}
+	return str;
+}
+
+static int elf_add_symbol(struct elf *elf, struct symbol *sym)
{
struct list_head *entry;
struct rb_node *pnode;
@@ -405,14 +485,15 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
sym->type = GELF_ST_TYPE(sym->sym.st_info);
sym->bind = GELF_ST_BIND(sym->sym.st_info);
- if (sym->type == STT_FILE)
+ if (is_file_sym(sym))
elf->num_files++;
sym->offset = sym->sym.st_value;
sym->len = sym->sym.st_size;
__sym_for_each(iter, &sym->sec->symbol_tree, sym->offset, sym->offset) {
- if (iter->offset == sym->offset && iter->type == sym->type)
+ if (!is_undef_sym(iter) && iter->offset == sym->offset &&
+ iter->type == sym->type && iter->len == sym->len)
iter->alias = sym;
}
@@ -423,21 +504,44 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
else
entry = &sym->sec->symbol_list;
list_add(&sym->list, entry);
+
+ list_add_tail(&sym->global_list, &elf->symbols);
elf_hash_add(symbol, &sym->hash, sym->idx);
elf_hash_add(symbol_name, &sym->name_hash, str_hash(sym->name));
- /*
- * Don't store empty STT_NOTYPE symbols in the rbtree. They
- * can exist within a function, confusing the sorting.
- */
- if (!sym->len)
- __sym_remove(sym, &sym->sec->symbol_tree);
+ if (is_func_sym(sym) &&
+ (strstarts(sym->name, "__pfx_") ||
+ strstarts(sym->name, "__cfi_") ||
+ strstarts(sym->name, "__pi___pfx_") ||
+ strstarts(sym->name, "__pi___cfi_")))
+ sym->prefix = 1;
+
+ if (strstarts(sym->name, ".klp.sym"))
+ sym->klp = 1;
+
+ if (!sym->klp && !is_sec_sym(sym) && strstr(sym->name, ".cold")) {
+ sym->cold = 1;
+
+ /*
+ * Clang doesn't mark cold subfunctions as STT_FUNC, which
+ * breaks several objtool assumptions. Fake it.
+ */
+ sym->type = STT_FUNC;
+ }
+
+ sym->pfunc = sym->cfunc = sym;
+
+ sym->demangled_name = demangle_name(sym);
+ if (!sym->demangled_name)
+ return -1;
+
+ return 0;
}
static int read_symbols(struct elf *elf)
{
struct section *symtab, *symtab_shndx, *sec;
- struct symbol *sym, *pfunc;
+ struct symbol *sym, *pfunc, *file = NULL;
int symbols_nr, i;
char *coldstr;
Elf_Data *shndx_data = NULL;
@@ -469,6 +573,9 @@ static int read_symbols(struct elf *elf)
ERROR_GLIBC("calloc");
return -1;
}
+
+ INIT_LIST_HEAD(&elf->symbols);
+
for (i = 0; i < symbols_nr; i++) {
sym = &elf->symbol_data[i];
@@ -477,14 +584,14 @@ static int read_symbols(struct elf *elf)
if (!gelf_getsymshndx(symtab->data, shndx_data, i, &sym->sym,
&shndx)) {
ERROR_ELF("gelf_getsymshndx");
- goto err;
+ return -1;
}
sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
sym->sym.st_name);
if (!sym->name) {
ERROR_ELF("elf_strptr");
- goto err;
+ return -1;
}
if ((sym->sym.st_shndx > SHN_UNDEF &&
@@ -496,7 +603,7 @@ static int read_symbols(struct elf *elf)
sym->sec = find_section_by_index(elf, shndx);
if (!sym->sec) {
ERROR("couldn't find section for symbol %s", sym->name);
- goto err;
+ return -1;
}
if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
sym->name = sym->sec->name;
@@ -505,7 +612,13 @@ static int read_symbols(struct elf *elf)
} else
sym->sec = find_section_by_index(elf, 0);
- elf_add_symbol(elf, sym);
+ if (elf_add_symbol(elf, sym))
+ return -1;
+
+ if (sym->type == STT_FILE)
+ file = sym;
+ else if (sym->bind == STB_LOCAL)
+ sym->file = file;
}
if (opts.stats) {
@@ -518,18 +631,15 @@ static int read_symbols(struct elf *elf)
sec_for_each_sym(sec, sym) {
char *pname;
size_t pnamelen;
- if (sym->type != STT_FUNC)
- continue;
-
- if (sym->pfunc == NULL)
- sym->pfunc = sym;
- if (sym->cfunc == NULL)
- sym->cfunc = sym;
+ if (!sym->cold)
+ continue;
coldstr = strstr(sym->name, ".cold");
- if (!coldstr)
- continue;
+ if (!coldstr) {
+ ERROR("%s(): cold subfunction without \".cold\"?", sym->name);
+ return -1;
+ }
pnamelen = coldstr - sym->name;
pname = strndup(sym->name, pnamelen);
@@ -538,7 +648,9 @@ static int read_symbols(struct elf *elf)
return -1;
}
- pfunc = find_symbol_by_name(elf, pname);
+ pfunc = find_local_symbol_by_file_and_name(elf, sym->file, pname);
+ if (!pfunc)
+ pfunc = find_global_symbol_by_name(elf, pname);
free(pname);
if (!pfunc) {
@@ -546,8 +658,9 @@ static int read_symbols(struct elf *elf)
return -1;
}
- sym->pfunc = pfunc;
+ sym->pfunc = pfunc->alias;
pfunc->cfunc = sym;
+ pfunc->alias->cfunc = sym;
/*
* Unfortunately, -fnoreorder-functions puts the child
@@ -566,10 +679,6 @@ static int read_symbols(struct elf *elf)
}
return 0;
-
-err:
- free(sym);
- return -1;
}
static int mark_group_syms(struct elf *elf)
@@ -583,7 +692,7 @@ static int mark_group_syms(struct elf *elf)
return -1;
}
- list_for_each_entry(sec, &elf->sections, list) {
+ for_each_sec(elf, sec) {
if (sec->sh.sh_type == SHT_GROUP &&
sec->sh.sh_link == symtab->idx) {
sym = find_symbol_by_index(elf, sec->sh.sh_info);
@@ -624,7 +733,7 @@ static int elf_update_sym_relocs(struct elf *elf, struct symbol *sym)
static int elf_update_symbol(struct elf *elf, struct section *symtab,
struct section *symtab_shndx, struct symbol *sym)
{
- Elf32_Word shndx = sym->sec ? sym->sec->idx : SHN_UNDEF;
+ Elf32_Word shndx;
Elf_Data *symtab_data = NULL, *shndx_data = NULL;
Elf64_Xword entsize = symtab->sh.sh_entsize;
int max_idx, idx = sym->idx;
@@ -632,8 +741,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
bool is_special_shndx = sym->sym.st_shndx >= SHN_LORESERVE &&
sym->sym.st_shndx != SHN_XINDEX;
- if (is_special_shndx)
- shndx = sym->sym.st_shndx;
+ shndx = is_special_shndx ? sym->sym.st_shndx : sym->sec->idx;
s = elf_getscn(elf->elf, symtab->idx);
if (!s) {
@@ -731,7 +839,7 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
}
/* setup extended section index magic and write the symbol */
- if ((shndx >= SHN_UNDEF && shndx < SHN_LORESERVE) || is_special_shndx) {
+ if (shndx < SHN_LORESERVE || is_special_shndx) {
sym->sym.st_shndx = shndx;
if (!shndx_data)
shndx = 0;
@@ -751,24 +859,58 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
return 0;
}
-static struct symbol *
-__elf_create_symbol(struct elf *elf, struct symbol *sym)
+struct symbol *elf_create_symbol(struct elf *elf, const char *name,
+ struct section *sec, unsigned int bind,
+ unsigned int type, unsigned long offset,
+ size_t size)
{
struct section *symtab, *symtab_shndx;
Elf32_Word first_non_local, new_idx;
- struct symbol *old;
+ struct symbol *old, *sym;
- symtab = find_section_by_name(elf, ".symtab");
- if (symtab) {
- symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ sym = calloc(1, sizeof(*sym));
+ if (!sym) {
+ ERROR_GLIBC("calloc");
+ return NULL;
+ }
+
+ sym->name = strdup(name);
+ if (!sym->name) {
+ ERROR_GLIBC("strdup");
+ return NULL;
+ }
+
+ if (type != STT_SECTION) {
+ sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+ if (sym->sym.st_name == -1)
+ return NULL;
+ }
+
+ if (sec) {
+ sym->sec = sec;
} else {
+ sym->sec = find_section_by_index(elf, 0);
+ if (!sym->sec) {
+ ERROR("no NULL section");
+ return NULL;
+ }
+ }
+
+ sym->sym.st_info = GELF_ST_INFO(bind, type);
+ sym->sym.st_value = offset;
+ sym->sym.st_size = size;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (!symtab) {
ERROR("no .symtab");
return NULL;
}
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+
new_idx = sec_num_entries(symtab);
- if (GELF_ST_BIND(sym->sym.st_info) != STB_LOCAL)
+ if (bind != STB_LOCAL)
goto non_local;
/*
@@ -806,10 +948,8 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym)
non_local:
sym->idx = new_idx;
- if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
- ERROR("elf_update_symbol");
+ if (sym->idx && elf_update_symbol(elf, symtab, symtab_shndx, sym))
return NULL;
- }
symtab->sh.sh_size += symtab->sh.sh_entsize;
mark_sec_changed(elf, symtab, true);
@@ -819,70 +959,28 @@ non_local:
mark_sec_changed(elf, symtab_shndx, true);
}
- return sym;
-}
-
-static struct symbol *
-elf_create_section_symbol(struct elf *elf, struct section *sec)
-{
- struct symbol *sym = calloc(1, sizeof(*sym));
-
- if (!sym) {
- ERROR_GLIBC("malloc");
+ if (elf_add_symbol(elf, sym))
return NULL;
- }
-
- sym->name = sec->name;
- sym->sec = sec;
-
- // st_name 0
- sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
- // st_other 0
- // st_value 0
- // st_size 0
-
- sym = __elf_create_symbol(elf, sym);
- if (sym)
- elf_add_symbol(elf, sym);
return sym;
}
-static int elf_add_string(struct elf *elf, struct section *strtab, char *str);
-
-struct symbol *
-elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size)
+struct symbol *elf_create_section_symbol(struct elf *elf, struct section *sec)
{
- struct symbol *sym = calloc(1, sizeof(*sym));
+	struct symbol *sym;	/* allocated by elf_create_symbol(); a calloc() here would leak */
- size_t namelen = strlen(orig->name) + sizeof("__pfx_");
- char *name = malloc(namelen);
- if (!sym || !name) {
- ERROR_GLIBC("malloc");
+	sym = elf_create_symbol(elf, sec->name, sec, STB_LOCAL, STT_SECTION, 0, 0);
+	if (!sym)
return NULL;
- }
- snprintf(name, namelen, "__pfx_%s", orig->name);
-
- sym->name = name;
- sym->sec = orig->sec;
-
- sym->sym.st_name = elf_add_string(elf, NULL, name);
- sym->sym.st_info = orig->sym.st_info;
- sym->sym.st_value = orig->sym.st_value - size;
- sym->sym.st_size = size;
-
- sym = __elf_create_symbol(elf, sym);
- if (sym)
- elf_add_symbol(elf, sym);
+	sec->sym = sym;	/* record the new section symbol on @sec */
return sym;
}
-static struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
- unsigned int reloc_idx,
- unsigned long offset, struct symbol *sym,
- s64 addend, unsigned int type)
+struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
+ unsigned int reloc_idx, unsigned long offset,
+ struct symbol *sym, s64 addend, unsigned int type)
{
struct reloc *reloc, empty = { 0 };
@@ -922,9 +1020,9 @@ struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
unsigned long insn_off)
{
struct symbol *sym = insn_sec->sym;
- int addend = insn_off;
+ s64 addend = insn_off;
- if (!(insn_sec->sh.sh_flags & SHF_EXECINSTR)) {
+ if (!is_text_sec(insn_sec)) {
ERROR("bad call to %s() for data symbol %s", __func__, sym->name);
return NULL;
}
@@ -939,8 +1037,6 @@ struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
sym = elf_create_section_symbol(elf, insn_sec);
if (!sym)
return NULL;
-
- insn_sec->sym = sym;
}
return elf_init_reloc(elf, sec->rsec, reloc_idx, offset, sym, addend,
@@ -953,7 +1049,7 @@ struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec,
struct symbol *sym,
s64 addend)
{
- if (sym->sec && (sec->sh.sh_flags & SHF_EXECINSTR)) {
+ if (is_text_sec(sec)) {
ERROR("bad call to %s() for text symbol %s", __func__, sym->name);
return NULL;
}
@@ -986,12 +1082,16 @@ static int read_relocs(struct elf *elf)
rsec->base->rsec = rsec;
- nr_reloc = 0;
+ /* nr_alloc_relocs=0: libelf owns d_buf */
+ rsec->nr_alloc_relocs = 0;
+
rsec->relocs = calloc(sec_num_entries(rsec), sizeof(*reloc));
if (!rsec->relocs) {
ERROR_GLIBC("calloc");
return -1;
}
+
+ nr_reloc = 0;
for (i = 0; i < sec_num_entries(rsec); i++) {
reloc = &rsec->relocs[i];
@@ -1044,6 +1144,12 @@ struct elf *elf_open_read(const char *name, int flags)
goto err;
}
+ elf->name = strdup(name);
+ if (!elf->name) {
+ ERROR_GLIBC("strdup");
+ return NULL;
+ }
+
if ((flags & O_ACCMODE) == O_RDONLY)
cmd = ELF_C_READ_MMAP;
else if ((flags & O_ACCMODE) == O_RDWR)
@@ -1081,11 +1187,142 @@ err:
return NULL;
}
-static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
+/*
+ * Create a new ELF object for @name in a "<name>.XXXXXX" temporary file,
+ * seeded with @ehdr, the NULL/.shstrtab/.strtab/.symtab sections and the NULL
+ * symbol. Returns NULL on error, except that a mkstemp() failure exits.
+ */
+struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name)
{
- Elf_Data *data;
- Elf_Scn *s;
- int len;
+	struct section *null, *symtab, *strtab, *shstrtab;
+	char *dir, *base, *tmp_name;
+	struct symbol *sym;
+	struct elf *elf;
+	size_t tmp_len;
+
+	elf_version(EV_CURRENT);
+
+	elf = calloc(1, sizeof(*elf));
+	if (!elf) {
+		ERROR_GLIBC("calloc");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&elf->sections);
+
+	dir = strdup(name);
+	if (!dir) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+	dir = dirname(dir);
+
+	base = strdup(name);
+	if (!base) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+	base = basename(base);
+
+	/* Size the mkstemp() template from its parts so it is never truncated. */
+	tmp_len = strlen(dir) + strlen(base) + sizeof("/.XXXXXX");
+	tmp_name = malloc(tmp_len);
+	if (!tmp_name) {
+		ERROR_GLIBC("malloc");
+		return NULL;
+	}
+	snprintf(tmp_name, tmp_len, "%s/%s.XXXXXX", dir, base);
+
+	elf->fd = mkstemp(tmp_name);
+	if (elf->fd == -1) {
+		ERROR_GLIBC("can't create tmp file");
+		exit(1);
+	}
+	elf->tmp_name = tmp_name;
+	elf->name = strdup(name);
+	if (!elf->name) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+
+	elf->elf = elf_begin(elf->fd, ELF_C_WRITE, NULL);
+	if (!elf->elf) {
+		ERROR_ELF("elf_begin");
+		return NULL;
+	}
+
+	if (!gelf_newehdr(elf->elf, ELFCLASS64)) {
+		ERROR_ELF("gelf_newehdr");
+		return NULL;
+	}
+
+	memcpy(&elf->ehdr, ehdr, sizeof(elf->ehdr));
+	if (!gelf_update_ehdr(elf->elf, &elf->ehdr)) {
+		ERROR_ELF("gelf_update_ehdr");
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&elf->symbols);
+
+	if (!elf_alloc_hash(section, 1000) ||
+	    !elf_alloc_hash(section_name, 1000) ||
+	    !elf_alloc_hash(symbol, 10000) ||
+	    !elf_alloc_hash(symbol_name, 10000) ||
+	    !elf_alloc_hash(reloc, 100000))
+		return NULL;
+
+	null = elf_create_section(elf, NULL, 0, 0, SHT_NULL, 0, 0);
+	shstrtab = elf_create_section(elf, NULL, 0, 0, SHT_STRTAB, 1, 0);
+	strtab = elf_create_section(elf, NULL, 0, 0, SHT_STRTAB, 1, 0);
+	if (!null || !shstrtab || !strtab)
+		return NULL;
+	/* Created without a name above: name them by hand now that .shstrtab exists. */
+	null->name = "";
+	shstrtab->name = ".shstrtab";
+	strtab->name = ".strtab";
+
+	null->sh.sh_name = elf_add_string(elf, shstrtab, null->name);
+	shstrtab->sh.sh_name = elf_add_string(elf, shstrtab, shstrtab->name);
+	strtab->sh.sh_name = elf_add_string(elf, shstrtab, strtab->name);
+	if (null->sh.sh_name == -1 || shstrtab->sh.sh_name == -1 || strtab->sh.sh_name == -1)
+		return NULL;
+
+	elf_hash_add(section_name, &null->name_hash, str_hash(null->name));
+	elf_hash_add(section_name, &strtab->name_hash, str_hash(strtab->name));
+	elf_hash_add(section_name, &shstrtab->name_hash, str_hash(shstrtab->name));
+
+	if (elf_add_string(elf, strtab, "") == -1)
+		return NULL;
+
+	symtab = elf_create_section(elf, ".symtab", 0x18, 0x18, SHT_SYMTAB, 0x8, 0);
+	if (!symtab)
+		return NULL;
+
+	symtab->sh.sh_link = strtab->idx;
+	symtab->sh.sh_info = 1;
+
+	elf->ehdr.e_shstrndx = shstrtab->idx;
+	if (!gelf_update_ehdr(elf->elf, &elf->ehdr)) {
+		ERROR_ELF("gelf_update_ehdr");
+		return NULL;
+	}
+
+	sym = calloc(1, sizeof(*sym));
+	if (!sym) {
+		ERROR_GLIBC("calloc");
+		return NULL;
+	}
+	/* The zero-initialized NULL symbol: empty name, in the NULL section. */
+	sym->name = "";
+	sym->sec = null;
+	if (elf_add_symbol(elf, sym))
+		return NULL;
+	return elf;
+}
+
+unsigned int elf_add_string(struct elf *elf, struct section *strtab, const char *str)
+{
+ unsigned int offset;
if (!strtab)
strtab = find_section_by_name(elf, ".strtab");
@@ -1094,76 +1331,109 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
return -1;
}
- s = elf_getscn(elf->elf, strtab->idx);
+ if (!strtab->sh.sh_addralign) {
+ ERROR("'%s': invalid sh_addralign", strtab->name);
+ return -1;
+ }
+
+ offset = ALIGN_UP(strtab->sh.sh_size, strtab->sh.sh_addralign);
+
+ if (!elf_add_data(elf, strtab, str, strlen(str) + 1))
+ return -1;
+
+ return offset;
+}
+
+void *elf_add_data(struct elf *elf, struct section *sec, const void *data, size_t size)
+{
+ unsigned long offset;
+ Elf_Scn *s;
+
+ if (!sec->sh.sh_addralign) {
+ ERROR("'%s': invalid sh_addralign", sec->name);
+ return NULL;
+ }
+ /* Each call attaches a new libelf data chunk to the section. */
+ s = elf_getscn(elf->elf, sec->idx);
if (!s) {
ERROR_ELF("elf_getscn");
- return -1;
+ return NULL;
}
- data = elf_newdata(s);
- if (!data) {
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
ERROR_ELF("elf_newdata");
- return -1;
+ return NULL;
}
- data->d_buf = str;
- data->d_size = strlen(str) + 1;
- data->d_align = 1;
+ sec->data->d_buf = calloc(1, size);
+ if (!sec->data->d_buf) {
+ ERROR_GLIBC("calloc");
+ return NULL;
+ }
- len = strtab->sh.sh_size;
- strtab->sh.sh_size += data->d_size;
+ if (data) /* a NULL @data leaves the new chunk zero-filled */
+ memcpy(sec->data->d_buf, data, size);
- mark_sec_changed(elf, strtab, true);
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
- return len;
+ offset = ALIGN_UP(sec->sh.sh_size, sec->sh.sh_addralign);
+ sec->sh.sh_size = offset + size;
+ /* NOTE(review): sh_size is padded to sh_addralign while d_align is 1 - confirm libelf places the chunk at 'offset'. */
+ mark_sec_changed(elf, sec, true);
+
+ return sec->data->d_buf;
+}
struct section *elf_create_section(struct elf *elf, const char *name,
- size_t entsize, unsigned int nr)
+ size_t size, size_t entsize,
+ unsigned int type, unsigned int align,
+ unsigned int flags)
{
struct section *sec, *shstrtab;
- size_t size = entsize * nr;
Elf_Scn *s;
- sec = malloc(sizeof(*sec));
+ if (name && find_section_by_name(elf, name)) {
+ ERROR("section '%s' already exists", name);
+ return NULL;
+ }
+
+ sec = calloc(1, sizeof(*sec));
if (!sec) {
- ERROR_GLIBC("malloc");
+ ERROR_GLIBC("calloc");
return NULL;
}
- memset(sec, 0, sizeof(*sec));
INIT_LIST_HEAD(&sec->symbol_list);
+ /* don't actually create the section, just the data structures */
+ if (type == SHT_NULL)
+ goto add;
+
s = elf_newscn(elf->elf);
if (!s) {
ERROR_ELF("elf_newscn");
return NULL;
}
- sec->name = strdup(name);
- if (!sec->name) {
- ERROR_GLIBC("strdup");
- return NULL;
- }
-
sec->idx = elf_ndxscn(s);
- sec->data = elf_newdata(s);
- if (!sec->data) {
- ERROR_ELF("elf_newdata");
- return NULL;
- }
+ if (size) {
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
+ ERROR_ELF("elf_newdata");
+ return NULL;
+ }
- sec->data->d_size = size;
- sec->data->d_align = 1;
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
- if (size) {
- sec->data->d_buf = malloc(size);
+ sec->data->d_buf = calloc(1, size);
if (!sec->data->d_buf) {
- ERROR_GLIBC("malloc");
+ ERROR_GLIBC("calloc");
return NULL;
}
- memset(sec->data->d_buf, 0, size);
}
if (!gelf_getshdr(s, &sec->sh)) {
@@ -1173,34 +1443,152 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->sh.sh_size = size;
sec->sh.sh_entsize = entsize;
- sec->sh.sh_type = SHT_PROGBITS;
- sec->sh.sh_addralign = 1;
- sec->sh.sh_flags = SHF_ALLOC;
-
- /* Add section name to .shstrtab (or .strtab for Clang) */
- shstrtab = find_section_by_name(elf, ".shstrtab");
- if (!shstrtab)
- shstrtab = find_section_by_name(elf, ".strtab");
- if (!shstrtab) {
- ERROR("can't find .shstrtab or .strtab section");
- return NULL;
+ sec->sh.sh_type = type;
+ sec->sh.sh_addralign = align;
+ sec->sh.sh_flags = flags;
+
+ if (name) {
+ sec->name = strdup(name);
+ if (!sec->name) {
+ ERROR("strdup");
+ return NULL;
+ }
+
+ /* Add section name to .shstrtab (or .strtab for Clang) */
+ shstrtab = find_section_by_name(elf, ".shstrtab");
+ if (!shstrtab) {
+ shstrtab = find_section_by_name(elf, ".strtab");
+ if (!shstrtab) {
+ ERROR("can't find .shstrtab or .strtab");
+ return NULL;
+ }
+ }
+ sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
+ if (sec->sh.sh_name == -1)
+ return NULL;
+
+ elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
}
- sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
- if (sec->sh.sh_name == -1)
- return NULL;
+add:
list_add_tail(&sec->list, &elf->sections);
elf_hash_add(section, &sec->hash, sec->idx);
- elf_hash_add(section_name, &sec->name_hash, str_hash(sec->name));
mark_sec_changed(elf, sec, true);
return sec;
}
-static struct section *elf_create_rela_section(struct elf *elf,
- struct section *sec,
- unsigned int reloc_nr)
+static int elf_alloc_reloc(struct elf *elf, struct section *rsec)
+{
+ struct reloc *old_relocs, *old_relocs_end, *new_relocs;
+ unsigned int nr_relocs_old = sec_num_entries(rsec);
+ unsigned int nr_relocs_new = nr_relocs_old + 1;
+ unsigned long nr_alloc;
+ struct symbol *sym;
+
+ if (!rsec->data) {
+ rsec->data = elf_newdata(elf_getscn(elf->elf, rsec->idx));
+ if (!rsec->data) {
+ ERROR_ELF("elf_newdata");
+ return -1;
+ }
+
+ rsec->data->d_align = 1;
+ rsec->data->d_type = ELF_T_RELA;
+ rsec->data->d_buf = NULL;
+ }
+
+ rsec->data->d_size = nr_relocs_new * elf_rela_size(elf);
+ rsec->sh.sh_size = rsec->data->d_size;
+
+ nr_alloc = MAX(64, ALIGN_UP_POW2(nr_relocs_new));
+ if (nr_alloc <= rsec->nr_alloc_relocs)
+ return 0;
+
+ if (rsec->data->d_buf && !rsec->nr_alloc_relocs) {
+ void *orig_buf = rsec->data->d_buf;
+
+ /*
+ * The original d_buf is owned by libelf so it can't be
+ * realloced.
+ */
+ rsec->data->d_buf = malloc(nr_alloc * elf_rela_size(elf));
+ if (!rsec->data->d_buf) {
+ ERROR_GLIBC("malloc");
+ return -1;
+ }
+ memcpy(rsec->data->d_buf, orig_buf,
+ nr_relocs_old * elf_rela_size(elf));
+ } else {
+ rsec->data->d_buf = realloc(rsec->data->d_buf,
+ nr_alloc * elf_rela_size(elf));
+ if (!rsec->data->d_buf) {
+ ERROR_GLIBC("realloc");
+ return -1;
+ }
+ }
+
+ rsec->nr_alloc_relocs = nr_alloc;
+
+ old_relocs = rsec->relocs;
+ new_relocs = calloc(nr_alloc, sizeof(struct reloc));
+ if (!new_relocs) {
+ ERROR_GLIBC("calloc");
+ return -1;
+ }
+
+ if (!old_relocs)
+ goto done;
+
+ /*
+ * The struct reloc's address has changed. Update all the symbols and
+ * relocs which reference it.
+ */
+
+ old_relocs_end = &old_relocs[nr_relocs_old];
+ for_each_sym(elf, sym) {
+ struct reloc *reloc;
+
+ reloc = sym->relocs;
+ if (!reloc)
+ continue;
+
+ if (reloc >= old_relocs && reloc < old_relocs_end)
+ sym->relocs = &new_relocs[reloc - old_relocs];
+
+ while (1) {
+ struct reloc *next_reloc = sym_next_reloc(reloc);
+
+ if (!next_reloc)
+ break;
+
+ if (next_reloc >= old_relocs && next_reloc < old_relocs_end)
+ set_sym_next_reloc(reloc, &new_relocs[next_reloc - old_relocs]);
+
+ reloc = next_reloc;
+ }
+ }
+
+ memcpy(new_relocs, old_relocs, nr_relocs_old * sizeof(struct reloc));
+
+ for (int i = 0; i < nr_relocs_old; i++) {
+ struct reloc *old = &old_relocs[i];
+ struct reloc *new = &new_relocs[i];
+ u32 key = reloc_hash(old);
+
+ elf_hash_del(reloc, &old->hash, key);
+ elf_hash_add(reloc, &new->hash, key);
+ }
+
+ free(old_relocs);
+done:
+ rsec->relocs = new_relocs;
+ return 0;
+}
+
+struct section *elf_create_rela_section(struct elf *elf, struct section *sec,
+ unsigned int nr_relocs)
{
struct section *rsec;
char *rsec_name;
@@ -1213,41 +1601,72 @@ static struct section *elf_create_rela_section(struct elf *elf,
strcpy(rsec_name, ".rela");
strcat(rsec_name, sec->name);
- rsec = elf_create_section(elf, rsec_name, elf_rela_size(elf), reloc_nr);
+ rsec = elf_create_section(elf, rsec_name, nr_relocs * elf_rela_size(elf),
+ elf_rela_size(elf), SHT_RELA, elf_addr_size(elf),
+ SHF_INFO_LINK);
free(rsec_name);
if (!rsec)
return NULL;
- rsec->data->d_type = ELF_T_RELA;
- rsec->sh.sh_type = SHT_RELA;
- rsec->sh.sh_addralign = elf_addr_size(elf);
- rsec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
- rsec->sh.sh_info = sec->idx;
- rsec->sh.sh_flags = SHF_INFO_LINK;
+ if (nr_relocs) {
+ rsec->data->d_type = ELF_T_RELA;
- rsec->relocs = calloc(sec_num_entries(rsec), sizeof(struct reloc));
- if (!rsec->relocs) {
- ERROR_GLIBC("calloc");
- return NULL;
+ rsec->nr_alloc_relocs = nr_relocs;
+ rsec->relocs = calloc(nr_relocs, sizeof(struct reloc));
+ if (!rsec->relocs) {
+ ERROR_GLIBC("calloc");
+ return NULL;
+ }
}
+ rsec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
+ rsec->sh.sh_info = sec->idx;
+
sec->rsec = rsec;
rsec->base = sec;
return rsec;
}
+struct reloc *elf_create_reloc(struct elf *elf, struct section *sec,
+ unsigned long offset,
+ struct symbol *sym, s64 addend,
+ unsigned int type)
+{
+ struct section *rsec = sec->rsec;
+
+ if (!rsec) {
+ rsec = elf_create_rela_section(elf, sec, 0);
+ if (!rsec)
+ return NULL;
+ }
+
+ if (find_reloc_by_dest(elf, sec, offset)) {
+ ERROR_FUNC(sec, offset, "duplicate reloc");
+ return NULL;
+ }
+
+ if (elf_alloc_reloc(elf, rsec))
+ return NULL;
+
+ mark_sec_changed(elf, rsec, true);
+
+ return elf_init_reloc(elf, rsec, sec_num_entries(rsec) - 1, offset, sym,
+ addend, type);
+}
+
struct section *elf_create_section_pair(struct elf *elf, const char *name,
size_t entsize, unsigned int nr,
- unsigned int reloc_nr)
+ unsigned int nr_relocs)
{
struct section *sec;
- sec = elf_create_section(elf, name, entsize, nr);
+ sec = elf_create_section(elf, name, nr * entsize, entsize,
+ SHT_PROGBITS, 1, SHF_ALLOC);
if (!sec)
return NULL;
- if (!elf_create_rela_section(elf, sec, reloc_nr))
+ if (!elf_create_rela_section(elf, sec, nr_relocs))
return NULL;
return sec;
@@ -1282,7 +1701,7 @@ int elf_write_insn(struct elf *elf, struct section *sec,
*/
static int elf_truncate_section(struct elf *elf, struct section *sec)
{
- u64 size = sec->sh.sh_size;
+ u64 size = sec_size(sec);
bool truncated = false;
Elf_Data *data = NULL;
Elf_Scn *s;
@@ -1296,7 +1715,6 @@ static int elf_truncate_section(struct elf *elf, struct section *sec)
for (;;) {
/* get next data descriptor for the relevant section */
data = elf_getdata(s, data);
-
if (!data) {
if (size) {
ERROR("end of section data but non-zero size left\n");
@@ -1332,8 +1750,8 @@ int elf_write(struct elf *elf)
/* Update changed relocation sections and section headers: */
list_for_each_entry(sec, &elf->sections, list) {
- if (sec->truncate)
- elf_truncate_section(elf, sec);
+ if (sec->truncate && elf_truncate_section(elf, sec))
+ return -1;
if (sec_changed(sec)) {
s = elf_getscn(elf->elf, sec->idx);
@@ -1366,7 +1784,7 @@ int elf_write(struct elf *elf)
return 0;
}
-void elf_close(struct elf *elf)
+int elf_close(struct elf *elf)
{
if (elf->elf)
elf_end(elf->elf);
@@ -1374,8 +1792,12 @@ void elf_close(struct elf *elf)
if (elf->fd > 0)
close(elf->fd);
+ if (elf->tmp_name && rename(elf->tmp_name, elf->name))
+ return -1;
+
/*
* NOTE: All remaining allocations are leaked on purpose. Objtool is
* about to exit anyway.
*/
+ return 0;
}
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 01ef6f415adf..8866158975fc 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -71,7 +71,7 @@ struct stack_op {
struct instruction;
-int arch_ftrace_match(char *name);
+int arch_ftrace_match(const char *name);
void arch_initial_func_cfi_state(struct cfi_init_state *state);
@@ -83,7 +83,8 @@ bool arch_callee_saved_reg(unsigned char reg);
unsigned long arch_jump_destination(struct instruction *insn);
-unsigned long arch_dest_reloc_offset(int addend);
+s64 arch_insn_adjusted_addend(struct instruction *insn, struct reloc *reloc);
+u64 arch_adjusted_addend(struct reloc *reloc);
const char *arch_nop_insn(int len);
const char *arch_ret_insn(int len);
@@ -97,8 +98,20 @@ bool arch_is_embedded_insn(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
bool arch_pc_relative_reloc(struct reloc *reloc);
+bool arch_absolute_reloc(struct elf *elf, struct reloc *reloc);
unsigned int arch_reloc_size(struct reloc *reloc);
unsigned long arch_jump_table_sym_offset(struct reloc *reloc, struct reloc *table);
+extern const char *arch_reg_name[CFI_NUM_REGS];
+
+#ifdef DISAS
+
+#include <bfd.h>
+#include <dis-asm.h>
+
+int arch_disas_info_init(struct disassemble_info *dinfo);
+
+#endif /* DISAS */
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 6b08666fa69d..b9e229ed4dc0 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -9,12 +9,15 @@
struct opts {
/* actions: */
+ bool cfi;
+ bool checksum;
bool dump_orc;
bool hack_jump_label;
bool hack_noinstr;
bool hack_skylake;
bool ibt;
bool mcount;
+ bool noabs;
bool noinstr;
bool orc;
bool retpoline;
@@ -25,10 +28,12 @@ struct opts {
bool static_call;
bool uaccess;
int prefix;
- bool cfi;
+ const char *disas;
/* options: */
bool backtrace;
+ bool backup;
+ const char *debug_checksum;
bool dryrun;
bool link;
bool mnop;
@@ -37,8 +42,10 @@ struct opts {
const char *output;
bool sec_address;
bool stats;
+ const char *trace;
bool verbose;
bool werror;
+ bool wide;
};
extern struct opts opts;
@@ -47,6 +54,8 @@ int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
int objtool_run(int argc, const char **argv);
-void print_args(void);
+int make_backup(void);
+
+int cmd_klp(int argc, const char **argv);
#endif /* _BUILTIN_H */
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 00fb745e7233..2e1346ad5e92 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -36,6 +36,19 @@ struct alt_group {
struct cfi_state **cfi;
bool ignore;
+ unsigned int feature;
+};
+
+enum alternative_type {
+ ALT_TYPE_INSTRUCTIONS,
+ ALT_TYPE_JUMP_TABLE,
+ ALT_TYPE_EX_TABLE,
+};
+
+struct alternative {
+ struct alternative *next;
+ struct instruction *insn;
+ enum alternative_type type;
};
#define INSN_CHUNK_BITS 8
@@ -64,8 +77,11 @@ struct instruction {
noendbr : 1,
unret : 1,
visited : 4,
- no_reloc : 1;
- /* 10 bit hole */
+ no_reloc : 1,
+ hole : 1,
+ fake : 1,
+ trace : 1;
+ /* 9 bit hole */
struct alt_group *alt_group;
struct instruction *jump_dest;
@@ -115,6 +131,15 @@ static inline bool is_jump(struct instruction *insn)
return is_static_jump(insn) || is_dynamic_jump(insn);
}
+static inline struct symbol *insn_call_dest(struct instruction *insn)
+{
+ if (insn->type == INSN_JUMP_DYNAMIC ||
+ insn->type == INSN_CALL_DYNAMIC)
+ return NULL;
+
+ return insn->_call_dest;
+}
+
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
@@ -125,4 +150,14 @@ struct instruction *next_insn_same_sec(struct objtool_file *file, struct instruc
insn && insn->sec == _sec; \
insn = next_insn_same_sec(file, insn))
+#define sym_for_each_insn(file, sym, insn) \
+ for (insn = find_insn(file, sym->sec, sym->offset); \
+ insn && insn->offset < sym->offset + sym->len; \
+ insn = next_insn_same_sec(file, insn))
+
+const char *objtool_disas_insn(struct instruction *insn);
+
+extern size_t sym_name_max_len;
+extern struct disas_context *objtool_disas_ctx;
+
#endif /* _CHECK_H */
diff --git a/tools/objtool/include/objtool/checksum.h b/tools/objtool/include/objtool/checksum.h
new file mode 100644
index 000000000000..7fe21608722a
--- /dev/null
+++ b/tools/objtool/include/objtool/checksum.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _OBJTOOL_CHECKSUM_H
+#define _OBJTOOL_CHECKSUM_H
+
+#include <objtool/elf.h>
+
+#ifdef BUILD_KLP
+
+static inline void checksum_init(struct symbol *func)
+{
+ if (func && !func->csum.state) {
+ func->csum.state = XXH3_createState();
+ XXH3_64bits_reset(func->csum.state);
+ }
+}
+
+static inline void checksum_update(struct symbol *func,
+ struct instruction *insn,
+ const void *data, size_t size)
+{
+ XXH3_64bits_update(func->csum.state, data, size);
+ dbg_checksum(func, insn, XXH3_64bits_digest(func->csum.state));
+}
+
+static inline void checksum_finish(struct symbol *func)
+{
+ if (func && func->csum.state) {
+ func->csum.checksum = XXH3_64bits_digest(func->csum.state);
+ func->csum.state = NULL;
+ }
+}
+
+#else /* !BUILD_KLP */
+
+static inline void checksum_init(struct symbol *func) {}
+static inline void checksum_update(struct symbol *func,
+ struct instruction *insn,
+ const void *data, size_t size) {}
+static inline void checksum_finish(struct symbol *func) {}
+
+#endif /* !BUILD_KLP */
+
+#endif /* _OBJTOOL_CHECKSUM_H */
diff --git a/tools/objtool/include/objtool/checksum_types.h b/tools/objtool/include/objtool/checksum_types.h
new file mode 100644
index 000000000000..507efdd8ab5b
--- /dev/null
+++ b/tools/objtool/include/objtool/checksum_types.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _OBJTOOL_CHECKSUM_TYPES_H
+#define _OBJTOOL_CHECKSUM_TYPES_H
+
+struct sym_checksum {
+ u64 addr;
+ u64 checksum;
+};
+
+#ifdef BUILD_KLP
+
+#include <xxhash.h>
+
+struct checksum {
+ XXH3_state_t *state;
+ XXH64_hash_t checksum;
+};
+
+#else /* !BUILD_KLP */
+
+struct checksum {};
+
+#endif /* !BUILD_KLP */
+
+#endif /* _OBJTOOL_CHECKSUM_TYPES_H */
diff --git a/tools/objtool/include/objtool/disas.h b/tools/objtool/include/objtool/disas.h
new file mode 100644
index 000000000000..e8f395eff159
--- /dev/null
+++ b/tools/objtool/include/objtool/disas.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates.
+ */
+
+#ifndef _DISAS_H
+#define _DISAS_H
+
+struct alternative;
+struct disas_context;
+struct disassemble_info;
+
+#ifdef DISAS
+
+struct disas_context *disas_context_create(struct objtool_file *file);
+void disas_context_destroy(struct disas_context *dctx);
+void disas_warned_funcs(struct disas_context *dctx);
+void disas_funcs(struct disas_context *dctx);
+int disas_info_init(struct disassemble_info *dinfo,
+ int arch, int mach32, int mach64,
+ const char *options);
+size_t disas_insn(struct disas_context *dctx, struct instruction *insn);
+char *disas_result(struct disas_context *dctx);
+void disas_print_info(FILE *stream, struct instruction *insn, int depth,
+ const char *format, ...);
+void disas_print_insn(FILE *stream, struct disas_context *dctx,
+ struct instruction *insn, int depth,
+ const char *format, ...);
+char *disas_alt_name(struct alternative *alt);
+const char *disas_alt_type_name(struct instruction *insn);
+
+#else /* DISAS */
+
+#include <objtool/warn.h>
+
+static inline struct disas_context *disas_context_create(struct objtool_file *file)
+{
+ WARN("Rebuild with libopcodes for disassembly support");
+ return NULL;
+}
+
+static inline void disas_context_destroy(struct disas_context *dctx) {}
+static inline void disas_warned_funcs(struct disas_context *dctx) {}
+static inline void disas_funcs(struct disas_context *dctx) {}
+
+static inline int disas_info_init(struct disassemble_info *dinfo,
+ int arch, int mach32, int mach64,
+ const char *options)
+{
+ return -1;
+}
+
+static inline size_t disas_insn(struct disas_context *dctx,
+ struct instruction *insn)
+{
+ return -1;
+}
+
+static inline char *disas_result(struct disas_context *dctx)
+{
+ return NULL;
+}
+
+static inline void disas_print_info(FILE *stream, struct instruction *insn,
+ int depth, const char *format, ...) {}
+static inline void disas_print_insn(FILE *stream, struct disas_context *dctx,
+ struct instruction *insn, int depth,
+ const char *format, ...) {}
+static inline char *disas_alt_name(struct alternative *alt)
+{
+ return NULL;
+}
+
+static inline const char *disas_alt_type_name(struct instruction *insn)
+{
+ return NULL;
+}
+
+#endif /* DISAS */
+
+#endif /* _DISAS_H */
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 0a2fa3ac0079..e12c516bd320 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -8,12 +8,21 @@
#include <stdio.h>
#include <gelf.h>
+#include <linux/string.h>
#include <linux/list.h>
#include <linux/hashtable.h>
#include <linux/rbtree.h>
#include <linux/jhash.h>
+
+#include <objtool/endianness.h>
+#include <objtool/checksum_types.h>
#include <arch/elf.h>
+#define SEC_NAME_LEN 1024
+#define SYM_NAME_LEN 512
+
+#define bswap_if_needed(elf, val) __bswap_if_needed(&elf->ehdr, val)
+
#ifdef LIBELF_USE_DEPRECATED
# define elf_getshdrnum elf_getshnum
# define elf_getshdrstrndx elf_getshstrndx
@@ -40,24 +49,27 @@ struct section {
struct section *base, *rsec;
struct symbol *sym;
Elf_Data *data;
- char *name;
+ const char *name;
int idx;
bool _changed, text, rodata, noinstr, init, truncate;
struct reloc *relocs;
+ unsigned long nr_alloc_relocs;
+ struct section *twin;
};
struct symbol {
struct list_head list;
+ struct list_head global_list;
struct rb_node node;
struct elf_hash_node hash;
struct elf_hash_node name_hash;
GElf_Sym sym;
struct section *sec;
- char *name;
+ const char *name, *demangled_name;
unsigned int idx, len;
unsigned long offset;
unsigned long __subtree_last;
- struct symbol *pfunc, *cfunc, *alias;
+ struct symbol *pfunc, *cfunc, *alias, *file;
unsigned char bind, type;
u8 uaccess_safe : 1;
u8 static_call_tramp : 1;
@@ -70,9 +82,18 @@ struct symbol {
u8 local_label : 1;
u8 frame_pointer : 1;
u8 ignore : 1;
+ u8 nocfi : 1;
+ u8 cold : 1;
+ u8 prefix : 1;
+ u8 debug_checksum : 1;
+ u8 changed : 1;
+ u8 included : 1;
+ u8 klp : 1;
struct list_head pv_target;
struct reloc *relocs;
struct section *group_sec;
+ struct checksum csum;
+ struct symbol *twin, *clone;
};
struct reloc {
@@ -87,9 +108,10 @@ struct elf {
GElf_Ehdr ehdr;
int fd;
bool changed;
- char *name;
+ const char *name, *tmp_name;
unsigned int num_files;
struct list_head sections;
+ struct list_head symbols;
unsigned long num_relocs;
int symbol_bits;
@@ -109,14 +131,37 @@ struct elf {
};
struct elf *elf_open_read(const char *name, int flags);
+struct elf *elf_create_file(GElf_Ehdr *ehdr, const char *name);
struct section *elf_create_section(struct elf *elf, const char *name,
- size_t entsize, unsigned int nr);
+ size_t size, size_t entsize,
+ unsigned int type, unsigned int align,
+ unsigned int flags);
struct section *elf_create_section_pair(struct elf *elf, const char *name,
size_t entsize, unsigned int nr,
unsigned int reloc_nr);
-struct symbol *elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size);
+struct section *elf_create_rela_section(struct elf *elf, struct section *sec,
+ unsigned int reloc_nr);
+
+struct symbol *elf_create_symbol(struct elf *elf, const char *name,
+ struct section *sec, unsigned int bind,
+ unsigned int type, unsigned long offset,
+ size_t size);
+struct symbol *elf_create_section_symbol(struct elf *elf, struct section *sec);
+
+void *elf_add_data(struct elf *elf, struct section *sec, const void *data,
+ size_t size);
+
+unsigned int elf_add_string(struct elf *elf, struct section *strtab, const char *str);
+
+struct reloc *elf_create_reloc(struct elf *elf, struct section *sec,
+ unsigned long offset, struct symbol *sym,
+ s64 addend, unsigned int type);
+
+struct reloc *elf_init_reloc(struct elf *elf, struct section *rsec,
+ unsigned int reloc_idx, unsigned long offset,
+ struct symbol *sym, s64 addend, unsigned int type);
struct reloc *elf_init_reloc_text_sym(struct elf *elf, struct section *sec,
unsigned long offset,
@@ -130,16 +175,17 @@ struct reloc *elf_init_reloc_data_sym(struct elf *elf, struct section *sec,
struct symbol *sym,
s64 addend);
-int elf_write_insn(struct elf *elf, struct section *sec,
- unsigned long offset, unsigned int len,
- const char *insn);
+int elf_write_insn(struct elf *elf, struct section *sec, unsigned long offset,
+ unsigned int len, const char *insn);
+
int elf_write(struct elf *elf);
-void elf_close(struct elf *elf);
+int elf_close(struct elf *elf);
struct section *find_section_by_name(const struct elf *elf, const char *name);
struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
+struct symbol *find_global_symbol_by_name(const struct elf *elf, const char *name);
struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
int find_symbol_hole_containing(const struct section *sec, unsigned long offset);
struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
@@ -177,11 +223,76 @@ static inline unsigned int elf_text_rela_type(struct elf *elf)
return elf_addr_size(elf) == 4 ? R_TEXT32 : R_TEXT64;
}
+static inline bool is_undef_sym(struct symbol *sym)
+{
+ return !sym->sec->idx;
+}
+
+static inline bool is_null_sym(struct symbol *sym)
+{
+ return !sym->idx;
+}
+
+static inline bool is_sec_sym(struct symbol *sym)
+{
+ return sym->type == STT_SECTION;
+}
+
+static inline bool is_object_sym(struct symbol *sym)
+{
+ return sym->type == STT_OBJECT;
+}
+
+static inline bool is_func_sym(struct symbol *sym)
+{
+ return sym->type == STT_FUNC;
+}
+
+static inline bool is_file_sym(struct symbol *sym)
+{
+ return sym->type == STT_FILE;
+}
+
+static inline bool is_notype_sym(struct symbol *sym)
+{
+ return sym->type == STT_NOTYPE;
+}
+
+static inline bool is_global_sym(struct symbol *sym)
+{
+ return sym->bind == STB_GLOBAL;
+}
+
+static inline bool is_weak_sym(struct symbol *sym)
+{
+ return sym->bind == STB_WEAK;
+}
+
+static inline bool is_local_sym(struct symbol *sym)
+{
+ return sym->bind == STB_LOCAL;
+}
+
+static inline bool is_prefix_func(struct symbol *sym)
+{
+ return sym->prefix;
+}
+
static inline bool is_reloc_sec(struct section *sec)
{
return sec->sh.sh_type == SHT_RELA || sec->sh.sh_type == SHT_REL;
}
+static inline bool is_string_sec(struct section *sec)
+{
+ return sec->sh.sh_flags & SHF_STRINGS;
+}
+
+static inline bool is_text_sec(struct section *sec)
+{
+ return sec->sh.sh_flags & SHF_EXECINSTR;
+}
+
static inline bool sec_changed(struct section *sec)
{
return sec->_changed;
@@ -222,6 +333,11 @@ static inline bool is_32bit_reloc(struct reloc *reloc)
return reloc->sec->sh.sh_entsize < 16;
}
+static inline unsigned long sec_size(struct section *sec)
+{
+ return sec->sh.sh_size;
+}
+
#define __get_reloc_field(reloc, field) \
({ \
is_32bit_reloc(reloc) ? \
@@ -299,6 +415,15 @@ static inline void set_reloc_type(struct elf *elf, struct reloc *reloc, unsigned
mark_sec_changed(elf, reloc->sec, true);
}
+static inline unsigned int annotype(struct elf *elf, struct section *sec,
+ struct reloc *reloc)
+{
+ unsigned int type;
+
+ type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * 8) + 4);
+ return bswap_if_needed(elf, type);
+}
+
#define RELOC_JUMP_TABLE_BIT 1UL
/* Does reloc mark the beginning of a jump table? */
@@ -324,28 +449,54 @@ static inline void set_sym_next_reloc(struct reloc *reloc, struct reloc *next)
reloc->_sym_next_reloc = (unsigned long)next | bit;
}
-#define for_each_sec(file, sec) \
- list_for_each_entry(sec, &file->elf->sections, list)
+#define for_each_sec(elf, sec) \
+ list_for_each_entry(sec, &elf->sections, list)
#define sec_for_each_sym(sec, sym) \
list_for_each_entry(sym, &sec->symbol_list, list)
-#define for_each_sym(file, sym) \
- for (struct section *__sec, *__fake = (struct section *)1; \
- __fake; __fake = NULL) \
- for_each_sec(file, __sec) \
- sec_for_each_sym(__sec, sym)
+#define sec_prev_sym(sym) \
+ sym->sec && sym->list.prev != &sym->sec->symbol_list ? \
+ list_prev_entry(sym, list) : NULL
+
+#define for_each_sym(elf, sym) \
+ list_for_each_entry(sym, &elf->symbols, global_list)
+
+#define for_each_sym_continue(elf, sym) \
+ list_for_each_entry_continue(sym, &elf->symbols, global_list)
+
+#define rsec_next_reloc(rsec, reloc) \
+ reloc_idx(reloc) < sec_num_entries(rsec) - 1 ? reloc + 1 : NULL
#define for_each_reloc(rsec, reloc) \
- for (int __i = 0, __fake = 1; __fake; __fake = 0) \
- for (reloc = rsec->relocs; \
- __i < sec_num_entries(rsec); \
- __i++, reloc++)
+ for (reloc = rsec->relocs; reloc; reloc = rsec_next_reloc(rsec, reloc))
#define for_each_reloc_from(rsec, reloc) \
- for (int __i = reloc_idx(reloc); \
- __i < sec_num_entries(rsec); \
- __i++, reloc++)
+ for (; reloc; reloc = rsec_next_reloc(rsec, reloc))
+
+#define for_each_reloc_continue(rsec, reloc) \
+ for (reloc = rsec_next_reloc(rsec, reloc); reloc; \
+ reloc = rsec_next_reloc(rsec, reloc))
+
+#define sym_for_each_reloc(elf, sym, reloc) \
+ for (reloc = find_reloc_by_dest_range(elf, sym->sec, \
+ sym->offset, sym->len); \
+ reloc && reloc_offset(reloc) < sym->offset + sym->len; \
+ reloc = rsec_next_reloc(sym->sec->rsec, reloc))
+
+static inline struct symbol *get_func_prefix(struct symbol *func)
+{
+ struct symbol *prev;
+
+ if (!is_func_sym(func))
+ return NULL;
+
+ prev = sec_prev_sym(func);
+ if (prev && is_prefix_func(prev))
+ return prev;
+
+ return NULL;
+}
#define OFFSET_STRIDE_BITS 4
#define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS)
diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h
index 4d2aa9b0fe2f..aebcd2338668 100644
--- a/tools/objtool/include/objtool/endianness.h
+++ b/tools/objtool/include/objtool/endianness.h
@@ -4,7 +4,6 @@
#include <linux/kernel.h>
#include <endian.h>
-#include <objtool/elf.h>
/*
* Does a byte swap if target file endianness doesn't match the host, i.e. cross
@@ -12,16 +11,16 @@
* To be used for multi-byte values conversion, which are read from / about
* to be written to a target native endianness ELF file.
*/
-static inline bool need_bswap(struct elf *elf)
+static inline bool need_bswap(GElf_Ehdr *ehdr)
{
return (__BYTE_ORDER == __LITTLE_ENDIAN) ^
- (elf->ehdr.e_ident[EI_DATA] == ELFDATA2LSB);
+ (ehdr->e_ident[EI_DATA] == ELFDATA2LSB);
}
-#define bswap_if_needed(elf, val) \
+#define __bswap_if_needed(ehdr, val) \
({ \
__typeof__(val) __ret; \
- bool __need_bswap = need_bswap(elf); \
+ bool __need_bswap = need_bswap(ehdr); \
switch (sizeof(val)) { \
case 8: \
__ret = __need_bswap ? bswap_64(val) : (val); break; \
diff --git a/tools/objtool/include/objtool/klp.h b/tools/objtool/include/objtool/klp.h
new file mode 100644
index 000000000000..ad830a7ce55b
--- /dev/null
+++ b/tools/objtool/include/objtool/klp.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _OBJTOOL_KLP_H
+#define _OBJTOOL_KLP_H
+
+#define SHF_RELA_LIVEPATCH 0x00100000
+#define SHN_LIVEPATCH 0xff20
+
+/*
+ * __klp_objects and __klp_funcs are created by klp diff and used by the patch
+ * module init code to build the klp_patch, klp_object and klp_func structs
+ * needed by the livepatch API.
+ */
+#define KLP_OBJECTS_SEC "__klp_objects"
+#define KLP_FUNCS_SEC "__klp_funcs"
+
+/*
+ * __klp_relocs is an intermediate section which is created by klp diff and
+ * converted into KLP symbols/relas by "objtool klp post-link". This is needed
+ * to work around the linker, which doesn't preserve SHN_LIVEPATCH or
+ * SHF_RELA_LIVEPATCH, nor does it support having two RELA sections for a
+ * single PROGBITS section.
+ */
+#define KLP_RELOCS_SEC "__klp_relocs"
+#define KLP_STRINGS_SEC ".rodata.klp.str1.1"
+
+struct klp_reloc {
+ void *offset;
+ void *sym;
+ u32 type;
+};
+
+int cmd_klp_diff(int argc, const char **argv);
+int cmd_klp_post_link(int argc, const char **argv);
+
+#endif /* _OBJTOOL_KLP_H */
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index c0dc86a78ff6..6dc12a59ad00 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -28,7 +28,7 @@ struct objtool_file {
struct list_head mcount_loc_list;
struct list_head endbr_list;
struct list_head call_list;
- bool ignore_unreachables, hints, rodata;
+ bool ignore_unreachables, hints, rodata, klp;
unsigned int nr_endbr;
unsigned int nr_endbr_int;
@@ -39,6 +39,10 @@ struct objtool_file {
struct pv_state *pv_ops;
};
+char *top_level_dir(const char *file);
+
+int init_signal_handler(void);
+
struct objtool_file *objtool_open_read(const char *_objname);
int objtool_pv_add(struct objtool_file *file, int idx, struct symbol *func);
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index 72d09c0adf1a..121c3761899c 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -25,7 +25,7 @@ struct special_alt {
struct section *new_sec;
unsigned long new_off;
- unsigned int orig_len, new_len; /* group only */
+ unsigned int orig_len, new_len, feature; /* group only */
};
int special_get_alts(struct elf *elf, struct list_head *alts);
@@ -38,4 +38,6 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
struct reloc *arch_find_switch_table(struct objtool_file *file,
struct instruction *insn,
unsigned long *table_size);
+const char *arch_cpu_feature_name(int feature_number);
+
#endif /* _SPECIAL_H */
diff --git a/tools/objtool/include/objtool/trace.h b/tools/objtool/include/objtool/trace.h
new file mode 100644
index 000000000000..70b574366797
--- /dev/null
+++ b/tools/objtool/include/objtool/trace.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates.
+ */
+
+#ifndef _TRACE_H
+#define _TRACE_H
+
+#include <objtool/check.h>
+#include <objtool/disas.h>
+
+#ifdef DISAS
+
+extern bool trace;
+extern int trace_depth;
+
+#define TRACE(fmt, ...) \
+({ if (trace) \
+ fprintf(stderr, fmt, ##__VA_ARGS__); \
+})
+
+/*
+ * Print the instruction address and a message. The instruction
+ * itself is not printed.
+ */
+#define TRACE_ADDR(insn, fmt, ...) \
+({ \
+ if (trace) { \
+ disas_print_info(stderr, insn, trace_depth - 1, \
+ fmt "\n", ##__VA_ARGS__); \
+ } \
+})
+
+/*
+ * Print the instruction address, the instruction and a message.
+ */
+#define TRACE_INSN(insn, fmt, ...) \
+({ \
+ if (trace) { \
+ disas_print_insn(stderr, objtool_disas_ctx, \
+ insn, trace_depth - 1, \
+ fmt, ##__VA_ARGS__); \
+ fprintf(stderr, "\n"); \
+ insn->trace = 1; \
+ } \
+})
+
+#define TRACE_INSN_STATE(insn, sprev, snext) \
+({ \
+ if (trace) \
+ trace_insn_state(insn, sprev, snext); \
+})
+
+#define TRACE_ALT_FMT(pfx, fmt) pfx "<%s.%lx> " fmt
+#define TRACE_ALT_ARG(insn) disas_alt_type_name(insn), (insn)->offset
+
+#define TRACE_ALT(insn, fmt, ...) \
+ TRACE_INSN(insn, TRACE_ALT_FMT("", fmt), \
+ TRACE_ALT_ARG(insn), ##__VA_ARGS__)
+
+#define TRACE_ALT_INFO(insn, pfx, fmt, ...) \
+ TRACE_ADDR(insn, TRACE_ALT_FMT(pfx, fmt), \
+ TRACE_ALT_ARG(insn), ##__VA_ARGS__)
+
+#define TRACE_ALT_INFO_NOADDR(insn, pfx, fmt, ...) \
+ TRACE_ADDR(NULL, TRACE_ALT_FMT(pfx, fmt), \
+ TRACE_ALT_ARG(insn), ##__VA_ARGS__)
+
+#define TRACE_ALT_BEGIN(insn, alt, alt_name) \
+({ \
+ if (trace) { \
+ alt_name = disas_alt_name(alt); \
+ trace_alt_begin(insn, alt, alt_name); \
+ } \
+})
+
+#define TRACE_ALT_END(insn, alt, alt_name) \
+({ \
+ if (trace) { \
+ trace_alt_end(insn, alt, alt_name); \
+ free(alt_name); \
+ } \
+})
+
+static inline void trace_enable(void)
+{
+ trace = true;
+ trace_depth = 0;
+}
+
+static inline void trace_disable(void)
+{
+ trace = false;
+}
+
+static inline void trace_depth_inc(void)
+{
+ if (trace)
+ trace_depth++;
+}
+
+static inline void trace_depth_dec(void)
+{
+ if (trace)
+ trace_depth--;
+}
+
+void trace_insn_state(struct instruction *insn, struct insn_state *sprev,
+ struct insn_state *snext);
+void trace_alt_begin(struct instruction *orig_insn, struct alternative *alt,
+ char *alt_name);
+void trace_alt_end(struct instruction *orig_insn, struct alternative *alt,
+ char *alt_name);
+
+#else /* DISAS */
+
+#define TRACE(fmt, ...) ({})
+#define TRACE_ADDR(insn, fmt, ...) ({})
+#define TRACE_INSN(insn, fmt, ...) ({})
+#define TRACE_INSN_STATE(insn, sprev, snext) ({})
+#define TRACE_ALT(insn, fmt, ...) ({})
+#define TRACE_ALT_INFO(insn, fmt, ...) ({})
+#define TRACE_ALT_INFO_NOADDR(insn, fmt, ...) ({})
+#define TRACE_ALT_BEGIN(insn, alt, alt_name) ({})
+#define TRACE_ALT_END(insn, alt, alt_name) ({})
+
+
+static inline void trace_enable(void) {}
+static inline void trace_disable(void) {}
+static inline void trace_depth_inc(void) {}
+static inline void trace_depth_dec(void) {}
+static inline void trace_alt_begin(struct instruction *orig_insn,
+ struct alternative *alt,
+ char *alt_name) {};
+static inline void trace_alt_end(struct instruction *orig_insn,
+ struct alternative *alt,
+ char *alt_name) {};
+
+#endif
+
+#endif /* _TRACE_H */
diff --git a/tools/objtool/include/objtool/util.h b/tools/objtool/include/objtool/util.h
new file mode 100644
index 000000000000..a0180b312f73
--- /dev/null
+++ b/tools/objtool/include/objtool/util.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _UTIL_H
+#define _UTIL_H
+
+#include <objtool/warn.h>
+/* Yields 0 on success, nonzero (after logging an error) on failure/truncation. */
+#define snprintf_check(str, size, format, args...)			\
+({									\
+	int __ret = snprintf(str, size, format, args);			\
+	if (__ret < 0)							\
+		ERROR_GLIBC("snprintf");				\
+	else if (__ret >= size)						\
+		ERROR("snprintf() failed for '" format "'", args);	\
+	else								\
+		__ret = 0;						\
+	__ret;								\
+})
+
+#endif /* _UTIL_H */
diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h
index cb8fe846d9dd..25ff7942b4d5 100644
--- a/tools/objtool/include/objtool/warn.h
+++ b/tools/objtool/include/objtool/warn.h
@@ -77,9 +77,11 @@ static inline char *offstr(struct section *sec, unsigned long offset)
#define WARN_INSN(insn, format, ...) \
({ \
struct instruction *_insn = (insn); \
- if (!_insn->sym || !_insn->sym->warned) \
+ if (!_insn->sym || !_insn->sym->warned) { \
WARN_FUNC(_insn->sec, _insn->offset, format, \
##__VA_ARGS__); \
+ BT_INSN(_insn, ""); \
+ } \
if (_insn->sym) \
_insn->sym->warned = 1; \
})
@@ -87,10 +89,15 @@ static inline char *offstr(struct section *sec, unsigned long offset)
#define BT_INSN(insn, format, ...) \
({ \
if (opts.verbose || opts.backtrace) { \
- struct instruction *_insn = (insn); \
- char *_str = offstr(_insn->sec, _insn->offset); \
- WARN(" %s: " format, _str, ##__VA_ARGS__); \
- free(_str); \
+ struct instruction *__insn = (insn); \
+ char *_str = offstr(__insn->sec, __insn->offset); \
+ const char *_istr = objtool_disas_insn(__insn); \
+ int _len; \
+ _len = snprintf(NULL, 0, " %s: " format, _str, ##__VA_ARGS__); \
+ _len = (_len < 50) ? 50 - _len : 0; \
+ WARN(" %s: " format " %*s%s", _str, ##__VA_ARGS__, _len, "", _istr); \
+ free(_str); \
+ __insn->trace = 1; \
} \
})
@@ -102,4 +109,53 @@ static inline char *offstr(struct section *sec, unsigned long offset)
#define ERROR_FUNC(sec, offset, format, ...) __WARN_FUNC(ERROR_STR, sec, offset, format, ##__VA_ARGS__)
#define ERROR_INSN(insn, format, ...) WARN_FUNC(insn->sec, insn->offset, format, ##__VA_ARGS__)
+extern bool debug;
+extern int indent;
+
+static inline void unindent(int *unused) { indent--; }
+
+/*
+ * Clang prior to 17 is being silly and considers many __cleanup() variables
+ * as unused (because they are, their sole purpose is to go out of scope).
+ *
+ * https://github.com/llvm/llvm-project/commit/877210faa447f4cc7db87812f8ed80e398fedd61
+ */
+#undef __cleanup
+#define __cleanup(func) __maybe_unused __attribute__((__cleanup__(func)))
+
+#define __dbg(format, ...) \
+ fprintf(stderr, \
+ "DEBUG: %s%s" format "\n", \
+ objname ?: "", \
+ objname ? ": " : "", \
+ ##__VA_ARGS__)
+
+#define dbg(args...) \
+({ \
+ if (unlikely(debug)) \
+ __dbg(args); \
+})
+
+#define __dbg_indent(format, ...) \
+({ \
+ if (unlikely(debug)) \
+ __dbg("%*s" format, indent * 8, "", ##__VA_ARGS__); \
+})
+/* Use once per scope: "##" stops __COUNTER__ expanding, so the name is fixed. */
+#define dbg_indent(args...)						\
+	int __cleanup(unindent) __dummy_##__COUNTER__;			\
+	__dbg_indent(args);						\
+	indent++
+
+#define dbg_checksum(func, insn, checksum) \
+({ \
+ if (unlikely(insn->sym && insn->sym->pfunc && \
+ insn->sym->pfunc->debug_checksum)) { \
+ char *insn_off = offstr(insn->sec, insn->offset); \
+ __dbg("checksum: %s %s %016lx", \
+ func->name, insn_off, checksum); \
+ free(insn_off); \
+ } \
+})
+
#endif /* _WARN_H */
diff --git a/tools/objtool/klp-diff.c b/tools/objtool/klp-diff.c
new file mode 100644
index 000000000000..4d1f9e9977eb
--- /dev/null
+++ b/tools/objtool/klp-diff.c
@@ -0,0 +1,1723 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#define _GNU_SOURCE /* memmem() */
+#include <subcmd/parse-options.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <ctype.h>
+
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+#include <objtool/arch.h>
+#include <objtool/klp.h>
+#include <objtool/util.h>
+#include <arch/special.h>
+
+#include <linux/objtool_types.h>
+#include <linux/livepatch_external.h>
+#include <linux/stringify.h>
+#include <linux/string.h>
+#include <linux/jhash.h>
+
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+
+struct elfs {
+ struct elf *orig, *patched, *out;
+ const char *modname;
+};
+
+struct export {
+ struct hlist_node hash;
+ char *mod, *sym;
+};
+
+static const char * const klp_diff_usage[] = {
+ "objtool klp diff [<options>] <in1.o> <in2.o> <out.o>",
+ NULL,
+};
+
+static const struct option klp_diff_options[] = {
+ OPT_GROUP("Options:"),
+ OPT_BOOLEAN('d', "debug", &debug, "enable debug output"),
+ OPT_END(),
+};
+
+static DEFINE_HASHTABLE(exports, 15);
+
+static inline u32 str_hash(const char *str)
+{
+ return jhash(str, strlen(str), 0);
+}
+
+static char *escape_str(const char *orig)
+{
+ size_t len = 0;
+ const char *a;
+ char *b, *new;
+
+ for (a = orig; *a; a++) {
+ switch (*a) {
+ case '\001': len += 5; break;
+ case '\n':
+ case '\t': len += 2; break;
+ default: len++;
+ }
+ }
+
+ new = malloc(len + 1);
+ if (!new)
+ return NULL;
+
+ for (a = orig, b = new; *a; a++) {
+ switch (*a) {
+ case '\001': memcpy(b, "<SOH>", 5); b += 5; break;
+ case '\n': *b++ = '\\'; *b++ = 'n'; break;
+ case '\t': *b++ = '\\'; *b++ = 't'; break;
+ default: *b++ = *a;
+ }
+ }
+
+ *b = '\0';
+ return new;
+}
+
+/* Load Module.symvers (cwd, else via top_level_dir()) into the exports hash. */
+static int read_exports(void)
+{
+	const char *symvers = "Module.symvers";
+	char line[1024], *path = NULL;
+	unsigned int line_num = 1;	/* 1-based, only used in error messages */
+	FILE *file;
+
+	file = fopen(symvers, "r");
+	if (!file) {
+		path = top_level_dir(symvers);
+		if (!path) {
+			ERROR("can't open '%s', \"objtool klp diff\" should be run from the kernel tree", symvers);
+			return -1;
+		}
+
+		file = fopen(path, "r");
+		if (!file) {
+			ERROR_GLIBC("fopen");
+			return -1;
+		}
+	}
+
+	for (; fgets(line, sizeof(line), file); line_num++) {
+		char *sym, *mod, *type;
+		struct export *export;
+
+		sym = strchr(line, '\t');
+		if (!sym) {
+			ERROR("malformed Module.symvers (sym) at line %u", line_num);
+			return -1;
+		}
+
+		*sym++ = '\0';
+
+		mod = strchr(sym, '\t');
+		if (!mod) {
+			ERROR("malformed Module.symvers (mod) at line %u", line_num);
+			return -1;
+		}
+
+		*mod++ = '\0';
+
+		type = strchr(mod, '\t');
+		if (!type) {
+			ERROR("malformed Module.symvers (type) at line %u", line_num);
+			return -1;
+		}
+
+		*type++ = '\0';
+
+		if (*sym == '\0' || *mod == '\0') {
+			ERROR("malformed Module.symvers at line %u", line_num);
+			return -1;
+		}
+
+		export = calloc(1, sizeof(*export));
+		if (!export) {
+			ERROR_GLIBC("calloc");
+			return -1;
+		}
+
+		export->mod = strdup(mod);
+		if (!export->mod) {
+			ERROR_GLIBC("strdup");
+			return -1;
+		}
+
+		export->sym = strdup(sym);
+		if (!export->sym) {
+			ERROR_GLIBC("strdup");
+			return -1;
+		}
+
+		hash_add(exports, &export->hash, str_hash(sym));
+	}
+
+	free(path);
+	fclose(file);
+	return 0;
+}
+
+static int read_sym_checksums(struct elf *elf)
+{
+ struct section *sec;
+
+ sec = find_section_by_name(elf, ".discard.sym_checksum");
+ if (!sec) {
+ ERROR("'%s' missing .discard.sym_checksum section, file not processed by 'objtool --checksum'?",
+ elf->name);
+ return -1;
+ }
+
+ if (!sec->rsec) {
+ ERROR("missing reloc section for .discard.sym_checksum");
+ return -1;
+ }
+
+ if (sec_size(sec) % sizeof(struct sym_checksum)) {
+ ERROR("struct sym_checksum size mismatch");
+ return -1;
+ }
+
+ for (int i = 0; i < sec_size(sec) / sizeof(struct sym_checksum); i++) {
+ struct sym_checksum *sym_checksum;
+ struct reloc *reloc;
+ struct symbol *sym;
+
+ sym_checksum = (struct sym_checksum *)sec->data->d_buf + i;
+
+ reloc = find_reloc_by_dest(elf, sec, i * sizeof(*sym_checksum));
+ if (!reloc) {
+ ERROR("can't find reloc for sym_checksum[%d]", i);
+ return -1;
+ }
+
+ sym = reloc->sym;
+
+ if (is_sec_sym(sym)) {
+ ERROR("not sure how to handle section %s", sym->name);
+ return -1;
+ }
+
+ if (is_func_sym(sym))
+ sym->csum.checksum = sym_checksum->checksum;
+ }
+
+ return 0;
+}
+
+static struct symbol *first_file_symbol(struct elf *elf)
+{
+ struct symbol *sym;
+
+ for_each_sym(elf, sym) {
+ if (is_file_sym(sym))
+ return sym;
+ }
+
+ return NULL;
+}
+
+static struct symbol *next_file_symbol(struct elf *elf, struct symbol *sym)
+{
+ for_each_sym_continue(elf, sym) {
+ if (is_file_sym(sym))
+ return sym;
+ }
+
+ return NULL;
+}
+
+/*
+ * Certain static local variables should never be correlated. They will be
+ * used in place rather than referencing the originals.
+ */
+static bool is_uncorrelated_static_local(struct symbol *sym)
+{
+ static const char * const vars[] = {
+ "__already_done.",
+ "__func__.",
+ "__key.",
+ "__warned.",
+ "_entry.",
+ "_entry_ptr.",
+ "_rs.",
+ "descriptor.",
+ "CSWTCH.",
+ };
+
+ if (!is_object_sym(sym) || !is_local_sym(sym))
+ return false;
+
+ if (!strcmp(sym->sec->name, ".data.once"))
+ return true;
+
+ for (int i = 0; i < ARRAY_SIZE(vars); i++) {
+ if (strstarts(sym->name, vars[i]))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Clang emits several useless .Ltmp<N> code labels.
+ */
+static bool is_clang_tmp_label(struct symbol *sym)
+{
+	return sym->type == STT_NOTYPE &&
+	       is_text_sec(sym->sec) &&
+	       strstarts(sym->name, ".Ltmp") &&
+	       isdigit((unsigned char)sym->name[5]);
+}
+
+static bool is_special_section(struct section *sec)
+{
+ static const char * const specials[] = {
+ ".altinstructions",
+ ".smp_locks",
+ "__bug_table",
+ "__ex_table",
+ "__jump_table",
+ "__mcount_loc",
+
+ /*
+ * Extract .static_call_sites here to inherit non-module
+ * preferential treatment. The later static call processing
+ * during klp module build will be skipped when it sees this
+ * section already exists.
+ */
+ ".static_call_sites",
+ };
+
+ static const char * const non_special_discards[] = {
+ ".discard.addressable",
+ ".discard.sym_checksum",
+ };
+
+ if (is_text_sec(sec))
+ return false;
+
+ for (int i = 0; i < ARRAY_SIZE(specials); i++) {
+ if (!strcmp(sec->name, specials[i]))
+ return true;
+ }
+
+ /* Most .discard data sections are special */
+ for (int i = 0; i < ARRAY_SIZE(non_special_discards); i++) {
+ if (!strcmp(sec->name, non_special_discards[i]))
+ return false;
+ }
+
+ return strstarts(sec->name, ".discard.");
+}
+
+/*
+ * These sections are referenced by special sections but aren't considered
+ * special sections themselves.
+ */
+static bool is_special_section_aux(struct section *sec)
+{
+ static const char * const specials_aux[] = {
+ ".altinstr_replacement",
+ ".altinstr_aux",
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(specials_aux); i++) {
+ if (!strcmp(sec->name, specials_aux[i]))
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * These symbols should never be correlated, so their local patched versions
+ * are used instead of linking to the originals.
+ */
+static bool dont_correlate(struct symbol *sym)
+{
+ return is_file_sym(sym) ||
+ is_null_sym(sym) ||
+ is_sec_sym(sym) ||
+ is_prefix_func(sym) ||
+ is_uncorrelated_static_local(sym) ||
+ is_clang_tmp_label(sym) ||
+ is_string_sec(sym->sec) ||
+ is_special_section(sym->sec) ||
+ is_special_section_aux(sym->sec) ||
+ strstarts(sym->name, "__initcall__");
+}
+
+/*
+ * For each symbol in the original kernel, find its corresponding "twin" in the
+ * patched kernel.
+ */
+static int correlate_symbols(struct elfs *e)
+{
+ struct symbol *file1_sym, *file2_sym;
+ struct symbol *sym1, *sym2;
+
+ /* Correlate locals */
+ for (file1_sym = first_file_symbol(e->orig),
+ file2_sym = first_file_symbol(e->patched); ;
+ file1_sym = next_file_symbol(e->orig, file1_sym),
+ file2_sym = next_file_symbol(e->patched, file2_sym)) {
+
+ if (!file1_sym && file2_sym) {
+ ERROR("FILE symbol mismatch: NULL != %s", file2_sym->name);
+ return -1;
+ }
+
+ if (file1_sym && !file2_sym) {
+ ERROR("FILE symbol mismatch: %s != NULL", file1_sym->name);
+ return -1;
+ }
+
+ if (!file1_sym)
+ break;
+
+ if (strcmp(file1_sym->name, file2_sym->name)) {
+ ERROR("FILE symbol mismatch: %s != %s", file1_sym->name, file2_sym->name);
+ return -1;
+ }
+
+ file1_sym->twin = file2_sym;
+ file2_sym->twin = file1_sym;
+
+ sym1 = file1_sym;
+
+ for_each_sym_continue(e->orig, sym1) {
+ if (is_file_sym(sym1) || !is_local_sym(sym1))
+ break;
+
+ if (dont_correlate(sym1))
+ continue;
+
+ sym2 = file2_sym;
+ for_each_sym_continue(e->patched, sym2) {
+ if (is_file_sym(sym2) || !is_local_sym(sym2))
+ break;
+
+ if (sym2->twin || dont_correlate(sym2))
+ continue;
+
+ if (strcmp(sym1->demangled_name, sym2->demangled_name))
+ continue;
+
+ sym1->twin = sym2;
+ sym2->twin = sym1;
+ break;
+ }
+ }
+ }
+
+ /* Correlate globals */
+ for_each_sym(e->orig, sym1) {
+ if (sym1->bind == STB_LOCAL)
+ continue;
+
+ sym2 = find_global_symbol_by_name(e->patched, sym1->name);
+
+ if (sym2 && !sym2->twin && !strcmp(sym1->name, sym2->name)) {
+ sym1->twin = sym2;
+ sym2->twin = sym1;
+ }
+ }
+
+ for_each_sym(e->orig, sym1) {
+ if (sym1->twin || dont_correlate(sym1))
+ continue;
+ WARN("no correlation: %s", sym1->name);
+ }
+
+ return 0;
+}
+
+/* "sympos" is used by livepatch to disambiguate duplicate symbol names */
+static unsigned long find_sympos(struct elf *elf, struct symbol *sym)
+{
+	bool vmlinux = str_ends_with(objname, "vmlinux.o");
+	unsigned long sympos = 0, nr_matches = 0;
+	bool has_dup = false;
+	struct symbol *s;
+
+	if (sym->bind != STB_LOCAL)
+		return 0;
+
+	if (vmlinux && sym->type == STT_FUNC) {
+		/*
+		 * HACK: Unfortunately, symbol ordering can differ between
+		 * vmlinux.o and vmlinux due to the linker script emitting
+		 * .text.unlikely* before .text*. Count .text.unlikely* first.
+		 *
+		 * TODO: Disambiguate symbols more reliably (checksums?)
+		 */
+		for_each_sym(elf, s) {
+			if (strstarts(s->sec->name, ".text.unlikely") &&
+			    !strcmp(s->name, sym->name)) {
+				nr_matches++;
+				if (s == sym)
+					sympos = nr_matches;
+				else
+					has_dup = true;
+			}
+		}
+		for_each_sym(elf, s) {
+			if (!strstarts(s->sec->name, ".text.unlikely") &&
+			    !strcmp(s->name, sym->name)) {
+				nr_matches++;
+				if (s == sym)
+					sympos = nr_matches;
+				else
+					has_dup = true;
+			}
+		}
+	} else {
+		for_each_sym(elf, s) {
+			if (!strcmp(s->name, sym->name)) {
+				nr_matches++;
+				if (s == sym)
+					sympos = nr_matches;
+				else
+					has_dup = true;
+			}
+		}
+	}
+
+	if (!sympos) {
+		ERROR("can't find sympos for %s", sym->name);
+		return ULONG_MAX;	/* error value, checked by the caller */
+	}
+
+	return has_dup ? sympos : 0;	/* 1-based position, or 0 if the name is unique */
+}
+
+static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym);
+
+static struct symbol *__clone_symbol(struct elf *elf, struct symbol *patched_sym,
+ bool data_too)
+{
+ struct section *out_sec = NULL;
+ unsigned long offset = 0;
+ struct symbol *out_sym;
+
+ if (data_too && !is_undef_sym(patched_sym)) {
+ struct section *patched_sec = patched_sym->sec;
+
+ out_sec = find_section_by_name(elf, patched_sec->name);
+ if (!out_sec) {
+ out_sec = elf_create_section(elf, patched_sec->name, 0,
+ patched_sec->sh.sh_entsize,
+ patched_sec->sh.sh_type,
+ patched_sec->sh.sh_addralign,
+ patched_sec->sh.sh_flags);
+ if (!out_sec)
+ return NULL;
+ }
+
+ if (is_string_sec(patched_sym->sec)) {
+ out_sym = elf_create_section_symbol(elf, out_sec);
+ if (!out_sym)
+ return NULL;
+
+ goto sym_created;
+ }
+
+ if (!is_sec_sym(patched_sym))
+ offset = sec_size(out_sec);
+
+ if (patched_sym->len || is_sec_sym(patched_sym)) {
+ void *data = NULL;
+ size_t size;
+
+ /* bss doesn't have data */
+ if (patched_sym->sec->data->d_buf)
+ data = patched_sym->sec->data->d_buf + patched_sym->offset;
+
+ if (is_sec_sym(patched_sym))
+ size = sec_size(patched_sym->sec);
+ else
+ size = patched_sym->len;
+
+ if (!elf_add_data(elf, out_sec, data, size))
+ return NULL;
+ }
+ }
+
+ out_sym = elf_create_symbol(elf, patched_sym->name, out_sec,
+ patched_sym->bind, patched_sym->type,
+ offset, patched_sym->len);
+ if (!out_sym)
+ return NULL;
+
+sym_created:
+ patched_sym->clone = out_sym;
+ out_sym->clone = patched_sym;
+
+ return out_sym;
+}
+
+static const char *sym_type(struct symbol *sym)
+{
+ switch (sym->type) {
+ case STT_NOTYPE: return "NOTYPE";
+ case STT_OBJECT: return "OBJECT";
+ case STT_FUNC: return "FUNC";
+ case STT_SECTION: return "SECTION";
+ case STT_FILE: return "FILE";
+ default: return "UNKNOWN";
+ }
+}
+
+static const char *sym_bind(struct symbol *sym)
+{
+ switch (sym->bind) {
+ case STB_LOCAL: return "LOCAL";
+ case STB_GLOBAL: return "GLOBAL";
+ case STB_WEAK: return "WEAK";
+ default: return "UNKNOWN";
+ }
+}
+
+/*
+ * Copy a symbol (and with data_too, its data and relocs) to the output object.
+ * Returns the existing clone if there is one, or NULL on failure.
+ */
+static struct symbol *clone_symbol(struct elfs *e, struct symbol *patched_sym,
+				   bool data_too)
+{
+	struct symbol *pfx;
+
+	if (patched_sym->clone)
+		return patched_sym->clone;
+
+	dbg_indent("%s%s", patched_sym->name, data_too ? " [+DATA]" : "");
+
+	/* Make sure the prefix gets cloned first, and don't ignore its failure */
+	if (is_func_sym(patched_sym) && data_too) {
+		pfx = get_func_prefix(patched_sym);
+		if (pfx && !clone_symbol(e, pfx, true))
+			return NULL;
+	}
+
+	if (!__clone_symbol(e->out, patched_sym, data_too))
+		return NULL;
+
+	if (data_too && clone_sym_relocs(e, patched_sym))
+		return NULL;
+
+	return patched_sym->clone;
+}
+
+static void mark_included_function(struct symbol *func)
+{
+ struct symbol *pfx;
+
+ func->included = 1;
+
+ /* Include prefix function */
+ pfx = get_func_prefix(func);
+ if (pfx)
+ pfx->included = 1;
+
+ /* Make sure .cold parent+child always stay together */
+ if (func->cfunc && func->cfunc != func)
+ func->cfunc->included = 1;
+ if (func->pfunc && func->pfunc != func)
+ func->pfunc->included = 1;
+}
+
+/*
+ * Flag changed and newly added functions in the patched object as "included"
+ * so they get cloned later.  Returns -1 if nothing changed, 0 otherwise.
+ */
+static int mark_changed_functions(struct elfs *e)
+{
+	struct symbol *sym_orig, *patched_sym;
+	bool changed = false;
+
+	/* Find changed functions */
+	for_each_sym(e->orig, sym_orig) {
+		if (!is_func_sym(sym_orig) || is_prefix_func(sym_orig))
+			continue;
+
+		patched_sym = sym_orig->twin;
+		if (!patched_sym)
+			continue;
+
+		if (sym_orig->csum.checksum != patched_sym->csum.checksum) {
+			patched_sym->changed = 1;
+			mark_included_function(patched_sym);
+			changed = true;
+		}
+	}
+
+	/* Find added functions and print them */
+	for_each_sym(e->patched, patched_sym) {
+		if (!is_func_sym(patched_sym) || is_prefix_func(patched_sym))
+			continue;
+
+		if (!patched_sym->twin) {
+			printf("%s: new function: %s\n", objname, patched_sym->name);
+			mark_included_function(patched_sym);
+			changed = true;
+		}
+	}
+
+	/* Print changed functions */
+	for_each_sym(e->patched, patched_sym) {
+		if (patched_sym->changed)
+			printf("%s: changed function: %s\n", objname, patched_sym->name);
+	}
+
+	return !changed ? -1 : 0;
+}
+
+static int clone_included_functions(struct elfs *e)
+{
+ struct symbol *patched_sym;
+
+ for_each_sym(e->patched, patched_sym) {
+ if (patched_sym->included) {
+ if (!clone_symbol(e, patched_sym, true))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Determine whether a relocation should reference the section rather than the
+ * underlying symbol.
+ */
+static bool section_reference_needed(struct section *sec)
+{
+ /*
+ * String symbols are zero-length and uncorrelated. It's easier to
+ * deal with them as section symbols.
+ */
+ if (is_string_sec(sec))
+ return true;
+
+ /*
+ * .rodata has mostly anonymous data so there's no way to determine the
+ * length of a needed reference. just copy the whole section if needed.
+ */
+ if (strstarts(sec->name, ".rodata"))
+ return true;
+
+ /* UBSAN anonymous data */
+ if (strstarts(sec->name, ".data..Lubsan") || /* GCC */
+ strstarts(sec->name, ".data..L__unnamed_")) /* Clang */
+ return true;
+
+ return false;
+}
+
+static bool is_reloc_allowed(struct reloc *reloc)
+{
+ return section_reference_needed(reloc->sym->sec) == is_sec_sym(reloc->sym);
+}
+
+static struct export *find_export(struct symbol *sym)
+{
+ struct export *export;
+
+ hash_for_each_possible(exports, export, hash, str_hash(sym->name)) {
+ if (!strcmp(export->sym, sym->name))
+ return export;
+ }
+
+ return NULL;
+}
+
+static const char *__find_modname(struct elfs *e)
+{
+	struct section *sec;
+	char *name;
+
+	sec = find_section_by_name(e->orig, ".modinfo");
+	if (!sec) {
+		ERROR("missing .modinfo section");
+		return NULL;
+	}
+
+	name = memmem(sec->data->d_buf, sec_size(sec), "\0name=", 6);
+	if (name)
+		return name + 6;
+
+	/* No "name=" entry: use the file name, minus directory and extension */
+	name = strrchr(e->orig->name, '/');
+	name = strdup(name ? name + 1 : e->orig->name);
+	if (!name) {
+		ERROR_GLIBC("strdup");
+		return NULL;
+	}
+
+	for (char *c = name; *c; c++) {
+		if (*c == '-')
+			*c = '_';
+		else if (*c == '.') {
+			*c = '\0';
+			break;
+		}
+	}
+
+	return name;
+}
+
+/* Get the object's module name as defined by the kernel (and klp_object) */
+static const char *find_modname(struct elfs *e)
+{
+ const char *modname;
+
+ if (e->modname)
+ return e->modname;
+
+ modname = __find_modname(e);
+ e->modname = modname;
+ return modname;
+}
+
+/*
+ * Copying a function from its native compiled environment to a kernel module
+ * removes its natural access to local functions/variables and unexported
+ * globals. References to such symbols need to be converted to KLP relocs so
+ * the kernel arch relocation code knows to apply them and where to find the
+ * symbols. Particularly, duplicate static symbols need to be disambiguated.
+ */
+static bool klp_reloc_needed(struct reloc *patched_reloc)
+{
+ struct symbol *patched_sym = patched_reloc->sym;
+ struct export *export;
+
+ /* no external symbol to reference */
+ if (dont_correlate(patched_sym))
+ return false;
+
+ /* For included functions, a regular reloc will do. */
+ if (patched_sym->included)
+ return false;
+
+ /*
+ * If exported by a module, it has to be a klp reloc. Thanks to the
+ * clusterfunk that is late module patching, the patch module is
+ * allowed to be loaded before any modules it depends on.
+ *
+ * If exported by vmlinux, a normal reloc will do.
+ */
+ export = find_export(patched_sym);
+ if (export)
+ return strcmp(export->mod, "vmlinux");
+
+ if (!patched_sym->twin) {
+ /*
+ * Presumably the symbol and its reference were added by the
+ * patch. The symbol could be defined in this .o or in another
+ * .o in the patch module.
+ *
+ * This check needs to be *after* the export check due to the
+ * possibility of the patch adding a new UNDEF reference to an
+ * exported symbol.
+ */
+ return false;
+ }
+
+ /* Unexported symbol which lives in the original vmlinux or module. */
+ return true;
+}
+
+static int convert_reloc_sym_to_secsym(struct elf *elf, struct reloc *reloc)
+{
+	struct symbol *sym = reloc->sym;	/* symbol being replaced */
+	struct section *sec = sym->sec;
+
+	if (!sec->sym && !elf_create_section_symbol(elf, sec))
+		return -1;
+
+	reloc->sym = sec->sym;
+	set_reloc_sym(elf, reloc, sec->sym->idx);	/* index of the section symbol */
+	set_reloc_addend(elf, reloc, sym->offset + reloc_addend(reloc));
+	return 0;
+}
+
+static int convert_reloc_secsym_to_sym(struct elf *elf, struct reloc *reloc)
+{
+ struct symbol *sym = reloc->sym;
+ struct section *sec = sym->sec;
+
+ /* If the symbol has a dedicated section, it's easy to find */
+ sym = find_symbol_by_offset(sec, 0);
+ if (sym && sym->len == sec_size(sec))
+ goto found_sym;
+
+ /* No dedicated section; find the symbol manually */
+ sym = find_symbol_containing(sec, arch_adjusted_addend(reloc));
+ if (!sym) {
+ /*
+ * This can happen for special section references to weak code
+ * whose symbol has been stripped by the linker.
+ */
+ return -1;
+ }
+
+found_sym:
+ reloc->sym = sym;
+ set_reloc_sym(elf, reloc, sym->idx);
+ set_reloc_addend(elf, reloc, reloc_addend(reloc) - sym->offset);
+ return 0;
+}
+
+/*
+ * Convert a relocation symbol reference to the needed format: either a section
+ * symbol or the underlying symbol itself.
+ */
+static int convert_reloc_sym(struct elf *elf, struct reloc *reloc)
+{
+ if (is_reloc_allowed(reloc))
+ return 0;
+
+ if (section_reference_needed(reloc->sym->sec))
+ return convert_reloc_sym_to_secsym(elf, reloc);
+ else
+ return convert_reloc_secsym_to_sym(elf, reloc);
+}
+
+/*
+ * Convert a regular relocation to a klp relocation (sort of).
+ */
+static int clone_reloc_klp(struct elfs *e, struct reloc *patched_reloc,
+ struct section *sec, unsigned long offset,
+ struct export *export)
+{
+ struct symbol *patched_sym = patched_reloc->sym;
+ s64 addend = reloc_addend(patched_reloc);
+ const char *sym_modname, *sym_orig_name;
+ static struct section *klp_relocs;
+ struct symbol *sym, *klp_sym;
+ unsigned long klp_reloc_off;
+ char sym_name[SYM_NAME_LEN];
+ struct klp_reloc klp_reloc;
+ unsigned long sympos;
+
+ if (!patched_sym->twin) {
+ ERROR("unexpected klp reloc for new symbol %s", patched_sym->name);
+ return -1;
+ }
+
+ /*
+ * Keep the original reloc intact for now to avoid breaking objtool run
+ * which relies on proper relocations for many of its features. This
+ * will be disabled later by "objtool klp post-link".
+ *
+ * Convert it to UNDEF (and WEAK to avoid modpost warnings).
+ */
+
+ sym = patched_sym->clone;
+ if (!sym) {
+ /* STB_WEAK: avoid modpost undefined symbol warnings */
+ sym = elf_create_symbol(e->out, patched_sym->name, NULL,
+ STB_WEAK, patched_sym->type, 0, 0);
+ if (!sym)
+ return -1;
+
+ patched_sym->clone = sym;
+ sym->clone = patched_sym;
+ }
+
+ if (!elf_create_reloc(e->out, sec, offset, sym, addend, reloc_type(patched_reloc)))
+ return -1;
+
+ /*
+ * Create the KLP symbol.
+ */
+
+ if (export) {
+ sym_modname = export->mod;
+ sym_orig_name = export->sym;
+ sympos = 0;
+ } else {
+ sym_modname = find_modname(e);
+ if (!sym_modname)
+ return -1;
+
+ sym_orig_name = patched_sym->twin->name;
+ sympos = find_sympos(e->orig, patched_sym->twin);
+ if (sympos == ULONG_MAX)
+ return -1;
+ }
+
+ /* symbol format: .klp.sym.modname.sym_name,sympos */
+ if (snprintf_check(sym_name, SYM_NAME_LEN, KLP_SYM_PREFIX "%s.%s,%ld",
+ sym_modname, sym_orig_name, sympos))
+ return -1;
+
+ klp_sym = find_symbol_by_name(e->out, sym_name);
+ if (!klp_sym) {
+ __dbg_indent("%s", sym_name);
+
+ /* STB_WEAK: avoid modpost undefined symbol warnings */
+ klp_sym = elf_create_symbol(e->out, sym_name, NULL,
+ STB_WEAK, patched_sym->type, 0, 0);
+ if (!klp_sym)
+ return -1;
+ }
+
+ /*
+ * Create the __klp_relocs entry. This will be converted to an actual
+ * KLP rela by "objtool klp post-link".
+ *
+ * This intermediate step is necessary to prevent corruption by the
+ * linker, which doesn't know how to properly handle two rela sections
+ * applying to the same base section.
+ */
+
+ if (!klp_relocs) {
+ klp_relocs = elf_create_section(e->out, KLP_RELOCS_SEC, 0,
+ 0, SHT_PROGBITS, 8, SHF_ALLOC);
+ if (!klp_relocs)
+ return -1;
+ }
+
+ klp_reloc_off = sec_size(klp_relocs);
+ memset(&klp_reloc, 0, sizeof(klp_reloc));
+
+ klp_reloc.type = reloc_type(patched_reloc);
+ if (!elf_add_data(e->out, klp_relocs, &klp_reloc, sizeof(klp_reloc)))
+ return -1;
+
+ /* klp_reloc.offset */
+ if (!sec->sym && !elf_create_section_symbol(e->out, sec))
+ return -1;
+
+ if (!elf_create_reloc(e->out, klp_relocs,
+ klp_reloc_off + offsetof(struct klp_reloc, offset),
+ sec->sym, offset, R_ABS64))
+ return -1;
+
+ /* klp_reloc.sym */
+ if (!elf_create_reloc(e->out, klp_relocs,
+ klp_reloc_off + offsetof(struct klp_reloc, sym),
+ klp_sym, addend, R_ABS64))
+ return -1;
+
+ return 0;
+}
+
+#define dbg_clone_reloc(sec, offset, patched_sym, addend, export, klp) \
+ dbg_indent("%s+0x%lx: %s%s0x%lx [%s%s%s%s%s%s]", \
+ sec->name, offset, patched_sym->name, \
+ addend >= 0 ? "+" : "-", labs(addend), \
+ sym_type(patched_sym), \
+ patched_sym->type == STT_SECTION ? "" : " ", \
+ patched_sym->type == STT_SECTION ? "" : sym_bind(patched_sym), \
+ is_undef_sym(patched_sym) ? " UNDEF" : "", \
+ export ? " EXPORTED" : "", \
+ klp ? " KLP" : "")
+
+/* Copy a reloc and its symbol to the output object */
+static int clone_reloc(struct elfs *e, struct reloc *patched_reloc,
+ struct section *sec, unsigned long offset)
+{
+ struct symbol *patched_sym = patched_reloc->sym;
+ struct export *export = find_export(patched_sym);
+ long addend = reloc_addend(patched_reloc);
+ struct symbol *out_sym;
+ bool klp;
+
+ if (!is_reloc_allowed(patched_reloc)) {
+ ERROR_FUNC(patched_reloc->sec->base, reloc_offset(patched_reloc),
+ "missing symbol for reference to %s+%ld",
+ patched_sym->name, addend);
+ return -1;
+ }
+
+ klp = klp_reloc_needed(patched_reloc);
+
+ dbg_clone_reloc(sec, offset, patched_sym, addend, export, klp);
+
+ if (klp) {
+ if (clone_reloc_klp(e, patched_reloc, sec, offset, export))
+ return -1;
+
+ return 0;
+ }
+
+ /*
+ * Why !export sets 'data_too':
+ *
+ * Unexported non-klp symbols need to live in the patch module,
+ * otherwise there will be unresolved symbols. Notably, this includes:
+ *
+ * - New functions/data
+ * - String sections
+ * - Special section entries
+ * - Uncorrelated static local variables
+ * - UBSAN sections
+ */
+ out_sym = clone_symbol(e, patched_sym, patched_sym->included || !export);
+ if (!out_sym)
+ return -1;
+
+ /*
+ * For strings, all references use section symbols, thanks to
+ * section_reference_needed(). clone_symbol() has cloned an empty
+ * version of the string section. Now copy the string itself.
+ */
+ if (is_string_sec(patched_sym->sec)) {
+ const char *str = patched_sym->sec->data->d_buf + addend;
+
+ __dbg_indent("\"%s\"", escape_str(str));
+
+ addend = elf_add_string(e->out, out_sym->sec, str);
+ if (addend == -1)
+ return -1;
+ }
+
+ if (!elf_create_reloc(e->out, sec, offset, out_sym, addend,
+ reloc_type(patched_reloc)))
+ return -1;
+
+ return 0;
+}
+
+/* Copy all relocs needed for a symbol's contents */
+static int clone_sym_relocs(struct elfs *e, struct symbol *patched_sym)
+{
+ struct section *patched_rsec = patched_sym->sec->rsec;
+ struct reloc *patched_reloc;
+ unsigned long start, end;
+ struct symbol *out_sym;
+
+ out_sym = patched_sym->clone;
+ if (!out_sym) {
+ ERROR("no clone for %s", patched_sym->name);
+ return -1;
+ }
+
+ if (!patched_rsec)
+ return 0;
+
+ if (!is_sec_sym(patched_sym) && !patched_sym->len)
+ return 0;
+
+ if (is_string_sec(patched_sym->sec))
+ return 0;
+
+ if (is_sec_sym(patched_sym)) {
+ start = 0;
+ end = sec_size(patched_sym->sec);
+ } else {
+ start = patched_sym->offset;
+ end = start + patched_sym->len;
+ }
+
+ for_each_reloc(patched_rsec, patched_reloc) {
+ unsigned long offset;
+
+ if (reloc_offset(patched_reloc) < start ||
+ reloc_offset(patched_reloc) >= end)
+ continue;
+
+ /*
+ * Skip any reloc referencing .altinstr_aux. Its code is
+ * always patched by alternatives. See ALTERNATIVE_TERNARY().
+ */
+ if (patched_reloc->sym->sec &&
+ !strcmp(patched_reloc->sym->sec->name, ".altinstr_aux"))
+ continue;
+
+ if (convert_reloc_sym(e->patched, patched_reloc)) {
+ ERROR_FUNC(patched_rsec->base, reloc_offset(patched_reloc),
+ "failed to convert reloc sym '%s' to its proper format",
+ patched_reloc->sym->name);
+ return -1;
+ }
+
+ offset = out_sym->offset + (reloc_offset(patched_reloc) - patched_sym->offset);
+
+ if (clone_reloc(e, patched_reloc, out_sym->sec, offset))
+ return -1;
+ }
+ return 0;
+
+}
+
+static int create_fake_symbol(struct elf *elf, struct section *sec,
+ unsigned long offset, size_t size)
+{
+ char name[SYM_NAME_LEN];
+ unsigned int type;
+ static int ctr;
+ char *c;
+
+ if (snprintf_check(name, SYM_NAME_LEN, "%s_%d", sec->name, ctr++))
+ return -1;
+
+ for (c = name; *c; c++)
+ if (*c == '.')
+ *c = '_';
+
+ /*
+ * STT_NOTYPE: Prevent objtool from validating .altinstr_replacement
+ * while still allowing objdump to disassemble it.
+ */
+ type = is_text_sec(sec) ? STT_NOTYPE : STT_OBJECT;
+ return elf_create_symbol(elf, name, sec, STB_LOCAL, type, offset, size) ? 0 : -1;
+}
+
+/*
+ * Special sections (alternatives, etc) are basically arrays of structs.
+ * For all the special sections, create a symbol for each struct entry. This
+ * is a bit cumbersome, but it makes the extracting of the individual entries
+ * much more straightforward.
+ *
+ * There are three ways to identify the entry sizes for a special section:
+ *
+ * 1) ELF section header sh_entsize: Ideally this would be used almost
+ * everywhere. But unfortunately the toolchains make it difficult. The
+ * assembler .[push]section directive syntax only takes entsize when
+ * combined with SHF_MERGE. But Clang disallows combining SHF_MERGE with
+ * SHF_WRITE. And some special sections do need to be writable.
+ *
+ * Another place this wouldn't work is .altinstr_replacement, whose entries
+ * don't have a fixed size.
+ *
+ * 2) ANNOTATE_DATA_SPECIAL: This is a lightweight objtool annotation which
+ * points to the beginning of each entry. The size of the entry is then
+ * inferred by the location of the subsequent annotation (or end of
+ * section).
+ *
+ * 3) Simple array of pointers: If the special section is just a basic array of
+ * pointers, the entry size can be inferred by the number of relocations.
+ * No annotations needed.
+ *
+ * Note I also tried to create per-entry symbols at the time of creation, in
+ * the original [inline] asm. Unfortunately, creating uniquely named symbols
+ * is trickier than one might think, especially with Clang inline asm. I
+ * eventually just gave up trying to make that work, in favor of using
+ * ANNOTATE_DATA_SPECIAL and creating the symbols here after the fact.
+ */
+static int create_fake_symbols(struct elf *elf)
+{
+ struct section *sec;
+ struct reloc *reloc;
+
+ /*
+ * 1) Make symbols for all the ANNOTATE_DATA_SPECIAL entries:
+ */
+
+ sec = find_section_by_name(elf, ".discard.annotate_data");
+ if (!sec || !sec->rsec)
+ return 0; /* NOTE(review): returning here also skips step 2 below -- confirm that is intended */
+
+ for_each_reloc(sec->rsec, reloc) {
+ unsigned long offset, size;
+ struct reloc *next_reloc;
+
+ if (annotype(elf, sec, reloc) != ANNOTYPE_DATA_SPECIAL)
+ continue;
+
+ offset = reloc_addend(reloc);
+
+ size = 0; /* stays 0 if no later annotation for the same section is found */
+ next_reloc = reloc;
+ for_each_reloc_continue(sec->rsec, next_reloc) {
+ if (annotype(elf, sec, next_reloc) != ANNOTYPE_DATA_SPECIAL ||
+ next_reloc->sym->sec != reloc->sym->sec)
+ continue; /* ignore other annotation types and annotations into other sections */
+
+ size = reloc_addend(next_reloc) - offset;
+ break;
+ }
+
+ if (!size)
+ size = sec_size(reloc->sym->sec) - offset; /* fall back to "up to the end of the section" */
+
+ if (create_fake_symbol(elf, reloc->sym->sec, offset, size))
+ return -1;
+ }
+
+ /*
+ * 2) Make symbols for sh_entsize, and simple arrays of pointers:
+ */
+
+ for_each_sec(elf, sec) {
+ unsigned int entry_size;
+ unsigned long offset;
+
+ if (!is_special_section(sec) || find_symbol_by_offset(sec, 0))
+ continue; /* only special sections with no symbol at offset 0 are handled here */
+
+ if (!sec->rsec) {
+ ERROR("%s: missing special section relocations", sec->name);
+ return -1;
+ }
+
+ entry_size = sec->sh.sh_entsize;
+ if (!entry_size) {
+ entry_size = arch_reloc_size(sec->rsec->relocs); /* no sh_entsize: try the "array of pointers" layout */
+ if (sec_size(sec) != entry_size * sec_num_entries(sec->rsec)) { /* section size must match entry_size times the rsec entry count */
+ ERROR("%s: missing special section entsize or annotations", sec->name);
+ return -1;
+ }
+ }
+
+ for (offset = 0; offset < sec_size(sec); offset += entry_size) { /* one fake symbol per fixed-size entry */
+ if (create_fake_symbol(elf, sec, offset, entry_size))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Decide whether a special section entry symbol should be kept: true when at
+ * least one of its relocs, after a successful convert_reloc_sym(), references
+ * an included function.  Section symbols and symbols whose section has no
+ * relocation section are never kept.
+ */
+static bool should_keep_special_sym(struct elf *elf, struct symbol *sym)
+{
+	struct reloc *rel;
+
+	if (is_sec_sym(sym))
+		return false;
+
+	if (!sym->sec->rsec)
+		return false;
+
+	sym_for_each_reloc(elf, sym, rel) {
+		/* Relocs that fail conversion are simply ignored */
+		if (!convert_reloc_sym(elf, rel) &&
+		    is_func_sym(rel->sym) && rel->sym->included)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Klp relocations aren't allowed for __jump_table and .static_call_sites if
+ * the referenced symbol lives in a kernel module, because such klp relocs may
+ * be applied after static branch/call init, resulting in code corruption.
+ *
+ * Validate a special section entry to avoid that. Note that an inert
+ * tracepoint is harmless enough, in that case just skip the entry and print a
+ * warning. Otherwise, return an error.
+ *
+ * This is only a temporary limitation which will be fixed when livepatch adds
+ * support for submodules: fully self-contained modules which are embedded in
+ * the top-level livepatch module's data and which can be loaded on demand when
+ * their corresponding to-be-patched module gets loaded. Then klp relocs can
+ * be retired.
+ *
+ * Return:
+ * -1: error: validation failed
+ * 1: warning: tracepoint skipped
+ * 0: success
+ */
+static int validate_special_section_klp_reloc(struct elfs *e, struct symbol *sym)
+{
+	bool static_branch = !strcmp(sym->sec->name, "__jump_table");
+	bool static_call = !strcmp(sym->sec->name, ".static_call_sites");
+	/*
+	 * Name used in diagnostics.  It keeps a placeholder until a code
+	 * (STT_FUNC) reloc has been seen, so the messages below never
+	 * dereference a NULL symbol if a key reloc is encountered first.
+	 */
+	const char *code_name = "<unknown>";
+	struct symbol *code_sym = NULL;
+	unsigned long code_offset = 0;
+	struct reloc *reloc;
+	int ret = 0;
+
+	/* Only these two special sections are subject to the restriction */
+	if (!static_branch && !static_call)
+		return 0;
+
+	sym_for_each_reloc(e->patched, sym, reloc) {
+		const char *sym_modname;
+		struct export *export;
+
+		/* Static branch/call keys are always STT_OBJECT */
+		if (reloc->sym->type != STT_OBJECT) {
+
+			/* Save code location which can be printed below */
+			if (reloc->sym->type == STT_FUNC && !code_sym) {
+				code_sym = reloc->sym;
+				code_name = code_sym->name;
+				code_offset = reloc_addend(reloc);
+			}
+
+			continue;
+		}
+
+		if (!klp_reloc_needed(reloc))
+			continue;
+
+		/* Work out which object the key lives in */
+		export = find_export(reloc->sym);
+		if (export) {
+			sym_modname = export->mod;
+		} else {
+			sym_modname = find_modname(e);
+			if (!sym_modname)
+				return -1;
+		}
+
+		/* vmlinux keys are ok */
+		if (!strcmp(sym_modname, "vmlinux"))
+			continue;
+
+		if (static_branch) {
+			if (strstarts(reloc->sym->name, "__tracepoint_")) {
+				/* + 13 skips the "__tracepoint_" prefix */
+				WARN("%s: disabling unsupported tracepoint %s",
+				     code_name, reloc->sym->name + 13);
+				ret = 1;
+				continue;
+			}
+
+			ERROR("%s+0x%lx: unsupported static branch key %s. Use static_key_enabled() instead",
+			      code_name, code_offset, reloc->sym->name);
+			return -1;
+		}
+
+		/* static call */
+		if (strstarts(reloc->sym->name, "__SCK__tp_func_")) {
+			ret = 1;
+			continue;
+		}
+
+		ERROR("%s()+0x%lx: unsupported static call key %s. Use KLP_STATIC_CALL() instead",
+		      code_name, code_offset, reloc->sym->name);
+		return -1;
+	}
+
+	return ret;
+}
+
+/*
+ * Clone every object symbol of @patched_sec which should_keep_special_sym()
+ * accepts and which validate_special_section_klp_reloc() does not reject or
+ * skip, so that special section entries referencing included functions (and
+ * their dependencies) end up in the output.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int clone_special_section(struct elfs *e, struct section *patched_sec)
+{
+	struct symbol *entry;
+
+	sec_for_each_sym(patched_sec, entry) {
+		int status;
+
+		if (!is_object_sym(entry) ||
+		    !should_keep_special_sym(e->patched, entry))
+			continue;
+
+		status = validate_special_section_klp_reloc(e, entry);
+		if (status < 0)
+			return -1;
+
+		/* A positive status means "skip this entry", not an error */
+		if (!status && !clone_symbol(e, entry, true))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Create the per-entry fake symbols for the patched object, then run
+ * clone_special_section() on each of its special sections.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int clone_special_sections(struct elfs *e)
+{
+	struct section *sec;
+
+	if (create_fake_symbols(e->patched))
+		return -1;
+
+	for_each_sec(e->patched, sec) {
+		if (!is_special_section(sec))
+			continue;
+
+		if (clone_special_section(e, sec))
+			return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * If the output object contains a symbol named "<prefix><modname>" (created by
+ * KLP_PRE_PATCH_CALLBACK() and friends), add a reloc for the klp_object_ext
+ * callback slot at @cb_offset (an offset within struct klp_callbacks) which
+ * targets whatever the reloc at that symbol's location targets.
+ *
+ * Returns 0 on success or when no such symbol exists, -1 on error.
+ */
+static int add_klp_callback(struct elfs *e, struct section *obj_sec,
+			    const char *prefix, const char *modname,
+			    unsigned long cb_offset)
+{
+	char sym_name[SYM_NAME_LEN];
+	struct reloc *reloc;
+	struct symbol *sym;
+
+	if (snprintf_check(sym_name, SYM_NAME_LEN, "%s%s", prefix, modname))
+		return -1;
+
+	sym = find_symbol_by_name(e->out, sym_name);
+	if (!sym)
+		return 0;
+
+	/* The callback pointer must carry a reloc; don't dereference NULL */
+	reloc = find_reloc_by_dest(e->out, sym->sec, sym->offset);
+	if (!reloc) {
+		ERROR("%s: missing callback relocation", sym_name);
+		return -1;
+	}
+
+	if (!elf_create_reloc(e->out, obj_sec,
+			      offsetof(struct klp_object_ext, callbacks) + cb_offset,
+			      reloc->sym, reloc_addend(reloc), R_ABS64))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Create __klp_objects and __klp_funcs sections which are intermediate
+ * sections provided as input to the patch module's init code for building the
+ * klp_patch, klp_object and klp_func structs for the livepatch API.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int create_klp_sections(struct elfs *e)
+{
+	size_t obj_size  = sizeof(struct klp_object_ext);
+	size_t func_size = sizeof(struct klp_func_ext);
+	struct section *obj_sec, *funcs_sec, *str_sec;
+	struct symbol *funcs_sym, *str_sym, *sym;
+	unsigned int nr_funcs = 0;
+	const char *modname;
+	void *obj_data;
+	s64 addend;
+
+	obj_sec = elf_create_section_pair(e->out, KLP_OBJECTS_SEC, obj_size, 0, 0);
+	if (!obj_sec)
+		return -1;
+
+	funcs_sec = elf_create_section_pair(e->out, KLP_FUNCS_SEC, func_size, 0, 0);
+	if (!funcs_sec)
+		return -1;
+
+	funcs_sym = elf_create_section_symbol(e->out, funcs_sec);
+	if (!funcs_sym)
+		return -1;
+
+	str_sec = elf_create_section(e->out, KLP_STRINGS_SEC, 0, 0,
+				     SHT_PROGBITS, 1,
+				     SHF_ALLOC | SHF_STRINGS | SHF_MERGE);
+	if (!str_sec)
+		return -1;
+
+	/* Start the string section with an empty string */
+	if (elf_add_string(e->out, str_sec, "") == -1)
+		return -1;
+
+	str_sym = elf_create_section_symbol(e->out, str_sec);
+	if (!str_sym)
+		return -1;
+
+	/* allocate klp_object_ext */
+	obj_data = elf_add_data(e->out, obj_sec, NULL, obj_size);
+	if (!obj_data)
+		return -1;
+
+	modname = find_modname(e);
+	if (!modname)
+		return -1;
+
+	/* klp_object_ext.name: no name reloc is created for vmlinux */
+	if (strcmp(modname, "vmlinux")) {
+		addend = elf_add_string(e->out, str_sec, modname);
+		if (addend == -1)
+			return -1;
+
+		if (!elf_create_reloc(e->out, obj_sec,
+				      offsetof(struct klp_object_ext, name),
+				      str_sym, addend, R_ABS64))
+			return -1;
+	}
+
+	/* klp_object_ext.funcs */
+	if (!elf_create_reloc(e->out, obj_sec, offsetof(struct klp_object_ext, funcs),
+			      funcs_sym, 0, R_ABS64))
+		return -1;
+
+	for_each_sym(e->out, sym) {
+		unsigned long offset = nr_funcs * func_size;
+		unsigned long sympos;
+		void *func_data;
+
+		/* Only non-cold functions cloned from a changed symbol get an entry */
+		if (!is_func_sym(sym) || sym->cold || !sym->clone || !sym->clone->changed)
+			continue;
+
+		/* allocate klp_func_ext */
+		func_data = elf_add_data(e->out, funcs_sec, NULL, func_size);
+		if (!func_data)
+			return -1;
+
+		/* klp_func_ext.old_name */
+		addend = elf_add_string(e->out, str_sec, sym->clone->twin->name);
+		if (addend == -1)
+			return -1;
+
+		if (!elf_create_reloc(e->out, funcs_sec,
+				      offset + offsetof(struct klp_func_ext, old_name),
+				      str_sym, addend, R_ABS64))
+			return -1;
+
+		/* klp_func_ext.new_func */
+		if (!elf_create_reloc(e->out, funcs_sec,
+				      offset + offsetof(struct klp_func_ext, new_func),
+				      sym, 0, R_ABS64))
+			return -1;
+
+		/* klp_func_ext.sympos */
+		BUILD_BUG_ON(sizeof(sympos) != sizeof_field(struct klp_func_ext, sympos));
+		sympos = find_sympos(e->orig, sym->clone->twin);
+		if (sympos == ULONG_MAX)
+			return -1;
+		memcpy(func_data + offsetof(struct klp_func_ext, sympos), &sympos,
+		       sizeof_field(struct klp_func_ext, sympos));
+
+		nr_funcs++;
+	}
+
+	/* klp_object_ext.nr_funcs */
+	BUILD_BUG_ON(sizeof(nr_funcs) != sizeof_field(struct klp_object_ext, nr_funcs));
+	memcpy(obj_data + offsetof(struct klp_object_ext, nr_funcs), &nr_funcs,
+	       sizeof_field(struct klp_object_ext, nr_funcs));
+
+	/*
+	 * Find callback pointers created by KLP_PRE_PATCH_CALLBACK() and
+	 * friends, and add them to the klp object.
+	 */
+
+	if (add_klp_callback(e, obj_sec, KLP_PRE_PATCH_PREFIX, modname,
+			     offsetof(struct klp_callbacks, pre_patch)))
+		return -1;
+
+	if (add_klp_callback(e, obj_sec, KLP_POST_PATCH_PREFIX, modname,
+			     offsetof(struct klp_callbacks, post_patch)))
+		return -1;
+
+	if (add_klp_callback(e, obj_sec, KLP_PRE_UNPATCH_PREFIX, modname,
+			     offsetof(struct klp_callbacks, pre_unpatch)))
+		return -1;
+
+	if (add_klp_callback(e, obj_sec, KLP_POST_UNPATCH_PREFIX, modname,
+			     offsetof(struct klp_callbacks, post_unpatch)))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Copy all .modinfo import_ns= tags to ensure all namespaced exported symbols
+ * can be accessed via normal relocs.
+ */
+static int copy_import_ns(struct elfs *e)
+{
+ struct section *patched_sec, *out_sec = NULL;
+ char *import_ns, *data_end;
+
+ patched_sec = find_section_by_name(e->patched, ".modinfo");
+ if (!patched_sec)
+ return 0; /* no .modinfo in the patched object: nothing to copy */
+
+ import_ns = patched_sec->data->d_buf;
+ if (!import_ns)
+ return 0;
+
+ for (data_end = import_ns + sec_size(patched_sec);
+ import_ns < data_end;
+ import_ns += strlen(import_ns) + 1) { /* advance past the string just copied, including its NUL */
+
+ import_ns = memmem(import_ns, data_end - import_ns, "import_ns=", 10); /* 10 == strlen("import_ns=") */
+ if (!import_ns)
+ return 0; /* no further import_ns= tags in the remaining data */
+
+ if (!out_sec) { /* first match: look up the output .modinfo, creating it if absent */
+ out_sec = find_section_by_name(e->out, ".modinfo");
+ if (!out_sec) {
+ out_sec = elf_create_section(e->out, ".modinfo", 0,
+ patched_sec->sh.sh_entsize,
+ patched_sec->sh.sh_type,
+ patched_sec->sh.sh_addralign,
+ patched_sec->sh.sh_flags);
+ if (!out_sec)
+ return -1;
+ }
+ }
+
+ if (!elf_add_data(e->out, out_sec, import_ns, strlen(import_ns) + 1)) /* copy from the tag up to and including its NUL */
+ return -1;
+ }
+
+ return 0;
+}
+
+int cmd_klp_diff(int argc, const char **argv)
+{
+ struct elfs e = {0};
+
+ argc = parse_options(argc, argv, klp_diff_options, klp_diff_usage, 0);
+ if (argc != 3) /* argv[0] and argv[1] are opened read-only below; argv[2] names the output file */
+ usage_with_options(klp_diff_usage, klp_diff_options);
+
+ objname = argv[0];
+
+ e.orig = elf_open_read(argv[0], O_RDONLY);
+ e.patched = elf_open_read(argv[1], O_RDONLY);
+ e.out = NULL;
+
+ if (!e.orig || !e.patched)
+ return -1;
+
+ if (read_exports())
+ return -1;
+
+ if (read_sym_checksums(e.orig))
+ return -1;
+
+ if (read_sym_checksums(e.patched))
+ return -1;
+
+ if (correlate_symbols(&e))
+ return -1;
+
+ if (mark_changed_functions(&e))
+ return 0; /* NOTE(review): a nonzero result exits successfully without creating the output -- presumably "nothing changed"; confirm */
+
+ e.out = elf_create_file(&e.orig->ehdr, argv[2]); /* the output file is created from the original object's ELF header */
+ if (!e.out)
+ return -1;
+
+ if (clone_included_functions(&e))
+ return -1;
+
+ if (clone_special_sections(&e))
+ return -1;
+
+ if (create_klp_sections(&e))
+ return -1;
+
+ if (copy_import_ns(&e))
+ return -1;
+
+ if (elf_write(e.out))
+ return -1;
+
+ return elf_close(e.out);
+}
diff --git a/tools/objtool/klp-post-link.c b/tools/objtool/klp-post-link.c
new file mode 100644
index 000000000000..c013e39957b1
--- /dev/null
+++ b/tools/objtool/klp-post-link.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Read the intermediate KLP reloc/symbol representations created by klp diff
+ * and convert them to the proper format required by livepatch. This needs to
+ * run last to avoid linker wreckage. Linkers don't tend to handle the "two
+ * rela sections for a single base section" case very well, nor do they like
+ * SHN_LIVEPATCH.
+ *
+ * This is the final tool in the livepatch module generation pipeline:
+ *
+ * kernel builds -> objtool klp diff -> module link -> objtool klp post-link
+ */
+
+#include <fcntl.h>
+#include <gelf.h>
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+#include <objtool/klp.h>
+#include <objtool/util.h>
+#include <linux/livepatch_external.h>
+
+/*
+ * Walk every struct klp_reloc entry of the KLP_RELOCS_SEC section and, for
+ * each one:
+ *
+ *   - create a reloc in the matching KLP rela section (creating that section
+ *     on first use),
+ *   - mark the KLP symbol as SHN_LIVEPATCH, and
+ *   - neutralize the original reloc at the same location.
+ *
+ * Returns 0 on success (including when KLP_RELOCS_SEC doesn't exist), -1 on
+ * error.
+ */
+static int fix_klp_relocs(struct elf *elf)
+{
+	struct section *symtab, *klp_relocs;
+	unsigned long nr_relocs;
+
+	klp_relocs = find_section_by_name(elf, KLP_RELOCS_SEC);
+	if (!klp_relocs)
+		return 0;
+
+	symtab = find_section_by_name(elf, ".symtab");
+	if (!symtab) {
+		ERROR("missing .symtab");
+		return -1;
+	}
+
+	nr_relocs = sec_size(klp_relocs) / sizeof(struct klp_reloc);
+
+	for (unsigned long i = 0; i < nr_relocs; i++) {
+		struct klp_reloc *klp_reloc;
+		unsigned long klp_reloc_off;
+		struct section *sec, *tmp, *klp_rsec;
+		unsigned long offset;
+		struct reloc *reloc;
+		char sym_modname[64];
+		char rsec_name[SEC_NAME_LEN];
+		u64 addend;
+		struct symbol *sym, *klp_sym;
+		bool created;
+
+		klp_reloc_off = i * sizeof(*klp_reloc);
+		klp_reloc = klp_relocs->data->d_buf + klp_reloc_off;
+
+		/*
+		 * Read __klp_relocs[i]:
+		 */
+
+		/* klp_reloc.offset */
+		reloc = find_reloc_by_dest(elf, klp_relocs,
+					   klp_reloc_off + offsetof(struct klp_reloc, offset));
+		if (!reloc) {
+			ERROR("malformed " KLP_RELOCS_SEC " section");
+			return -1;
+		}
+
+		sec = reloc->sym->sec;
+		offset = reloc_addend(reloc);
+
+		/* klp_reloc.sym */
+		reloc = find_reloc_by_dest(elf, klp_relocs,
+					   klp_reloc_off + offsetof(struct klp_reloc, sym));
+		if (!reloc) {
+			ERROR("malformed " KLP_RELOCS_SEC " section");
+			return -1;
+		}
+
+		klp_sym = reloc->sym;
+		addend = reloc_addend(reloc);
+
+		/*
+		 * symbol format: .klp.sym.modname.sym_name,sympos
+		 *
+		 * Bail out on a parse failure: sym_modname would otherwise be
+		 * used uninitialized when building the section name below.
+		 */
+		if (sscanf(klp_sym->name + strlen(KLP_SYM_PREFIX), "%55[^.]", sym_modname) != 1) {
+			ERROR("can't find modname in klp symbol '%s'", klp_sym->name);
+			return -1;
+		}
+
+		/*
+		 * Create the KLP rela:
+		 */
+
+		/* section format: .klp.rela.sec_objname.section_name */
+		if (snprintf_check(rsec_name, SEC_NAME_LEN,
+				   KLP_RELOC_SEC_PREFIX "%s.%s",
+				   sym_modname, sec->name))
+			return -1;
+
+		klp_rsec = find_section_by_name(elf, rsec_name);
+		if (!klp_rsec) {
+			klp_rsec = elf_create_section(elf, rsec_name, 0,
+						      elf_rela_size(elf),
+						      SHT_RELA, elf_addr_size(elf),
+						      SHF_ALLOC | SHF_INFO_LINK | SHF_RELA_LIVEPATCH);
+			if (!klp_rsec)
+				return -1;
+
+			klp_rsec->sh.sh_link = symtab->idx;
+			klp_rsec->sh.sh_info = sec->idx;
+			klp_rsec->base = sec;
+		}
+
+		/*
+		 * Temporarily point the base section at the KLP rela section
+		 * so the new reloc lands there, then always restore the
+		 * original rela section, even on failure.
+		 */
+		tmp = sec->rsec;
+		sec->rsec = klp_rsec;
+		created = elf_create_reloc(elf, sec, offset, klp_sym, addend,
+					   klp_reloc->type) ? true : false;
+		sec->rsec = tmp;
+		if (!created)
+			return -1;
+
+		/*
+		 * Fix up the corresponding KLP symbol:
+		 */
+
+		klp_sym->sym.st_shndx = SHN_LIVEPATCH;
+		if (!gelf_update_sym(symtab->data, klp_sym->idx, &klp_sym->sym)) {
+			ERROR_ELF("gelf_update_sym");
+			return -1;
+		}
+
+		/*
+		 * Disable the original non-KLP reloc by converting it to R_*_NONE:
+		 */
+
+		reloc = find_reloc_by_dest(elf, sec, offset);
+		if (!reloc) {
+			ERROR("%s+0x%lx: missing original reloc for klp symbol '%s'",
+			      sec->name, offset, klp_sym->name);
+			return -1;
+		}
+
+		sym = reloc->sym;
+		sym->sym.st_shndx = SHN_LIVEPATCH;
+		set_reloc_type(elf, reloc, 0);
+		if (!gelf_update_sym(symtab->data, sym->idx, &sym->sym)) {
+			ERROR_ELF("gelf_update_sym");
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * This runs on the livepatch module after all other linking has been done. It
+ * converts the intermediate __klp_relocs section into proper KLP relocs to be
+ * processed by livepatch. This needs to run last to avoid linker wreckage.
+ * Linkers don't tend to handle the "two rela sections for a single base
+ * section" case very well, nor do they appreciate SHN_LIVEPATCH.
+ *
+ * Expects exactly one argument after the subcommand name: the module file,
+ * which is opened read-write and rewritten.
+ *
+ * Returns the result of elf_close() on success, -1 on error.
+ */
+int cmd_klp_post_link(int argc, const char **argv)
+{
+	struct elf *elf;
+
+	/* Skip the subcommand name itself */
+	argc--;
+	argv++;
+
+	if (argc != 1) {
+		fprintf(stderr, "usage: objtool klp post-link <file.ko>\n");
+		return -1;
+	}
+
+	elf = elf_open_read(argv[0], O_RDWR);
+	if (!elf)
+		return -1;
+
+	if (fix_klp_relocs(elf))
+		return -1;
+
+	if (elf_write(elf))
+		return -1;
+
+	return elf_close(elf);
+}
diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h
index 6a922d046b8e..14f8ab653449 100644
--- a/tools/objtool/noreturns.h
+++ b/tools/objtool/noreturns.h
@@ -36,6 +36,7 @@ NORETURN(machine_real_restart)
NORETURN(make_task_dead)
NORETURN(mpt_halt_firmware)
NORETURN(mwait_play_dead)
+NORETURN(native_play_dead)
NORETURN(nmi_panic_self_stop)
NORETURN(panic)
NORETURN(vpanic)
@@ -45,7 +46,6 @@ NORETURN(rewind_stack_and_make_dead)
NORETURN(rust_begin_unwind)
NORETURN(rust_helper_BUG)
NORETURN(sev_es_terminate)
-NORETURN(snp_abort)
NORETURN(start_kernel)
NORETURN(stop_this_cpu)
NORETURN(usercopy_abort)
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 5c8b974ad0f9..1c3622117c33 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -16,7 +16,8 @@
#include <objtool/objtool.h>
#include <objtool/warn.h>
-bool help;
+bool debug;
+int indent;
static struct objtool_file file;
@@ -71,13 +72,54 @@ int objtool_pv_add(struct objtool_file *f, int idx, struct symbol *func)
return 0;
}
+/*
+ * Build "<dir>/<file>", where <dir> is the /proc/self/exe link target with its
+ * last three path components removed.
+ *
+ * Returns a malloc()ed string, or NULL if the link can't be read, the path has
+ * fewer than three '/' separators, or allocation fails.
+ */
+char *top_level_dir(const char *file)
+{
+	char exe_path[PATH_MAX];
+	ssize_t link_len, dir_len, name_len;
+	int levels = 3;
+	char *result;
+
+	link_len = readlink("/proc/self/exe", exe_path, sizeof(exe_path) - 1);
+	if (link_len <= 0)
+		return NULL;
+	/* readlink() does not NUL-terminate */
+	exe_path[link_len] = '\0';
+
+	/* Chop one trailing component per iteration */
+	while (levels--) {
+		char *slash = strrchr(exe_path, '/');
+
+		if (!slash)
+			return NULL;
+		*slash = '\0';
+	}
+
+	dir_len = strlen(exe_path);
+	name_len = strlen(file);
+
+	/* Room for the directory, a '/', the file name and the NUL */
+	result = malloc(dir_len + name_len + 2);
+	if (!result)
+		return NULL;
+
+	memcpy(result, exe_path, dir_len);
+	result[dir_len] = '/';
+	memcpy(result + dir_len + 1, file, name_len + 1);
+
+	return result;
+}
+
int main(int argc, const char **argv)
{
static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED";
+ if (init_signal_handler())
+ return -1;
+
/* libsubcmd init */
exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED);
pager_init(UNUSED);
+ if (argc > 1 && !strcmp(argv[1], "klp")) {
+ argc--;
+ argv++;
+ return cmd_klp(argc, argv);
+ }
+
return objtool_run(argc, argv);
}
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index 1dd9fc18fe62..5a979f52425a 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -8,7 +8,6 @@
#include <objtool/objtool.h>
#include <objtool/orc.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
int orc_dump(const char *filename)
{
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 922e6aac7cea..1045e1380ffd 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -12,7 +12,6 @@
#include <objtool/check.h>
#include <objtool/orc.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
struct orc_list_entry {
struct list_head list;
@@ -57,7 +56,7 @@ int orc_create(struct objtool_file *file)
/* Build a deduplicated list of ORC entries: */
INIT_LIST_HEAD(&orc_list);
- for_each_sec(file, sec) {
+ for_each_sec(file->elf, sec) {
struct orc_entry orc, prev_orc = {0};
struct instruction *insn;
bool empty = true;
@@ -127,7 +126,11 @@ int orc_create(struct objtool_file *file)
return -1;
}
orc_sec = elf_create_section(file->elf, ".orc_unwind",
- sizeof(struct orc_entry), nr);
+ nr * sizeof(struct orc_entry),
+ sizeof(struct orc_entry),
+ SHT_PROGBITS,
+ 1,
+ SHF_ALLOC);
if (!orc_sec)
return -1;
diff --git a/tools/objtool/signal.c b/tools/objtool/signal.c
new file mode 100644
index 000000000000..af5c65c0fb2d
--- /dev/null
+++ b/tools/objtool/signal.c
@@ -0,0 +1,135 @@
+/*
+ * signal.c: Register a sigaltstack for objtool, to be able to
+ * run a signal handler on a separate stack even if
+ * the main process stack has overflowed. Print out
+ * stack overflow errors when this happens.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <string.h>
+
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+
+static unsigned long stack_limit;
+
+/*
+ * Report whether @fault_addr lies in the 4096 bytes immediately below
+ * stack_limit, i.e. within [stack_limit - 4096, stack_limit), which is
+ * treated as the stack guard page.
+ */
+static bool is_stack_overflow(void *fault_addr)
+{
+	const unsigned long addr = (unsigned long)fault_addr;
+	const unsigned long guard_start = stack_limit - 4096;
+
+	/* Not below the limit, so not in the guard page */
+	if (addr >= stack_limit)
+		return false;
+
+	return addr >= guard_start;
+}
+
+/*
+ * Handler for the fatal signals registered by init_signal_handler().  Prints
+ * a one-line diagnostic to stderr (distinguishing a stack overflow from any
+ * other crash), then restores the default disposition and re-raises the
+ * signal.
+ */
+static void signal_handler(int sig_num, siginfo_t *info, void *context)
+{
+	struct sigaction sa_dfl = {0};
+	const char *sig_name;
+	bool overflow;
+	char msg[256];
+	int msg_len;
+
+	switch (sig_num) {
+	case SIGSEGV:	sig_name = "SIGSEGV";		break;
+	case SIGBUS:	sig_name = "SIGBUS";		break;
+	case SIGILL:	sig_name = "SIGILL";		break;
+	case SIGABRT:	sig_name = "SIGABRT";		break;
+	default:	sig_name = "Unknown signal";	break;
+	}
+
+	/*
+	 * si_addr is only meaningful for fault signals; for SIGABRT that part
+	 * of siginfo_t carries other data, so don't interpret it as an address.
+	 */
+	overflow = sig_num != SIGABRT && is_stack_overflow(info->si_addr);
+
+	if (overflow) {
+		msg_len = snprintf(msg, sizeof(msg),
+				   "%s: error: %s: objtool stack overflow!\n",
+				   objname, sig_name);
+	} else {
+		msg_len = snprintf(msg, sizeof(msg),
+				   "%s: error: %s: objtool crash!\n",
+				   objname, sig_name);
+	}
+
+	/*
+	 * snprintf() returns the length the full message would have had, which
+	 * can exceed the buffer for a long objname.  Clamp it so write() never
+	 * reads past msg[].
+	 */
+	if (msg_len < 0)
+		msg_len = 0;
+	else if ((size_t)msg_len >= sizeof(msg))
+		msg_len = sizeof(msg) - 1;
+
+	/* Result assigned only to avoid an unused-result warning */
+	msg_len = write(STDERR_FILENO, msg, msg_len);
+
+	/* Re-raise the signal to trigger the core dump */
+	sa_dfl.sa_handler = SIG_DFL;
+	sigaction(sig_num, &sa_dfl, NULL);
+	raise(sig_num);
+}
+
+static int read_stack_limit(void)
+{
+ unsigned long stack_start, stack_end;
+ struct rlimit rlim;
+ char line[256];
+ int ret = 0;
+ FILE *fp;
+
+ if (getrlimit(RLIMIT_STACK, &rlim)) {
+ ERROR_GLIBC("getrlimit");
+ return -1;
+ }
+
+ fp = fopen("/proc/self/maps", "r");
+ if (!fp) {
+ ERROR_GLIBC("fopen");
+ return -1;
+ }
+
+ while (fgets(line, sizeof(line), fp)) {
+ if (strstr(line, "[stack]")) { /* the mapping line that carries the "[stack]" label */
+ if (sscanf(line, "%lx-%lx", &stack_start, &stack_end) != 2) {
+ ERROR_GLIBC("sscanf"); /* NOTE(review): sscanf() need not set errno on a match failure, so any errno text printed here may be stale -- confirm */
+ ret = -1;
+ goto done;
+ }
+ stack_limit = stack_end - rlim.rlim_cur; /* end of the stack mapping minus the soft limit; NOTE(review): RLIM_INFINITY is not special-cased -- confirm */
+ goto done;
+ }
+ }
+
+ ret = -1; /* reached only when no "[stack]" line was found */
+ ERROR("/proc/self/maps: can't find [stack]");
+
+done:
+ fclose(fp);
+
+ return ret;
+}
+
+/*
+ * Record the stack limit, install a malloc()ed alternate signal stack and
+ * register signal_handler() for SIGSEGV, SIGBUS, SIGILL and SIGABRT so that
+ * the handler can run even when the main stack is unusable.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+int init_signal_handler(void)
+{
+	int signals[] = {SIGSEGV, SIGBUS, SIGILL, SIGABRT};
+	/* Zero every field, including ones not explicitly set below */
+	struct sigaction sa = {0};
+	stack_t ss;
+
+	if (read_stack_limit())
+		return -1;
+
+	ss.ss_sp = malloc(SIGSTKSZ);
+	if (!ss.ss_sp) {
+		ERROR_GLIBC("malloc");
+		return -1;
+	}
+	ss.ss_size = SIGSTKSZ;
+	ss.ss_flags = 0;
+
+	if (sigaltstack(&ss, NULL) == -1) {
+		ERROR_GLIBC("sigaltstack");
+		/* The stack was never installed, so it is still ours to free */
+		free(ss.ss_sp);
+		return -1;
+	}
+
+	sa.sa_sigaction = signal_handler;
+	sigemptyset(&sa.sa_mask);
+
+	/* SA_ONSTACK: run on the alternate stack; SA_SIGINFO: three-argument handler */
+	sa.sa_flags = SA_ONSTACK | SA_SIGINFO;
+
+	for (size_t i = 0; i < ARRAY_SIZE(signals); i++) {
+		if (sigaction(signals[i], &sa, NULL) == -1) {
+			ERROR_GLIBC("sigaction");
+			return -1;
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index c80fed8a840e..2a533afbc69a 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -15,7 +15,6 @@
#include <objtool/builtin.h>
#include <objtool/special.h>
#include <objtool/warn.h>
-#include <objtool/endianness.h>
struct special_entry {
const char *sec;
@@ -82,6 +81,8 @@ static int get_alt_entry(struct elf *elf, const struct special_entry *entry,
entry->orig_len);
alt->new_len = *(unsigned char *)(sec->data->d_buf + offset +
entry->new_len);
+ alt->feature = *(unsigned int *)(sec->data->d_buf + offset +
+ entry->feature);
}
orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
@@ -133,7 +134,7 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
struct section *sec;
unsigned int nr_entries;
struct special_alt *alt;
- int idx, ret;
+ int idx;
INIT_LIST_HEAD(alts);
@@ -142,12 +143,12 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
if (!sec)
continue;
- if (sec->sh.sh_size % entry->size != 0) {
+ if (sec_size(sec) % entry->size != 0) {
ERROR("%s size not a multiple of %d", sec->name, entry->size);
return -1;
}
- nr_entries = sec->sh.sh_size / entry->size;
+ nr_entries = sec_size(sec) / entry->size;
for (idx = 0; idx < nr_entries; idx++) {
alt = malloc(sizeof(*alt));
@@ -157,11 +158,8 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
}
memset(alt, 0, sizeof(*alt));
- ret = get_alt_entry(elf, entry, sec, idx, alt);
- if (ret > 0)
- continue;
- if (ret < 0)
- return ret;
+ if (get_alt_entry(elf, entry, sec, idx, alt))
+ return -1;
list_add_tail(&alt->list, alts);
}
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 81d120d05442..e38167ca56a9 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -16,6 +16,8 @@ arch/x86/include/asm/orc_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
+include/linux/interval_tree_generic.h
+include/linux/livepatch_external.h
include/linux/static_call_types.h
"
diff --git a/tools/objtool/trace.c b/tools/objtool/trace.c
new file mode 100644
index 000000000000..5dec44dab781
--- /dev/null
+++ b/tools/objtool/trace.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates.
+ */
+
+#include <objtool/trace.h>
+
+bool trace;
+int trace_depth;
+
+/*
+ * Macros to trace CFI state attributes changes.
+ */
+
+#define TRACE_CFI_ATTR(attr, prev, next, fmt, ...) \
+({ \
+ if ((prev)->attr != (next)->attr) \
+ TRACE("%s=" fmt " ", #attr, __VA_ARGS__); \
+})
+
+#define TRACE_CFI_ATTR_BOOL(attr, prev, next) \
+ TRACE_CFI_ATTR(attr, prev, next, \
+ "%s", (next)->attr ? "true" : "false")
+
+#define TRACE_CFI_ATTR_NUM(attr, prev, next, fmt) \
+ TRACE_CFI_ATTR(attr, prev, next, fmt, (next)->attr)
+
+#define CFI_REG_NAME_MAXLEN 16
+
+/*
+ * Return the name of a register. Note that the same static buffer
+ * is returned if the name is dynamically generated, so a generated name
+ * is only valid until the next call that generates one.
+ *
+ * Special CFI pseudo-registers get fixed names; other registers below
+ * CFI_NUM_REGS use arch_reg_name[] when it has an entry; anything else is
+ * formatted as "r<number>".  Returns "<error>" if formatting fails.
+ */
+static const char *cfi_reg_name(unsigned int reg)
+{
+	static char rname_buffer[CFI_REG_NAME_MAXLEN];
+	const char *rname;
+
+	switch (reg) {
+	case CFI_UNDEFINED:
+		return "<undefined>";
+	case CFI_CFA:
+		return "cfa";
+	case CFI_SP_INDIRECT:
+		return "(sp)";
+	case CFI_BP_INDIRECT:
+		return "(bp)";
+	}
+
+	if (reg < CFI_NUM_REGS) {
+		rname = arch_reg_name[reg];
+		if (rname)
+			return rname;
+	}
+
+	/* snprintf() reports failure with any negative value, not just -1 */
+	if (snprintf(rname_buffer, CFI_REG_NAME_MAXLEN, "r%u", reg) < 0)
+		return "<error>";
+
+	return (const char *)rname_buffer;
+}
+
+/*
+ * Functions and macros to trace CFI registers changes.
+ */
+
+/*
+ * Trace the new location of register @reg if its (base, offset) pair changed
+ * between the previous and next state; print nothing otherwise.
+ *
+ * @prefix: optional label printed as "<prefix>:" before the register
+ * @fmt:    format taking the register name, the base register name and the
+ *          offset, in that order
+ */
+static void trace_cfi_reg(const char *prefix, int reg, const char *fmt,
+			  int base_prev, int offset_prev,
+			  int base_next, int offset_next)
+{
+	char *rname;
+
+	if (base_prev == base_next && offset_prev == offset_next)
+		return;
+
+	if (prefix)
+		TRACE("%s:", prefix);
+
+	if (base_next == CFI_UNDEFINED) {
+		TRACE("%1$s=<undef> ", cfi_reg_name(reg));
+	} else {
+		/*
+		 * cfi_reg_name() may return its static buffer, so take a copy
+		 * of the first name before asking for the second one.  Fall
+		 * back to a placeholder if the copy can't be allocated rather
+		 * than passing NULL to a %s conversion.
+		 */
+		rname = strdup(cfi_reg_name(reg));
+		TRACE(fmt, rname ? rname : "<error>",
+		      cfi_reg_name(base_next), offset_next);
+		free(rname);
+	}
+}
+
+static void trace_cfi_reg_val(const char *prefix, int reg,
+ int base_prev, int offset_prev,
+ int base_next, int offset_next)
+{
+ trace_cfi_reg(prefix, reg, "%1$s=%2$s%3$+d ", /* value form: "<reg>=<base><signed offset> " */
+ base_prev, offset_prev, base_next, offset_next);
+}
+
+static void trace_cfi_reg_ref(const char *prefix, int reg,
+ int base_prev, int offset_prev,
+ int base_next, int offset_next)
+{
+ trace_cfi_reg(prefix, reg, "%1$s=(%2$s%3$+d) ", /* reference form: same as the value form, but parenthesized */
+ base_prev, offset_prev, base_next, offset_next);
+}
+
+#define TRACE_CFI_REG_VAL(reg, prev, next) \
+ trace_cfi_reg_val(NULL, reg, prev.base, prev.offset, \
+ next.base, next.offset)
+
+#define TRACE_CFI_REG_REF(reg, prev, next) \
+ trace_cfi_reg_ref(NULL, reg, prev.base, prev.offset, \
+ next.base, next.offset)
+
+void trace_insn_state(struct instruction *insn, struct insn_state *sprev,
+ struct insn_state *snext)
+{
+ struct cfi_state *cprev, *cnext;
+ int i;
+
+ if (!memcmp(sprev, snext, sizeof(struct insn_state))) /* byte-identical states: nothing to trace */
+ return;
+
+ cprev = &sprev->cfi;
+ cnext = &snext->cfi;
+
+ disas_print_insn(stderr, objtool_disas_ctx, insn,
+ trace_depth - 1, "state: ");
+
+ /* print registers changes */
+ TRACE_CFI_REG_VAL(CFI_CFA, cprev->cfa, cnext->cfa);
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ TRACE_CFI_REG_VAL(i, cprev->vals[i], cnext->vals[i]);
+ TRACE_CFI_REG_REF(i, cprev->regs[i], cnext->regs[i]);
+ }
+
+ /* print attributes changes */
+ TRACE_CFI_ATTR_NUM(stack_size, cprev, cnext, "%d");
+ TRACE_CFI_ATTR_BOOL(drap, cprev, cnext);
+ if (cnext->drap) { /* drap register details are only traced while drap is set in the next state */
+ trace_cfi_reg_val("drap", cnext->drap_reg,
+ cprev->drap_reg, cprev->drap_offset,
+ cnext->drap_reg, cnext->drap_offset);
+ }
+ TRACE_CFI_ATTR_BOOL(bp_scratch, cprev, cnext);
+ TRACE_CFI_ATTR_NUM(instr, sprev, snext, "%d"); /* these two attributes live in insn_state itself, not in its cfi member */
+ TRACE_CFI_ATTR_NUM(uaccess_stack, sprev, snext, "%u");
+
+ TRACE("\n");
+
+ insn->trace = 1; /* presumably records that a state line was printed for this instruction -- confirm against users of ->trace */
+}
+
+/*
+ * Trace the start of alternative @alt of @orig_insn, labelled @alt_name.
+ *
+ * For a jump table alternative, also trace the instruction which replaces the
+ * original one.  When the original is a NOP, trace_depth stays incremented on
+ * return; trace_alt_end() decrements it again.
+ */
+void trace_alt_begin(struct instruction *orig_insn, struct alternative *alt,
+		     char *alt_name)
+{
+	struct instruction *alt_insn;
+	/* Zero-filled so the string stays NUL-terminated when suffix[0] is set */
+	char suffix[2] = "";
+
+	alt_insn = alt->insn;
+
+	if (alt->type == ALT_TYPE_EX_TABLE) {
+		/*
+		 * When there is an exception table then the instruction
+		 * at the original location is executed but it can cause
+		 * an exception. In that case, the execution will be
+		 * redirected to the alternative instruction.
+		 *
+		 * The instruction at the original location can have
+		 * instruction alternatives, so we just print the location
+		 * of the instruction that can cause the exception and
+		 * not the instruction itself.
+		 */
+		TRACE_ALT_INFO_NOADDR(orig_insn, "/ ", "%s for instruction at 0x%lx <%s+0x%lx>",
+				      alt_name,
+				      orig_insn->offset, orig_insn->sym->name,
+				      orig_insn->offset - orig_insn->sym->offset);
+	} else {
+		TRACE_ALT_INFO_NOADDR(orig_insn, "/ ", "%s", alt_name);
+	}
+
+	if (alt->type == ALT_TYPE_JUMP_TABLE) {
+		/*
+		 * For a jump alternative, if the default instruction is
+		 * a NOP then it is replaced with the jmp instruction,
+		 * otherwise it is replaced with a NOP instruction.
+		 */
+		trace_depth++;
+		if (orig_insn->type == INSN_NOP) {
+			/* A 5-byte NOP is shown as being replaced by "jmpq" */
+			suffix[0] = (orig_insn->len == 5) ? 'q' : '\0';
+			TRACE_ADDR(orig_insn, "jmp%-3s %lx <%s+0x%lx>", suffix,
+				   alt_insn->offset, alt_insn->sym->name,
+				   alt_insn->offset - alt_insn->sym->offset);
+		} else {
+			TRACE_ADDR(orig_insn, "nop%d", orig_insn->len);
+			trace_depth--;
+		}
+	}
+}
+
+void trace_alt_end(struct instruction *orig_insn, struct alternative *alt,
+ char *alt_name)
+{
+ if (alt->type == ALT_TYPE_JUMP_TABLE && orig_insn->type == INSN_NOP)
+ trace_depth--; /* undo the increment trace_alt_begin() leaves in place for this case */
+ TRACE_ALT_INFO_NOADDR(orig_insn, "\\ ", "%s", alt_name);
+}
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
index d83f607733b0..d6562f292259 100644
--- a/tools/objtool/weak.c
+++ b/tools/objtool/weak.c
@@ -8,6 +8,8 @@
#include <stdbool.h>
#include <errno.h>
#include <objtool/objtool.h>
+#include <objtool/arch.h>
+#include <objtool/builtin.h>
#define UNSUPPORTED(name) \
({ \
@@ -24,3 +26,8 @@ int __weak orc_create(struct objtool_file *file)
{
UNSUPPORTED("ORC");
}
+
+int __weak cmd_klp(int argc, const char **argv)
+{
+ UNSUPPORTED("klp");	/* weak stub, same pattern as orc_create() above */
+}
diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt
index 83dc87c662b6..57b226e7fc2f 100644
--- a/tools/perf/Documentation/Build.txt
+++ b/tools/perf/Documentation/Build.txt
@@ -99,3 +99,18 @@ configuration paths for cross building:
In this case, the variable PKG_CONFIG_SYSROOT_DIR can be used alongside the
variable PKG_CONFIG_LIBDIR or PKG_CONFIG_PATH to prepend the sysroot path to
the library paths for cross compilation.
+
+5) Build with Clang
+===================
+By default, the makefile uses GCC as the compiler. Setting the environment
+variables HOSTCC, CC and CXX allows perf to be built with Clang.
+
+Using Clang for a native build:
+
+ $ HOSTCC=clang CC=clang CXX=clang++ make -C tools/perf
+
+Specifying ARCH and CROSS_COMPILE for cross compilation:
+
+ $ HOSTCC=clang CC=clang CXX=clang++ \
+ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \
+ make -C tools/perf
diff --git a/tools/perf/Documentation/android.txt b/tools/perf/Documentation/android.txt
index 24a59998fc91..3f3cc7ac3d13 100644
--- a/tools/perf/Documentation/android.txt
+++ b/tools/perf/Documentation/android.txt
@@ -1,78 +1,10 @@
How to compile perf for Android
-=========================================
+===============================
-I. Set the Android NDK environment
-------------------------------------------------
+There are two ways to build perf and run it on Android:
-(a). Use the Android NDK
-------------------------------------------------
-1. You need to download and install the Android Native Development Kit (NDK).
-Set the NDK variable to point to the path where you installed the NDK:
- export NDK=/path/to/android-ndk
+- Method 1: Build perf with static linking. See Build.txt, section
+ "4) Cross compilation" for how to build a static perf binary.
-2. Set cross-compiling environment variables for NDK toolchain and sysroot.
-For arm:
- export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-
- export NDK_SYSROOT=${NDK}/platforms/android-24/arch-arm
-For x86:
- export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.9/prebuilt/linux-x86_64/bin/i686-linux-android-
- export NDK_SYSROOT=${NDK}/platforms/android-24/arch-x86
-
-This method is only tested for Android NDK versions Revision 11b and later.
-perf uses some bionic enhancements that are not included in prior NDK versions.
-You can use method (b) described below instead.
-
-(b). Use the Android source tree
------------------------------------------------
-1. Download the master branch of the Android source tree.
-Set the environment for the target you want using:
- source build/envsetup.sh
- lunch
-
-2. Build your own NDK sysroot to contain latest bionic changes and set the
-NDK sysroot environment variable.
- cd ${ANDROID_BUILD_TOP}/ndk
-For arm:
- ./build/tools/build-ndk-sysroot.sh --abi=arm
- export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-arm
-For x86:
- ./build/tools/build-ndk-sysroot.sh --abi=x86
- export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-x86
-
-3. Set the NDK toolchain environment variable.
-For arm:
- export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/arm-linux-androideabi-
-For x86:
- export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/i686-linux-android-
-
-II. Compile perf for Android
-------------------------------------------------
-You need to run make with the NDK toolchain and sysroot defined above:
-For arm:
- make WERROR=0 ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
-For x86:
- make WERROR=0 ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} EXTRA_CFLAGS="-pie --sysroot=${NDK_SYSROOT}"
-
-III. Install perf
------------------------------------------------
-You need to connect to your Android device/emulator using adb.
-Install perf using:
- adb push perf /data/perf
-
-If you also want to use perf-archive you need busybox tools for Android.
-For installing perf-archive, you first need to replace #!/bin/bash with #!/system/bin/sh:
- sed 's/#!\/bin\/bash/#!\/system\/bin\/sh/g' perf-archive >> /tmp/perf-archive
- chmod +x /tmp/perf-archive
- adb push /tmp/perf-archive /data/perf-archive
-
-IV. Environment settings for running perf
-------------------------------------------------
-Some perf features need environment variables to run properly.
-You need to set these before running perf on the target:
- adb shell
- # PERF_PAGER=cat
-
-IV. Run perf
-------------------------------------------------
-Run perf on your device/emulator to which you previously connected using adb:
- # ./data/perf
+- Method 2: Download the Android NDK and use the bundled Clang to
+ build perf. See Build.txt, section "5) Build with Clang" for details.
diff --git a/tools/perf/Documentation/intel-acr.txt b/tools/perf/Documentation/intel-acr.txt
new file mode 100644
index 000000000000..72654fdd9a52
--- /dev/null
+++ b/tools/perf/Documentation/intel-acr.txt
@@ -0,0 +1,53 @@
+Intel Auto Counter Reload Support
+---------------------------------
+Support for Intel Auto Counter Reload in perf tools
+
+Auto counter reload provides a means for software to specify to hardware
+that certain counters, if supported, should be automatically reloaded
+upon overflow of chosen counters. By taking a sample only if the rate of
+one event exceeds some threshold relative to the rate of another event,
+this feature enables software to sample based on the relative rate of
+two or more events. To enable this, the user must provide a sample period
+term and a bitmask ("acr_mask") for each relevant event specifying the
+counters in an event group to reload if the event's specified sample
+period is exceeded.
+
+For example, if the user desires to measure a scenario when IPC > 2,
+the event group might look like the one below:
+
+ perf record -e {cpu_atom/instructions,period=200000,acr_mask=0x2/, \
+ cpu_atom/cycles,period=100000,acr_mask=0x3/} -- true
+
+In this case, if the "instructions" counter exceeds the sample period of
+200000, the second counter, "cycles", will be reset and a sample will be
+taken. If "cycles" is exceeded first, both counters in the group will be
+reset. In this way, samples will only be taken for cases where IPC > 2.
+
+The acr_mask term is a hexadecimal value representing a bitmask of the
+events in the group to be reset when the period is exceeded. In the
+example above, "instructions" is assigned an acr_mask of 0x2, meaning
+only the second event in the group is reloaded and a sample is taken
+for the first event. "cycles" is assigned an acr_mask of 0x3, meaning
+that both event counters will be reset if the sample period is exceeded
+first.
+
+ratio-to-prev Event Term
+------------------------
+To simplify this, an event term "ratio-to-prev" is provided which is used
+alongside the sample period term n or the -c/--count option. This would
+allow users to specify the desired relative rate between events as a
+ratio. Note: Both events compared must belong to the same PMU.
+
+The command above would then become
+
+ perf record -e {cpu_atom/instructions/, \
+ cpu_atom/cycles,period=100000,ratio-to-prev=0.5/} -- true
+
+ratio-to-prev is the ratio of the event using the term relative
+to the previous event in the group, which will always be 1,
+for a 1:0.5 or 2:1 ratio.
+
+To sample for IPC < 2 for example, the events need to be reordered:
+
+ perf record -e {cpu_atom/cycles/, \
+ cpu_atom/instructions,period=200000,ratio-to-prev=2.0/} -- true
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 46090c5b42b4..547f1a268018 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -170,7 +170,6 @@ include::itrace.txt[]
--code-with-type::
Show data type info in code annotation (for memory instructions only).
- Currently it only works with --stdio option.
SEE ALSO
diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt
index 37afade4f1b2..8b02e5b983fa 100644
--- a/tools/perf/Documentation/perf-arm-spe.txt
+++ b/tools/perf/Documentation/perf-arm-spe.txt
@@ -141,27 +141,65 @@ Config parameters
These are placed between the // in the event and comma separated. For example '-e
arm_spe/load_filter=1,min_latency=10/'
- branch_filter=1 - collect branches only (PMSFCR.B)
- event_filter=<mask> - filter on specific events (PMSEVFR) - see bitfield description below
+ event_filter=<mask> - logical AND filter on specific events (PMSEVFR) - see bitfield description below
+ inv_event_filter=<mask> - logical OR to filter out specific events (PMSNEVFR, FEAT_SPEv1p2) - see bitfield description below
jitter=1 - use jitter to avoid resonance when sampling (PMSIRR.RND)
- load_filter=1 - collect loads only (PMSFCR.LD)
min_latency=<n> - collect only samples with this latency or higher* (PMSLATFR)
pa_enable=1 - collect physical address (as well as VA) of loads/stores (PMSCR.PA) - requires privilege
pct_enable=1 - collect physical timestamp instead of virtual timestamp (PMSCR.PCT) - requires privilege
- store_filter=1 - collect stores only (PMSFCR.ST)
ts_enable=1 - enable timestamping with value of generic timer (PMSCR.TS)
discard=1 - enable SPE PMU events but don't collect sample data - see 'Discard mode' (PMBLIMITR.FM = DISCARD)
+ inv_data_src_filter=<mask> - mask to filter from 0-63 possible data sources (PMSDSFR, FEAT_SPE_FDS) - See 'Data source filtering'
+++*+++ Latency is the total latency from the point at which sampling started on that instruction, rather
than only the execution latency.
-Only some events can be filtered on; these include:
-
- bit 1 - instruction retired (i.e. omit speculative instructions)
+Only some events can be filtered on using 'event_filter' bits. The overall
+filter is the logical AND of these bits, for example if bits 3 and 5 are set
+only samples that have both 'L1D cache refill' AND 'TLB walk' are recorded. When
+FEAT_SPEv1p2 is implemented 'inv_event_filter' can also be used to exclude
+events that have any (OR) of the filter's bits set. For example setting bits 3
+and 5 in 'inv_event_filter' will exclude any events that are either L1D cache
+refill OR TLB walk. If the same bit is set in both filters it's UNPREDICTABLE
+whether the sample is included or excluded. Filter bits for both event_filter
+and inv_event_filter are:
+
+ bit 1 - Instruction retired (i.e. omit speculative instructions)
+ bit 2 - L1D access (FEAT_SPEv1p4)
bit 3 - L1D refill
+ bit 4 - TLB access (FEAT_SPEv1p4)
bit 5 - TLB refill
- bit 7 - mispredict
- bit 11 - misaligned access
+ bit 6 - Not taken event (FEAT_SPEv1p2)
+ bit 7 - Mispredict
+ bit 8 - Last level cache access (FEAT_SPEv1p4)
+ bit 9 - Last level cache miss (FEAT_SPEv1p4)
+ bit 10 - Remote access (FEAT_SPEv1p4)
+ bit 11 - Misaligned access (FEAT_SPEv1p1)
+ bit 12-15 - IMPLEMENTATION DEFINED events (when implemented)
+ bit 16 - Transaction (FEAT_TME)
+ bit 17 - Partial or empty SME or SVE predicate (FEAT_SPEv1p1)
+ bit 18 - Empty SME or SVE predicate (FEAT_SPEv1p1)
+ bit 19 - L2D access (FEAT_SPEv1p4)
+ bit 20 - L2D miss (FEAT_SPEv1p4)
+ bit 21 - Cache data modified (FEAT_SPEv1p4)
+ bit 22 - Recently fetched (FEAT_SPEv1p4)
+ bit 23 - Data snooped (FEAT_SPEv1p4)
+ bit 24 - Streaming SVE mode event (when FEAT_SPE_SME is implemented), or
+ IMPLEMENTATION DEFINED event 24 (when implemented, only versions
+ less than FEAT_SPEv1p4)
+ bit 25 - SMCU or external coprocessor operation event when FEAT_SPE_SME is
+ implemented, or IMPLEMENTATION DEFINED event 25 (when implemented,
+ only versions less than FEAT_SPEv1p4)
+ bit 26-31 - IMPLEMENTATION DEFINED events (only versions less than FEAT_SPEv1p4)
+ bit 48-63 - IMPLEMENTATION DEFINED events (when implemented)
+
+For IMPLEMENTATION DEFINED bits, refer to the CPU TRM if these bits are
+implemented.
+
+The driver will reject events if requested filter bits require unimplemented SPE
+versions, but will not reject filter bits for unimplemented IMPDEF bits or when
+their related feature is not present (e.g. SME). For example, if FEAT_SPEv1p2 is
+not implemented, filtering on "Not taken event" (bit 6) will be rejected.
So to sample just retired instructions:
@@ -171,6 +209,31 @@ or just mispredicted branches:
perf record -e arm_spe/event_filter=0x80/ -- ./mybench
+When set, the following filters can be used to select samples that match any of
+the operation types (OR filtering). If only one is set then only samples of that
+type are collected:
+
+ branch_filter=1 - Collect branches (PMSFCR.B)
+ load_filter=1 - Collect loads (PMSFCR.LD)
+ store_filter=1 - Collect stores (PMSFCR.ST)
+
+When extended filtering is supported (FEAT_SPE_EFT), SIMD and floating
+point operations can also be selected:
+
+ simd_filter=1 - Collect SIMD loads, stores and operations (PMSFCR.SIMD)
+ float_filter=1 - Collect floating point loads, stores and operations (PMSFCR.FP)
+
+When extended filtering is supported (FEAT_SPE_EFT), operation type filters can
+be changed to AND using _mask fields. For example samples could be selected if
+they are store AND SIMD by setting 'store_filter=1,simd_filter=1,
+store_filter_mask=1,simd_filter_mask=1'. The new masks are as follows:
+
+ branch_filter_mask=1 - Change branch filter behavior from OR to AND (PMSFCR.Bm)
+ load_filter_mask=1 - Change load filter behavior from OR to AND (PMSFCR.LDm)
+ store_filter_mask=1 - Change store filter behavior from OR to AND (PMSFCR.STm)
+ simd_filter_mask=1 - Change SIMD filter behavior from OR to AND (PMSFCR.SIMDm)
+ float_filter_mask=1 - Change floating point filter behavior from OR to AND (PMSFCR.FPm)
+
Viewing the data
~~~~~~~~~~~~~~~~~
@@ -191,19 +254,29 @@ groups:
36 branch
0 remote-access
900 memory
+ 1800 instructions
The arm_spe// and dummy:u events are implementation details and are expected to be empty.
-To get a full list of unique samples that are not sorted into groups, set the itrace option to
-generate 'instruction' samples. The period option is also taken into account, so set it to 1
-instruction unless you want to further downsample the already sampled SPE data:
+The instructions group contains the full list of unique samples that are not
+sorted into other groups. To generate only this group use --itrace=i1i.
- perf report --itrace=i1i
+1i (1 instruction interval) signifies no further downsampling. Rather than an
+instruction interval, this generates a sample every n SPE samples. For example
+to generate the default set of events for every 100 SPE samples:
+
+ perf report --itrace=bxofmtMai100i
+
+Other period types, for example nanoseconds (ns) are not currently supported.
Memory access details are also stored on the samples and this can be viewed with:
perf report --mem-mode
+The latency value from the SPE sample is stored in the 'weight' field of the
+Perf samples and can be displayed in Perf script and report outputs by enabling
+its display from the command line.
+
Common errors
~~~~~~~~~~~~~
@@ -247,6 +320,25 @@ to minimize output. Then run perf stat:
perf record -e arm_spe/discard/ -a -N -B --no-bpf-event -o - > /dev/null &
perf stat -e SAMPLE_FEED_LD
+Data source filtering
+~~~~~~~~~~~~~~~~~~~~~
+
+When FEAT_SPE_FDS is present, 'inv_data_src_filter' can be used as a mask to
+filter on a subset (0 - 63) of possible data source IDs. The full range of data
+sources is 0 - 65535 although these are unlikely to be used in practice. Data
+sources are IMPDEF so refer to the TRM for the mappings. Each bit N of the
+filter maps to data source N. The filter is an OR of all the bits, and the value
+provided to inv_data_src_filter is inverted before writing to PMSDSFR_EL1 so that
+set bits exclude that data source and cleared bits include that data source.
+Therefore the default value of 0 is equivalent to no filtering (all data sources
+included).
+
+For example, to include only data sources 0 and 3, clear bits 0 and 3 and set
+all other bits (0xFFFFFFFFFFFFFFF6).
+
+When 'inv_data_src_filter' is set to 0xFFFFFFFFFFFFFFFF, any samples with any
+data source set are excluded.
+
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 8331bd28b10e..1160224cb718 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -177,11 +177,21 @@ Suite for evaluating performance of simple memory copy in various ways.
Options of *memcpy*
^^^^^^^^^^^^^^^^^^^
--l::
+-s::
--size::
Specify size of memory to copy (default: 1MB).
Available units are B, KB, MB, GB and TB (case insensitive).
+-p::
+--page::
+Specify page-size for mapping memory buffers (default: 4KB).
+Available values are 4KB, 2MB, 1GB (case insensitive).
+
+-k::
+--chunk::
+Specify the chunk-size for each invocation (default: 0, or full-extent).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
-f::
--function::
Specify function to copy (default: default).
@@ -201,11 +211,21 @@ Suite for evaluating performance of simple memory set in various ways.
Options of *memset*
^^^^^^^^^^^^^^^^^^^
--l::
+-s::
--size::
Specify size of memory to set (default: 1MB).
Available units are B, KB, MB, GB and TB (case insensitive).
+-p::
+--page::
+Specify page-size for mapping memory buffers (default: 4KB).
+Available values are 4KB, 2MB, 1GB (case insensitive).
+
+-k::
+--chunk::
+Specify the chunk-size for each invocation (default: 0, or full-extent).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
-f::
--function::
Specify function to set (default: default).
@@ -220,6 +240,40 @@ Repeat memset invocation this number of times.
--cycles::
Use perf's cpu-cycles event instead of gettimeofday syscall.
+*mmap*::
+Suite for evaluating memory subsystem performance for mmap()'d memory.
+
+Options of *mmap*
+^^^^^^^^^^^^^^^^^
+-s::
+--size::
+Specify size of memory to set (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-p::
+--page::
+Specify page-size for mapping memory buffers (default: 4KB).
+Available values are 4KB, 2MB, 1GB (case insensitive).
+
+-r::
+--randomize::
+Specify seed to randomize page access offset (default: 0, or not randomized).
+
+-f::
+--function::
+Specify function to set (default: all).
+Available functions are 'demand' and 'populate', with the first
+demand faulting pages in the region and the second using an eager
+mapping.
+
+-l::
+--nr_loops::
+Repeat mmap() invocation this number of times.
+
+-c::
+--cycles::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
SUITES FOR 'numa'
~~~~~~~~~~~~~~~~~
*mem*::
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index f4af2dd6ab31..40b0f71a2c44 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -143,6 +143,13 @@ REPORT OPTIONS
feature, which causes cacheline sharing to behave like the cacheline
size is doubled.
+-M::
+--disassembler-style=::
+ Set disassembler style for objdump.
+
+--objdump=<path>::
+ Path to objdump binary.
+
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
diff --git a/tools/perf/Documentation/perf-check.txt b/tools/perf/Documentation/perf-check.txt
index ee92042082f7..09e1d35677f5 100644
--- a/tools/perf/Documentation/perf-check.txt
+++ b/tools/perf/Documentation/perf-check.txt
@@ -50,12 +50,12 @@ feature::
dwarf / HAVE_LIBDW_SUPPORT
dwarf_getlocations / HAVE_LIBDW_SUPPORT
dwarf-unwind / HAVE_DWARF_UNWIND_SUPPORT
- auxtrace / HAVE_AUXTRACE_SUPPORT
libbfd / HAVE_LIBBFD_SUPPORT
libbpf-strings / HAVE_LIBBPF_STRINGS_SUPPORT
libcapstone / HAVE_LIBCAPSTONE_SUPPORT
libdw-dwarf-unwind / HAVE_LIBDW_SUPPORT
libelf / HAVE_LIBELF_SUPPORT
+ libLLVM / HAVE_LIBLLVM_SUPPORT
libnuma / HAVE_LIBNUMA_SUPPORT
libopencsd / HAVE_CSTRACE_SUPPORT
libperl / HAVE_LIBPERL_SUPPORT
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c6f335659667..642d1c490d9e 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -452,6 +452,9 @@ call-graph.*::
kernel space is controlled not by this option but by the
kernel config (CONFIG_UNWINDER_*).
+ The 'defer' mode can be used with 'fp' mode to enable deferred
+ user callchains (like 'fp,defer').
+
call-graph.dump-size::
The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
When using dwarf into record-mode, the default size will be used if omitted.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index f3067a4af294..58efab72d2e5 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -285,7 +285,7 @@ If specified the 'Weighted diff' column is displayed with value 'd' computed as:
- period being the hist entry period value
- - WEIGHT-A/WEIGHT-B being user supplied weights in the the '-c' option
+ - WEIGHT-A/WEIGHT-B being user supplied weights in the '-c' option
behind ':' separator like '-c wdiff:1,2'.
- WEIGHT-A being the weight of the data file
- WEIGHT-B being the weight of the baseline data file
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 28215306a78a..a4378a0cd914 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -73,6 +73,7 @@ counted. The following modifiers exist:
e - group or event are exclusive and do not share the PMU
b - use BPF aggregration (see perf stat --bpf-counters)
R - retire latency value of the event
+ X - don't regroup the event to match PMUs
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
@@ -392,6 +393,8 @@ Support raw format:
. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of
a certain kind of events.
+include::intel-acr.txt[]
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-top[1],
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 067891bd7da6..e8b9aadbbfa5 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -325,6 +325,10 @@ OPTIONS
by default. User can change the number by passing it after comma
like "--call-graph fp,32".
+ Also "defer" can be used with "fp" (like "--call-graph fp,defer") to
+ enable deferred user callchain which will collect user-space callchains
+ when the thread returns to the user space.
+
-q::
--quiet::
Don't print any warnings or messages, useful for scripting.
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 28bec7e78bc8..03d112960632 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -527,6 +527,11 @@ include::itrace.txt[]
The known limitations include exception handing such as
setjmp/longjmp will have calls/returns not match.
+--merge-callchains::
+ Enable merging deferred user callchains if available. This is the
+ default behavior. If you want to see separate CALLCHAIN_DEFERRED
+ records for some reason, use --no-merge-callchains explicitly.
+
:GMEXAMPLECMD: script
:GMEXAMPLESUBCMD:
include::guest-files.txt[]
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index ef0c7565bd5c..ef2281c56743 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -94,6 +94,9 @@ RECORD OPTIONS
-g::
--callchain::
Do call-graph (stack chain/backtrace) recording
+-o::
+--output=::
+ Select the output file (default: perf.data)
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 973fede403a0..892c82a9bf40 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -249,6 +249,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
works well with -s/--summary option where no argument information is
required.
+--max-summary=N::
+ Maximum number of lines in the summary mode. Note that this applies to
+ each entry (thread or cgroup).
+
PAGEFAULTS
----------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index cd95ba09f727..c9d4dec65344 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -348,6 +348,16 @@ to special needs.
struct perf_bpil, which contains detailed information about
a BPF program, including type, id, tag, jited/xlated instructions, etc.
+The format of data in HEADER_BPF_PROG_INFO is as follows:
+ u32 count
+
+ struct perf_bpil {
+ u32 info_len; /* size of struct bpf_prog_info, when the tool is compiled */
+ u32 data_len; /* total bytes allocated for data, round up to 8 bytes */
+ u64 arrays; /* which arrays are included in data */
+ struct bpf_prog_info info;
+ u8 data[];
+ }[count];
HEADER_BPF_BTF = 26,
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 5a5832ee7b53..bd9f4804d56b 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -23,8 +23,39 @@ HOSTCFLAGS := $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
# borrowed from kernel headers depends on it, e.g. put_unaligned_*().
CFLAGS += -fno-strict-aliasing
-# Enabled Wthread-safety analysis for clang builds.
+# Set target flag and options when using clang as compiler.
ifeq ($(CC_NO_CLANG), 0)
+ CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi
+ CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu
+ CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu
+ CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu
+ CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu
+ CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu
+ CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu
+ CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu
+ CLANG_TARGET_FLAGS_x86_64 := x86_64-linux-gnu
+
+ # Default to host architecture if ARCH is not explicitly given.
+ ifeq ($(ARCH), $(HOSTARCH))
+ CLANG_TARGET_FLAGS := $(shell $(CLANG) -print-target-triple)
+ else
+ CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH))
+ endif
+
+ ifeq ($(CROSS_COMPILE),)
+ ifeq ($(CLANG_TARGET_FLAGS),)
+ $(error Specify CROSS_COMPILE or add CLANG_TARGET_FLAGS for $(ARCH))
+ else
+ CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS)
+ endif # CLANG_TARGET_FLAGS
+ else
+ CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%))
+ endif # CROSS_COMPILE
+
+ CC := $(CLANG) $(CLANG_FLAGS) -fintegrated-as
+ CXX := $(CXX) $(CLANG_FLAGS) -fintegrated-as
+
+ # Enable -Wthread-safety analysis for clang builds.
CFLAGS += -Wthread-safety
endif
@@ -323,9 +354,6 @@ FEATURE_CHECK_LDFLAGS-libpython := $(PYTHON_EMBED_LDOPTS)
FEATURE_CHECK_LDFLAGS-libaio = -lrt
-FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
-FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
-
CORE_CFLAGS += -fno-omit-frame-pointer
CORE_CFLAGS += -Wall
CORE_CFLAGS += -Wextra
@@ -417,10 +445,6 @@ ifeq ($(feature-eventfd), 1)
CFLAGS += -DHAVE_EVENTFD_SUPPORT
endif
-ifeq ($(feature-get_current_dir_name), 1)
- CFLAGS += -DHAVE_GET_CURRENT_DIR_NAME
-endif
-
ifeq ($(feature-gettid), 1)
CFLAGS += -DHAVE_GETTID
endif
@@ -600,13 +624,6 @@ ifndef NO_LIBELF
LIBBPF_INCLUDE = $(LIBBPF_DIR)/..
endif
endif
-
- FEATURE_CHECK_CFLAGS-libbpf-strings="-I$(LIBBPF_INCLUDE)"
- $(call feature_check,libbpf-strings)
- ifeq ($(feature-libbpf-strings), 1)
- $(call detected,CONFIG_LIBBPF_STRINGS)
- CFLAGS += -DHAVE_LIBBPF_STRINGS_SUPPORT
- endif
endif
endif # NO_LIBBPF
endif # NO_LIBELF
@@ -784,15 +801,10 @@ endif
ifndef NO_SLANG
ifneq ($(feature-libslang), 1)
- ifneq ($(feature-libslang-include-subdir), 1)
- $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev)
- NO_SLANG := 1
- else
- CFLAGS += -DHAVE_SLANG_INCLUDE_SUBDIR
- endif
+ $(warning slang not found, disables TUI support. Please install slang-devel, libslang-dev or libslang2-dev)
+ NO_SLANG := 1
endif
ifndef NO_SLANG
- # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
CFLAGS += -DHAVE_SLANG_SUPPORT
EXTLIBS += -lslang
$(call detected,CONFIG_SLANG)
@@ -817,9 +829,7 @@ ifdef GTK2
endif
endif
-ifdef NO_LIBPERL
- CFLAGS += -DNO_LIBPERL
-else
+ifdef LIBPERL
PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
@@ -829,17 +839,13 @@ else
PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
+ $(call feature_check,libperl)
ifneq ($(feature-libperl), 1)
- CFLAGS += -DNO_LIBPERL
- NO_LIBPERL := 1
- $(warning Missing perl devel files. Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev)
+ $(error Missing perl devel files. Please install perl-ExtUtils-Embed/libperl-dev)
else
LDFLAGS += $(PERL_EMBED_LDFLAGS)
EXTLIBS += $(PERL_EMBED_LIBADD)
CFLAGS += -DHAVE_LIBPERL_SUPPORT
- ifeq ($(CC_NO_CLANG), 0)
- CFLAGS += -Wno-compound-token-split-by-macro
- endif
$(call detected,CONFIG_LIBPERL)
endif
endif
@@ -921,6 +927,8 @@ ifdef BUILD_NONDISTRO
ifeq ($(feature-libbfd), 1)
EXTLIBS += -lbfd -lopcodes
+ FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes -ldl
+ FEATURE_CHECK_LDFLAGS-disassembler-init-styled = -lbfd -lopcodes -ldl
else
# we are on a system that requires -liberty and (maybe) -lz
# to link against -lbfd; test each case individually here
@@ -947,6 +955,7 @@ ifdef BUILD_NONDISTRO
CFLAGS += -DHAVE_LIBBFD_SUPPORT
CXXFLAGS += -DHAVE_LIBBFD_SUPPORT
+ $(call detected,CONFIG_LIBBFD)
$(call feature_check,libbfd-buildid)
@@ -955,6 +964,14 @@ ifdef BUILD_NONDISTRO
else
$(warning Old version of libbfd/binutils things like PE executable profiling will not be available)
endif
+
+ ifeq ($(feature-disassembler-four-args), 1)
+ CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
+ endif
+
+ ifeq ($(feature-disassembler-init-styled), 1)
+ CFLAGS += -DDISASM_INIT_STYLED
+ endif
endif
ifndef NO_LIBLLVM
@@ -1046,14 +1063,6 @@ ifdef HAVE_KVM_STAT_SUPPORT
CFLAGS += -DHAVE_KVM_STAT_SUPPORT
endif
-ifeq ($(feature-disassembler-four-args), 1)
- CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE
-endif
-
-ifeq ($(feature-disassembler-init-styled), 1)
- CFLAGS += -DDISASM_INIT_STYLED
-endif
-
ifeq (${IS_64_BIT}, 1)
ifndef NO_PERF_READ_VDSO32
$(call feature_check,compile-32)
@@ -1103,19 +1112,6 @@ ifndef NO_CAPSTONE
endif
endif
-ifndef NO_AUXTRACE
- ifeq ($(SRCARCH),x86)
- ifeq ($(feature-get_cpuid), 0)
- $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc)
- NO_AUXTRACE := 1
- endif
- endif
- ifndef NO_AUXTRACE
- $(call detected,CONFIG_AUXTRACE)
- CFLAGS += -DHAVE_AUXTRACE_SUPPORT
- endif
-endif
-
ifdef EXTRA_TESTS
$(call detected,CONFIG_EXTRA_TESTS)
CFLAGS += -DHAVE_EXTRA_TESTS
@@ -1181,20 +1177,6 @@ ifneq ($(NO_LIBTRACEEVENT),1)
else
$(error ERROR: libtraceevent is missing. Please install libtraceevent-dev/libtraceevent-devel and/or set LIBTRACEEVENT_DIR or build with NO_LIBTRACEEVENT=1)
endif
-
- ifeq ($(feature-libtracefs), 1)
- CFLAGS += $(shell $(PKG_CONFIG) --cflags libtracefs)
- LDFLAGS += $(shell $(PKG_CONFIG) --libs-only-L libtracefs)
- EXTLIBS += $(shell $(PKG_CONFIG) --libs-only-l libtracefs)
- LIBTRACEFS_VERSION := $(shell $(PKG_CONFIG) --modversion libtracefs).0.0
- LIBTRACEFS_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEFS_VERSION)))
- LIBTRACEFS_VERSION_2 := $(word 2, $(subst ., ,$(LIBTRACEFS_VERSION)))
- LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION)))
- LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3))
- CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP)
- else
- $(warning libtracefs is missing. Please install libtracefs-dev/libtracefs-devel)
- endif
endif
# Among the variables below, these:
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index e2150acc2c13..b3f481a626af 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -17,7 +17,7 @@ include ../scripts/utilities.mak
#
# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
#
-# Define NO_LIBPERL to disable perl script extension.
+# Define LIBPERL to enable perl script extension.
#
# Define NO_LIBPYTHON to disable python script extension.
#
@@ -84,8 +84,6 @@ include ../scripts/utilities.mak
#
# Define NO_LZMA if you do not want to support compressed (xz) kernel modules
#
-# Define NO_AUXTRACE if you do not want AUX area tracing support
-#
# Define NO_LIBBPF if you do not want BPF support
#
# Define NO_LIBCAP if you do not want process capabilities considered by perf
@@ -194,7 +192,7 @@ else
# paths are used instead.
ifdef CROSS_COMPILE
ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),)
- CROSS_ARCH = $(shell $(CC) -dumpmachine)
+ CROSS_ARCH = $(notdir $(CROSS_COMPILE:%-=%))
PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/
PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/
PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/
@@ -234,12 +232,12 @@ endif
# The fixdep build - we force fixdep tool to be built as
# the first target in the separate make session not to be
# disturbed by any parallel make jobs. Once fixdep is done
-# we issue the requested build with FIXDEP=1 variable.
+# we issue the requested build with FIXDEP_BUILT=1 variable.
#
# The fixdep build is disabled for $(NON_CONFIG_TARGETS)
# targets, because it's not necessary.
-ifdef FIXDEP
+ifdef FIXDEP_BUILT
force_fixdep := 0
else
force_fixdep := $(config)
@@ -286,7 +284,7 @@ $(goals) all: sub-make
sub-make: fixdep
@./check-headers.sh
- $(Q)$(MAKE) FIXDEP=1 -f Makefile.perf $(goals)
+ $(Q)$(MAKE) FIXDEP_BUILT=1 -f Makefile.perf $(goals)
else # force_fixdep
@@ -941,7 +939,7 @@ $(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o
ifndef NO_JVMTI
LIBJVMTI_IN := $(OUTPUT)jvmti/jvmti-in.o
-$(LIBJVMTI_IN): FORCE
+$(LIBJVMTI_IN): prepare FORCE
$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=jvmti obj=jvmti
$(OUTPUT)$(LIBJVMTI): $(LIBJVMTI_IN)
@@ -1103,7 +1101,7 @@ endif
$(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
$(call QUIET_INSTALL, perf-iostat) \
$(INSTALL) $(OUTPUT)perf-iostat -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
-ifndef NO_LIBPERL
+ifdef LIBPERL
$(call QUIET_INSTALL, perl-scripts) \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
$(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -m 644 -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
@@ -1272,9 +1270,24 @@ endif # CONFIG_PERF_BPF_SKEL
bpf-skel-clean:
$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) $(SKEL_OUT)/vmlinux.h
+# Remove the pmu-events files listed below. When an OUTPUT directory is
+# present the copied pmu-events/arch directory is removed too, which is why
+# only that branch passes -r to $(RM).
+pmu-events-clean:
+ifneq ($(OUTPUT),)
+	$(call QUIET_CLEAN, pmu-events) $(RM) -r $(OUTPUT)pmu-events/arch \
+		$(OUTPUT)pmu-events/pmu-events.c \
+		$(OUTPUT)pmu-events/metric_test.log \
+		$(OUTPUT)pmu-events/test-empty-pmu-events.c \
+		$(OUTPUT)pmu-events/empty-pmu-events.log
+else # No OUTPUT directory: only the in-tree files need removing.
+	$(call QUIET_CLEAN, pmu-events) $(RM) \
+		pmu-events/pmu-events.c \
+		pmu-events/metric_test.log \
+		pmu-events/test-empty-pmu-events.c \
+		pmu-events/empty-pmu-events.log
+endif
+
clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean \
arm64-sysreg-defs-clean fixdep-clean python-clean bpf-skel-clean \
- tests-coresight-targets-clean
+ tests-coresight-targets-clean pmu-events-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive \
$(OUTPUT)perf-iostat $(LANG_BINDINGS)
$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '*.a' -delete -o \
@@ -1287,10 +1300,6 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(
$(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* \
$(OUTPUT)util/intel-pt-decoder/inat-tables.c \
$(OUTPUT)tests/llvm-src-{base,kbuild,prologue,relocation}.c \
- $(OUTPUT)pmu-events/pmu-events.c \
- $(OUTPUT)pmu-events/test-empty-pmu-events.c \
- $(OUTPUT)pmu-events/empty-pmu-events.log \
- $(OUTPUT)pmu-events/metric_test.log \
$(OUTPUT)$(fadvise_advice_array) \
$(OUTPUT)$(fsconfig_arrays) \
$(OUTPUT)$(fsmount_arrays) \
diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c
index cf91a43362b0..5e667b0f5512 100644
--- a/tools/perf/arch/arm/annotate/instructions.c
+++ b/tools/perf/arch/arm/annotate/instructions.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
#include <linux/zalloc.h>
+#include <errno.h>
#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>
diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
index 27c1d5ebcd91..b07e699aaa3c 100644
--- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl
@@ -482,3 +482,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build
index f7a8b37d1c68..fd695e1fdaee 100644
--- a/tools/perf/arch/arm/util/Build
+++ b/tools/perf/arch/arm/util/Build
@@ -3,4 +3,4 @@ perf-util-y += perf_regs.o
perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-perf-util-$(CONFIG_AUXTRACE) += pmu.o auxtrace.o cs-etm.o
+perf-util-y += pmu.o auxtrace.o cs-etm.o
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 3b8eca0ffb17..eb6404267f17 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -5,6 +5,7 @@
*/
#include <dirent.h>
+#include <errno.h>
#include <stdbool.h>
#include <linux/coresight-pmu.h>
#include <linux/zalloc.h>
diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c
index f70075c89aa0..9be8da5207f5 100644
--- a/tools/perf/arch/arm/util/pmu.c
+++ b/tools/perf/arch/arm/util/pmu.c
@@ -20,7 +20,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
{
struct perf_cpu_map *intersect, *online = cpu_map__online();
-#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
/* add ETM default config here */
pmu->auxtrace = true;
@@ -39,7 +38,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
pmu->selectable = true;
#endif
}
-#endif
/* Workaround some ARM PMU's failing to correctly set CPU maps for online processors. */
intersect = perf_cpu_map__intersect(online, pmu->cpus);
perf_cpu_map__put(online);
diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c
index d465d093e7eb..16cb62d40bd9 100644
--- a/tools/perf/arch/arm64/annotate/instructions.c
+++ b/tools/perf/arch/arm64/annotate/instructions.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/compiler.h>
+#include <errno.h>
#include <sys/types.h>
#include <regex.h>
#include <stdlib.h>
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index a74521b79eaa..d63881081d2e 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,13 +1,14 @@
+perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
+perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
+perf-util-y += ../../arm/util/auxtrace.o
+perf-util-y += ../../arm/util/cs-etm.o
+perf-util-y += ../../arm/util/pmu.o
+perf-util-y += arm-spe.o
perf-util-y += header.o
+perf-util-y += hisi-ptt.o
perf-util-y += machine.o
+perf-util-y += mem-events.o
perf-util-y += perf_regs.o
-perf-util-y += tsc.o
perf-util-y += pmu.o
-perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
-perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
-perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-
-perf-util-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \
- ../../arm/util/auxtrace.o \
- ../../arm/util/cs-etm.o \
- arm-spe.o mem-events.o hisi-ptt.o
+perf-util-y += tsc.o
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 4f2833b62ff5..d5ec1408d0ae 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -10,6 +10,7 @@
#include <linux/log2.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <errno.h>
#include <time.h>
#include "../../../util/cpumap.h"
@@ -121,12 +122,17 @@ static int arm_spe_save_cpu_header(struct auxtrace_record *itr,
/* No Arm SPE PMU is found */
data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX;
data[ARM_SPE_CAP_MIN_IVAL] = 0;
+ data[ARM_SPE_CAP_EVENT_FILTER] = 0;
} else {
data[ARM_SPE_CPU_PMU_TYPE] = pmu->type;
if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1)
val = 0;
data[ARM_SPE_CAP_MIN_IVAL] = val;
+
+ if (perf_pmu__scan_file(pmu, "caps/event_filter", "%lx", &val) != 1)
+ val = 0;
+ data[ARM_SPE_CAP_EVENT_FILTER] = val;
}
free(cpuid);
diff --git a/tools/perf/arch/arm64/util/arm64_exception_types.h b/tools/perf/arch/arm64/util/arm64_exception_types.h
index 27c981ebe401..bf827f19ace0 100644
--- a/tools/perf/arch/arm64/util/arm64_exception_types.h
+++ b/tools/perf/arch/arm64/util/arm64_exception_types.h
@@ -31,9 +31,10 @@
#define ESR_ELx_EC_FP_ASIMD (0x07)
#define ESR_ELx_EC_CP10_ID (0x08) /* EL2 only */
#define ESR_ELx_EC_PAC (0x09) /* EL2 and above */
-/* Unallocated EC: 0x0A - 0x0B */
+#define ESR_ELx_EC_OTHER (0x0A)
+/* Unallocated EC: 0x0B */
#define ESR_ELx_EC_CP14_64 (0x0C)
-/* Unallocated EC: 0x0d */
+#define ESR_ELx_EC_BTI (0x0D)
#define ESR_ELx_EC_ILL (0x0E)
/* Unallocated EC: 0x0F - 0x10 */
#define ESR_ELx_EC_SVC32 (0x11)
@@ -46,7 +47,10 @@
#define ESR_ELx_EC_SYS64 (0x18)
#define ESR_ELx_EC_SVE (0x19)
#define ESR_ELx_EC_ERET (0x1a) /* EL2 only */
-/* Unallocated EC: 0x1b - 0x1E */
+/* Unallocated EC: 0x1B */
+#define ESR_ELx_EC_FPAC (0x1C) /* EL1 and above */
+#define ESR_ELx_EC_SME (0x1D)
+/* Unallocated EC: 0x1E */
#define ESR_ELx_EC_IMP_DEF (0x1f) /* EL3 only */
#define ESR_ELx_EC_IABT_LOW (0x20)
#define ESR_ELx_EC_IABT_CUR (0x21)
@@ -55,11 +59,12 @@
#define ESR_ELx_EC_DABT_LOW (0x24)
#define ESR_ELx_EC_DABT_CUR (0x25)
#define ESR_ELx_EC_SP_ALIGN (0x26)
-/* Unallocated EC: 0x27 */
+#define ESR_ELx_EC_MOPS (0x27)
#define ESR_ELx_EC_FP_EXC32 (0x28)
/* Unallocated EC: 0x29 - 0x2B */
#define ESR_ELx_EC_FP_EXC64 (0x2C)
-/* Unallocated EC: 0x2D - 0x2E */
+#define ESR_ELx_EC_GCS (0x2D)
+/* Unallocated EC: 0x2E */
#define ESR_ELx_EC_SERROR (0x2F)
#define ESR_ELx_EC_BREAKPT_LOW (0x30)
#define ESR_ELx_EC_BREAKPT_CUR (0x31)
diff --git a/tools/perf/arch/arm64/util/hisi-ptt.c b/tools/perf/arch/arm64/util/hisi-ptt.c
index eac9739c87e6..fe457fd58c9e 100644
--- a/tools/perf/arch/arm64/util/hisi-ptt.c
+++ b/tools/perf/arch/arm64/util/hisi-ptt.c
@@ -9,6 +9,7 @@
#include <linux/bitops.h>
#include <linux/log2.h>
#include <linux/zalloc.h>
+#include <errno.h>
#include <time.h>
#include <internal/lib.h> // page_size
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
index 1e8c44c7b614..7a7049c2c307 100644
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -382,3 +382,5 @@
465 n64 listxattrat sys_listxattrat
466 n64 removexattrat sys_removexattrat
467 n64 open_tree_attr sys_open_tree_attr
+468 n64 file_getattr sys_file_getattr
+469 n64 file_setattr sys_file_setattr
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 9a084bdb8926..b453e80dfc00 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -558,3 +558,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index fdd6a77a3432..3d0d5427aef7 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -10,3 +10,4 @@ perf-util-$(CONFIG_LIBDW) += skip-callchain-idx.o
perf-util-$(CONFIG_LIBUNWIND) += unwind-libunwind.o
perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
+perf-util-y += auxtrace.o
diff --git a/tools/perf/arch/powerpc/util/auxtrace.c b/tools/perf/arch/powerpc/util/auxtrace.c
new file mode 100644
index 000000000000..292ea335e4ff
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/auxtrace.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * VPA support
+ */
+#include <errno.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/string.h>
+
+#include "../../util/evlist.h"
+#include "../../util/debug.h"
+#include "../../util/auxtrace.h"
+#include "../../util/powerpc-vpadtl.h"
+#include "../../util/record.h"
+#include <internal/lib.h> // page_size
+
+#define KiB(x) ((x) * 1024)
+
+/*
+ * recording_options callback installed by auxtrace_record__init().
+ *
+ * Turns on full AUX tracing and, only when no AUX mmap size has been chosen
+ * yet, fills in default buffer sizes. Always returns 0.
+ */
+static int
+powerpc_vpadtl_recording_options(struct auxtrace_record *ar __maybe_unused,
+				 struct evlist *evlist __maybe_unused,
+				 struct record_opts *opts)
+{
+	opts->full_auxtrace = true;
+
+	/* A non-zero auxtrace_mmap_pages is kept exactly as it is. */
+	if (opts->auxtrace_mmap_pages)
+		return 0;
+
+	/*
+	 * Default the AUX area to 128KiB worth of pages; that is two pages
+	 * when page_size is 64KiB and more with smaller pages.
+	 */
+	opts->auxtrace_mmap_pages = KiB(128) / page_size;
+
+	/*
+	 * mmap_pages still at UINT_MAX (presumably the "not set" marker,
+	 * confirm against the option parser): use 256KiB worth of pages.
+	 */
+	if (opts->mmap_pages == UINT_MAX)
+		opts->mmap_pages = KiB(256) / page_size;
+
+	return 0;
+}
+
+/*
+ * info_priv_size callback installed by auxtrace_record__init(): reports
+ * VPADTL_AUXTRACE_PRIV_SIZE unconditionally; neither argument is consulted.
+ */
+static size_t
+powerpc_vpadtl_info_priv_size(struct auxtrace_record *itr __maybe_unused,
+			      struct evlist *evlist __maybe_unused)
+{
+	return VPADTL_AUXTRACE_PRIV_SIZE;
+}
+
+/*
+ * info_fill callback installed by auxtrace_record__init(): marks the auxtrace
+ * info record as PERF_AUXTRACE_VPA_DTL. No other field of the record is
+ * written here and the remaining arguments are unused. Always returns 0.
+ */
+static int powerpc_vpadtl_info_fill(struct auxtrace_record *itr __maybe_unused,
+				    struct perf_session *session __maybe_unused,
+				    struct perf_record_auxtrace_info *auxtrace_info,
+				    size_t priv_size __maybe_unused)
+{
+	auxtrace_info->type = PERF_AUXTRACE_VPA_DTL;
+	return 0;
+}
+
+/*
+ * free callback installed by auxtrace_record__init(): the record is a plain
+ * zalloc() allocation there, so handing it to free() releases it.
+ */
+static void
+powerpc_vpadtl_free(struct auxtrace_record *itr)
+{
+	free(itr);
+}
+
+/*
+ * reference callback installed by auxtrace_record__init(): always reports 0
+ * and does not look at the record.
+ */
+static u64
+powerpc_vpadtl_reference(struct auxtrace_record *itr __maybe_unused)
+{
+	return 0;
+}
+
+/*
+ * Create the auxtrace record for VPA DTL if the event list contains an event
+ * whose name starts with "vpa_dtl".
+ *
+ * @evlist: events to search; the first matching event gets
+ *          needs_auxtrace_mmap set and is moved to the front of its list.
+ * @err:    out-parameter, written on every path: 0 on success and when no
+ *          matching event exists, -ENOMEM when the record cannot be
+ *          allocated.
+ *
+ * Returns the new record, whose ->free callback releases it, or NULL when
+ * there is nothing to record or the allocation failed; *err tells the two
+ * NULL cases apart.
+ */
+struct auxtrace_record *auxtrace_record__init(struct evlist *evlist,
+					      int *err)
+{
+	struct auxtrace_record *aux;
+	struct evsel *pos;
+	int found = 0;
+
+	/*
+	 * Report success up front so that *err is defined on every return
+	 * path, including the NULL returned when no vpa_dtl event is
+	 * present. A caller that passes the address of an uninitialised int
+	 * and tests it afterwards must not read an indeterminate value.
+	 */
+	*err = 0;
+
+	evlist__for_each_entry(evlist, pos) {
+		/* Skip nameless events rather than pass NULL to strstarts(). */
+		if (pos->name && strstarts(pos->name, "vpa_dtl")) {
+			found = 1;
+			pos->needs_auxtrace_mmap = true;
+			break;
+		}
+	}
+
+	/* pos is only a valid event when the loop ended through the break. */
+	if (!found)
+		return NULL;
+
+	/*
+	 * To obtain the auxtrace buffer file descriptor, the auxtrace event
+	 * must come first.
+	 */
+	evlist__to_front(pos->evlist, pos);
+
+	aux = zalloc(sizeof(*aux));
+	if (aux == NULL) {
+		pr_debug("aux record is NULL\n");
+		*err = -ENOMEM;
+		return NULL;
+	}
+
+	aux->recording_options = powerpc_vpadtl_recording_options;
+	aux->info_priv_size = powerpc_vpadtl_info_priv_size;
+	aux->info_fill = powerpc_vpadtl_info_fill;
+	aux->free = powerpc_vpadtl_free;
+	aux->reference = powerpc_vpadtl_reference;
+	return aux;
+}
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index a4569b96ef06..8a6744d658db 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -470,3 +470,5 @@
465 common listxattrat sys_listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr sys_file_setattr
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
index 736c0ad09194..c64eb18dbdae 100644
--- a/tools/perf/arch/s390/util/Build
+++ b/tools/perf/arch/s390/util/Build
@@ -7,4 +7,4 @@ perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
perf-util-y += machine.o
perf-util-y += pmu.o
-perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-util-y += auxtrace.o
diff --git a/tools/perf/arch/s390/util/auxtrace.c b/tools/perf/arch/s390/util/auxtrace.c
index 5068baa3e092..1a3676145066 100644
--- a/tools/perf/arch/s390/util/auxtrace.c
+++ b/tools/perf/arch/s390/util/auxtrace.c
@@ -1,3 +1,4 @@
+#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <linux/kernel.h>
diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
index 52a7652fcff6..5e9c9eff5539 100644
--- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl
@@ -471,3 +471,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
index 83e45eb6c095..ebb7d06d1044 100644
--- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl
@@ -513,3 +513,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c
index c6d403eae744..803f9351a3fb 100644
--- a/tools/perf/arch/x86/annotate/instructions.c
+++ b/tools/perf/arch/x86/annotate/instructions.c
@@ -248,6 +248,7 @@ static void update_insn_state_x86(struct type_state *state,
tsr = &state->regs[state->ret_reg];
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("call [%x] return -> reg%d",
@@ -284,6 +285,7 @@ static void update_insn_state_x86(struct type_state *state,
!strcmp(var_name, "this_cpu_off") &&
tsr->kind == TSR_KIND_CONST) {
tsr->kind = TSR_KIND_PERCPU_BASE;
+ tsr->offset = 0;
tsr->ok = true;
imm_value = tsr->imm_value;
}
@@ -291,6 +293,19 @@ static void update_insn_state_x86(struct type_state *state,
else
return;
+ /* Ignore add to non-pointer or non-const types */
+ if (tsr->kind == TSR_KIND_POINTER ||
+ (dwarf_tag(&tsr->type) == DW_TAG_pointer_type &&
+ src->reg1 != DWARF_REG_PC && tsr->kind == TSR_KIND_TYPE && !dst->mem_ref)) {
+ tsr->offset += imm_value;
+ pr_debug_dtp("add [%x] offset %#"PRIx64" to reg%d",
+ insn_offset, imm_value, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+
+ if (tsr->kind == TSR_KIND_CONST)
+ tsr->imm_value += imm_value;
+
if (tsr->kind != TSR_KIND_PERCPU_BASE)
return;
@@ -301,7 +316,8 @@ static void update_insn_state_x86(struct type_state *state,
* as a pointer.
*/
tsr->type = type_die;
- tsr->kind = TSR_KIND_POINTER;
+ tsr->kind = TSR_KIND_PERCPU_POINTER;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("add [%x] percpu %#"PRIx64" -> reg%d",
@@ -311,6 +327,135 @@ static void update_insn_state_x86(struct type_state *state,
return;
}
+ if (!strncmp(dl->ins.name, "sub", 3)) {
+ u64 imm_value = -1ULL;
+
+ if (!has_reg_type(state, dst->reg1))
+ return;
+
+ tsr = &state->regs[dst->reg1];
+ tsr->copied_from = -1;
+
+ if (src->imm)
+ imm_value = src->offset;
+ else if (has_reg_type(state, src->reg1) &&
+ state->regs[src->reg1].kind == TSR_KIND_CONST)
+ imm_value = state->regs[src->reg1].imm_value;
+
+ if (tsr->kind == TSR_KIND_POINTER ||
+ (dwarf_tag(&tsr->type) == DW_TAG_pointer_type &&
+ src->reg1 != DWARF_REG_PC && tsr->kind == TSR_KIND_TYPE && !dst->mem_ref)) {
+ tsr->offset -= imm_value;
+ pr_debug_dtp("sub [%x] offset %#"PRIx64" to reg%d",
+ insn_offset, imm_value, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+
+ if (tsr->kind == TSR_KIND_CONST)
+ tsr->imm_value -= imm_value;
+
+ return;
+ }
+
+ if (!strncmp(dl->ins.name, "lea", 3)) {
+ int sreg = src->reg1;
+ struct type_state_reg src_tsr;
+
+ if (!has_reg_type(state, sreg) ||
+ !has_reg_type(state, dst->reg1) ||
+ !src->mem_ref)
+ return;
+
+ src_tsr = state->regs[sreg];
+ tsr = &state->regs[dst->reg1];
+
+ tsr->copied_from = -1;
+ tsr->ok = false;
+
+ /* Case 1: Based on stack pointer or frame pointer */
+ if (sreg == fbreg || sreg == state->stack_reg) {
+ struct type_state_stack *stack;
+ int offset = src->offset - fboff;
+
+ stack = find_stack_state(state, offset);
+ if (!stack)
+ return;
+
+ tsr->type = stack->type;
+ tsr->kind = TSR_KIND_POINTER;
+ tsr->offset = offset - stack->offset;
+ tsr->ok = true;
+
+ if (sreg == fbreg) {
+ pr_debug_dtp("lea [%x] address of -%#x(stack) -> reg%d",
+ insn_offset, -src->offset, dst->reg1);
+ } else {
+ pr_debug_dtp("lea [%x] address of %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ }
+
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ /* Case 2: Based on a register holding a typed pointer */
+ else if (src_tsr.ok && (src_tsr.kind == TSR_KIND_POINTER ||
+ (dwarf_tag(&src_tsr.type) == DW_TAG_pointer_type &&
+ src_tsr.kind == TSR_KIND_TYPE))) {
+
+ if (src_tsr.kind == TSR_KIND_TYPE &&
+ __die_get_real_type(&state->regs[sreg].type, &type_die) == NULL)
+ return;
+
+ if (src_tsr.kind == TSR_KIND_POINTER)
+ type_die = state->regs[sreg].type;
+
+ /* Check if the target type has a member at the new offset */
+ if (die_get_member_type(&type_die,
+ src->offset + src_tsr.offset, &type_die) == NULL)
+ return;
+
+ tsr->type = src_tsr.type;
+ tsr->kind = src_tsr.kind;
+ tsr->offset = src->offset + src_tsr.offset;
+ tsr->ok = true;
+
+ pr_debug_dtp("lea [%x] address of %s%#x(reg%d) -> reg%d",
+ insn_offset, src->offset < 0 ? "-" : "",
+ abs(src->offset), sreg, dst->reg1);
+
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
+ return;
+ }
+
+ /* Invalidate register states for other ops which may change pointers */
+ if (has_reg_type(state, dst->reg1) && !dst->mem_ref &&
+ dwarf_tag(&state->regs[dst->reg1].type) == DW_TAG_pointer_type) {
+ if (!strncmp(dl->ins.name, "imul", 4) || !strncmp(dl->ins.name, "mul", 3) ||
+ !strncmp(dl->ins.name, "idiv", 4) || !strncmp(dl->ins.name, "div", 3) ||
+ !strncmp(dl->ins.name, "shl", 3) || !strncmp(dl->ins.name, "shr", 3) ||
+ !strncmp(dl->ins.name, "sar", 3) || !strncmp(dl->ins.name, "and", 3) ||
+ !strncmp(dl->ins.name, "or", 2) || !strncmp(dl->ins.name, "neg", 3) ||
+ !strncmp(dl->ins.name, "inc", 3) || !strncmp(dl->ins.name, "dec", 3)) {
+ pr_debug_dtp("%s [%x] invalidate reg%d\n",
+ dl->ins.name, insn_offset, dst->reg1);
+ state->regs[dst->reg1].ok = false;
+ state->regs[dst->reg1].copied_from = -1;
+ return;
+ }
+
+ if (!strncmp(dl->ins.name, "xor", 3) && dst->reg1 == src->reg1) {
+ /* xor reg, reg clears the register */
+ pr_debug_dtp("xor [%x] clear reg%d\n",
+ insn_offset, dst->reg1);
+
+ state->regs[dst->reg1].kind = TSR_KIND_CONST;
+ state->regs[dst->reg1].imm_value = 0;
+ state->regs[dst->reg1].ok = true;
+ state->regs[dst->reg1].copied_from = -1;
+ return;
+ }
+ }
+
if (strncmp(dl->ins.name, "mov", 3))
return;
@@ -345,6 +490,7 @@ static void update_insn_state_x86(struct type_state *state,
if (var_addr == 40) {
tsr->kind = TSR_KIND_CANARY;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] stack canary -> reg%d\n",
@@ -361,6 +507,7 @@ static void update_insn_state_x86(struct type_state *state,
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] this-cpu addr=%#"PRIx64" -> reg%d",
@@ -372,6 +519,7 @@ static void update_insn_state_x86(struct type_state *state,
if (src->imm) {
tsr->kind = TSR_KIND_CONST;
tsr->imm_value = src->offset;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] imm=%#x -> reg%d\n",
@@ -388,10 +536,11 @@ static void update_insn_state_x86(struct type_state *state,
tsr->type = state->regs[src->reg1].type;
tsr->kind = state->regs[src->reg1].kind;
tsr->imm_value = state->regs[src->reg1].imm_value;
+ tsr->offset = state->regs[src->reg1].offset;
tsr->ok = true;
/* To copy back the variable type later (hopefully) */
- if (tsr->kind == TSR_KIND_TYPE)
+ if (tsr->kind == TSR_KIND_TYPE || tsr->kind == TSR_KIND_POINTER)
tsr->copied_from = src->reg1;
pr_debug_dtp("mov [%x] reg%d -> reg%d",
@@ -421,12 +570,14 @@ retry:
} else if (!stack->compound) {
tsr->type = stack->type;
tsr->kind = stack->kind;
+ tsr->offset = stack->ptr_offset;
tsr->ok = true;
} else if (die_get_member_type(&stack->type,
offset - stack->offset,
&type_die)) {
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
} else {
tsr->ok = false;
@@ -446,15 +597,30 @@ retry:
else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
state->regs[sreg].kind == TSR_KIND_TYPE &&
die_deref_ptr_type(&state->regs[sreg].type,
- src->offset, &type_die)) {
+ src->offset + state->regs[sreg].offset, &type_die)) {
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] %#x(reg%d) -> reg%d",
insn_offset, src->offset, sreg, dst->reg1);
pr_debug_type_name(&tsr->type, tsr->kind);
}
+ /* Handle dereference of TSR_KIND_POINTER registers */
+ else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
+ state->regs[sreg].kind == TSR_KIND_POINTER &&
+ die_get_member_type(&state->regs[sreg].type,
+ src->offset + state->regs[sreg].offset, &type_die)) {
+ tsr->type = state->regs[sreg].type;
+ tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = src->offset + state->regs[sreg].offset;
+ tsr->ok = true;
+
+ pr_debug_dtp("mov [%x] addr %#x(reg%d) -> reg%d",
+ insn_offset, src->offset, sreg, dst->reg1);
+ pr_debug_type_name(&tsr->type, tsr->kind);
+ }
/* Or check if it's a global variable */
else if (sreg == DWARF_REG_PC) {
struct map_symbol *ms = dloc->ms;
@@ -473,6 +639,7 @@ retry:
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] global addr=%"PRIx64" -> reg%d",
@@ -504,6 +671,7 @@ retry:
die_get_member_type(&type_die, offset, &type_die)) {
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
if (src->multi_regs) {
@@ -521,11 +689,12 @@ retry:
}
/* And then dereference the calculated pointer if it has one */
else if (has_reg_type(state, sreg) && state->regs[sreg].ok &&
- state->regs[sreg].kind == TSR_KIND_POINTER &&
+ state->regs[sreg].kind == TSR_KIND_PERCPU_POINTER &&
die_get_member_type(&state->regs[sreg].type,
src->offset, &type_die)) {
tsr->type = type_die;
tsr->kind = TSR_KIND_TYPE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] pointer %#x(reg%d) -> reg%d",
@@ -548,6 +717,7 @@ retry:
&var_name, &offset) &&
!strcmp(var_name, "__per_cpu_offset")) {
tsr->kind = TSR_KIND_PERCPU_BASE;
+ tsr->offset = 0;
tsr->ok = true;
pr_debug_dtp("mov [%x] percpu base reg%d\n",
@@ -583,10 +753,10 @@ retry:
*/
if (!stack->compound)
set_stack_state(stack, offset, tsr->kind,
- &tsr->type);
+ &tsr->type, tsr->offset);
} else {
findnew_stack_state(state, offset, tsr->kind,
- &tsr->type);
+ &tsr->type, tsr->offset);
}
if (dst->reg1 == fbreg) {
@@ -596,6 +766,11 @@ retry:
pr_debug_dtp("mov [%x] reg%d -> %#x(reg%d)",
insn_offset, src->reg1, offset, dst->reg1);
}
+ if (tsr->offset != 0) {
+ pr_debug_dtp(" reg%d offset %#x ->",
+ src->reg1, tsr->offset);
+ }
+
pr_debug_type_name(&tsr->type, tsr->kind);
}
/*
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
index ac007ea00979..4877e16da69a 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl
@@ -473,3 +473,5 @@
465 i386 listxattrat sys_listxattrat
466 i386 removexattrat sys_removexattrat
467 i386 open_tree_attr sys_open_tree_attr
+468 i386 file_getattr sys_file_getattr
+469 i386 file_setattr sys_file_setattr
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index cfb5ca41e30d..ced2a1deecd7 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -345,6 +345,7 @@
333 common io_pgetevents sys_io_pgetevents
334 common rseq sys_rseq
335 common uretprobe sys_uretprobe
+336 common uprobe sys_uprobe
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
424 common pidfd_send_signal sys_pidfd_send_signal
@@ -391,6 +392,8 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
#
# Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 7790b3e20f4e..b017d1ca6e3c 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,9 +3,9 @@ perf-test-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
perf-test-y += arch-tests.o
perf-test-y += hybrid.o
-perf-test-$(CONFIG_AUXTRACE) += intel-pt-test.o
+perf-test-y += intel-pt-test.o
ifeq ($(CONFIG_EXTRA_TESTS),y)
-perf-test-$(CONFIG_AUXTRACE) += insn-x86.o
+perf-test-y += insn-x86.o
endif
perf-test-$(CONFIG_X86_64) += bp-modify.o
perf-test-y += amd-ibs-via-core-pmu.o
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 8f9cfeaa170f..c3e1619c5e79 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -3,7 +3,6 @@
#include "tests/tests.h"
#include "arch-tests.h"
-#ifdef HAVE_AUXTRACE_SUPPORT
#ifdef HAVE_EXTRA_TESTS
DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
#endif
@@ -19,7 +18,6 @@ struct test_suite suite__intel_pt = {
.test_cases = intel_pt_tests,
};
-#endif
#if defined(__x86_64__)
DEFINE_SUITE("x86 bp modify", bp_modify);
#endif
@@ -39,12 +37,10 @@ struct test_suite *arch_tests[] = {
#ifdef HAVE_DWARF_UNWIND_SUPPORT
&suite__dwarf_unwind,
#endif
-#ifdef HAVE_AUXTRACE_SUPPORT
#ifdef HAVE_EXTRA_TESTS
&suite__insn_x86,
#endif
&suite__intel_pt,
-#endif
#if defined(__x86_64__)
&suite__bp_modify,
#endif
diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c
index b217ed67cd4e..970997759ec2 100644
--- a/tools/perf/arch/x86/tests/intel-pt-test.c
+++ b/tools/perf/arch/x86/tests/intel-pt-test.c
@@ -3,7 +3,6 @@
#include <linux/compiler.h>
#include <linux/bits.h>
#include <string.h>
-#include <cpuid.h>
#include <sched.h>
#include "intel-pt-decoder/intel-pt-pkt-decoder.h"
@@ -11,6 +10,7 @@
#include "debug.h"
#include "tests/tests.h"
#include "arch-tests.h"
+#include "../util/cpuid.h"
#include "cpumap.h"
/**
@@ -363,7 +363,7 @@ static int get_pt_caps(int cpu, struct pt_caps *caps)
memset(caps, 0, sizeof(*caps));
for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) {
- __get_cpuid_count(20, i, &r.eax, &r.ebx, &r.ecx, &r.edx);
+ cpuid(20, i, &r.eax, &r.ebx, &r.ecx, &r.edx);
pr_debug("CPU %d CPUID leaf 20 subleaf %d\n", cpu, i);
pr_debug("eax = 0x%08x\n", r.eax);
pr_debug("ebx = 0x%08x\n", r.ebx);
@@ -380,7 +380,7 @@ static bool is_hybrid(void)
unsigned int eax, ebx, ecx, edx = 0;
bool result;
- __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
+ cpuid(7, 0, &eax, &ebx, &ecx, &edx);
result = edx & BIT(15);
pr_debug("Is %shybrid : CPUID leaf 7 subleaf 0 edx %#x (bit-15 indicates hybrid)\n",
result ? "" : "not ", edx);
diff --git a/tools/perf/arch/x86/tests/topdown.c b/tools/perf/arch/x86/tests/topdown.c
index 8d0ea7a4bbc1..3ee4e5e71be3 100644
--- a/tools/perf/arch/x86/tests/topdown.c
+++ b/tools/perf/arch/x86/tests/topdown.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include "arch-tests.h"
#include "../util/topdown.h"
+#include "debug.h"
#include "evlist.h"
#include "parse-events.h"
#include "pmu.h"
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index 06d7c0205b3d..c0dc5965f362 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -14,7 +14,7 @@ perf-util-y += iostat.o
perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
-perf-util-$(CONFIG_AUXTRACE) += auxtrace.o
+perf-util-y += auxtrace.o
perf-util-y += archinsn.o
-perf-util-$(CONFIG_AUXTRACE) += intel-pt.o
-perf-util-$(CONFIG_AUXTRACE) += intel-bts.o
+perf-util-y += intel-pt.o
+perf-util-y += intel-bts.o
diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c
index 9bc80fff3aa0..23a8e662a912 100644
--- a/tools/perf/arch/x86/util/evsel.c
+++ b/tools/perf/arch/x86/util/evsel.c
@@ -1,10 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
+#include "util/evlist.h"
#include "util/evsel.h"
+#include "util/evsel_config.h"
#include "util/env.h"
#include "util/pmu.h"
#include "util/pmus.h"
+#include "util/stat.h"
+#include "util/strbuf.h"
#include "linux/string.h"
#include "topdown.h"
#include "evsel.h"
@@ -67,6 +72,57 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
event_name);
}
+void arch_evsel__apply_ratio_to_prev(struct evsel *evsel,
+ struct perf_event_attr *attr)
+{
+ struct perf_event_attr *prev_attr = NULL;
+ struct evsel *evsel_prev = NULL;
+ const char *name = "acr_mask";
+ int evsel_idx = 0;
+ __u64 ev_mask, pr_ev_mask;
+
+ if (!perf_pmu__has_format(evsel->pmu, name)) {
+ pr_err("'%s' does not have acr_mask format support\n", evsel->pmu->name);
+ return;
+ }
+ if (perf_pmu__format_type(evsel->pmu, name) !=
+ PERF_PMU_FORMAT_VALUE_CONFIG2) {
+ pr_err("'%s' does not have config2 format support\n", evsel->pmu->name);
+ return;
+ }
+
+ evsel_prev = evsel__prev(evsel);
+ if (!evsel_prev) {
+ pr_err("Previous event does not exist.\n");
+ return;
+ }
+
+ prev_attr = &evsel_prev->core.attr;
+
+ if (prev_attr->config2) {
+ pr_err("'%s' has set config2 (acr_mask?) already, configuration not supported\n", evsel_prev->name);
+ return;
+ }
+
+ /*
+ * acr_mask (config2) is calculated using the event's index in
+ * the event group. The first event will use the index of the
+ * second event as its mask (e.g., 0x2), indicating that the
+ * second event counter will be reset and a sample taken for
+ * the first event if its counter overflows. The second event
+ * will use the mask consisting of the first and second bits
+ * (e.g., 0x3), meaning both counters will be reset if the
+ * second event counter overflows.
+ */
+
+ evsel_idx = evsel__group_idx(evsel);
+ ev_mask = 1ull << evsel_idx;
+ pr_ev_mask = 1ull << (evsel_idx - 1);
+
+ prev_attr->config2 = ev_mask;
+ attr->config2 = ev_mask | pr_ev_mask;
+}
+
static void ibs_l3miss_warn(void)
{
pr_warning(
@@ -102,13 +158,15 @@ void arch__post_evsel_config(struct evsel *evsel, struct perf_event_attr *attr)
}
}
-int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
+static int amd_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
{
- if (!x86__is_amd_cpu())
+ struct perf_pmu *pmu;
+
+ if (evsel->core.attr.precise_ip == 0)
return 0;
- if (!evsel->core.attr.precise_ip &&
- !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3)))
+ pmu = evsel__find_pmu(evsel);
+ if (!pmu || strncmp(pmu->name, "ibs", 3))
return 0;
/* More verbose IBS errors. */
@@ -118,6 +176,54 @@ int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size)
return scnprintf(msg, size, "AMD IBS doesn't support privilege filtering. Try "
"again without the privilege modifiers (like 'k') at the end.");
}
+ return 0;
+}
+
+static int intel_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size)
+{
+ struct strbuf sb = STRBUF_INIT;
+ int ret;
+
+ if (err != EINVAL)
+ return 0;
+ if (!topdown_sys_has_perf_metrics())
+ return 0;
+
+ if (arch_is_topdown_slots(evsel)) {
+ if (!evsel__is_group_leader(evsel)) {
+ evlist__uniquify_evsel_names(evsel->evlist, &stat_config);
+ evlist__format_evsels(evsel->evlist, &sb, 2048);
+ ret = scnprintf(msg, size, "Topdown slots event can only be group leader "
+ "in '%s'.", sb.buf);
+ strbuf_release(&sb);
+ return ret;
+ }
+ } else if (arch_is_topdown_metrics(evsel)) {
+ struct evsel *pos;
+
+ evlist__for_each_entry(evsel->evlist, pos) {
+ if (pos == evsel || !arch_is_topdown_metrics(pos))
+ continue;
+
+ if (pos->core.attr.config != evsel->core.attr.config)
+ continue;
+
+ evlist__uniquify_evsel_names(evsel->evlist, &stat_config);
+ evlist__format_evsels(evsel->evlist, &sb, 2048);
+ ret = scnprintf(msg, size, "Perf metric event '%s' is duplicated "
+ "in the same group (only one event is allowed) in '%s'.",
+ evsel__name(evsel), sb.buf);
+ strbuf_release(&sb);
+ return ret;
+ }
+ }
return 0;
}
+
+int arch_evsel__open_strerror(struct evsel *evsel, int err, char *msg, size_t size)
+{
+ return x86__is_amd_cpu()
+ ? amd_evsel__open_strerror(evsel, msg, size)
+ : intel_evsel__open_strerror(evsel, err, msg, size);
+}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index add33cb5d1da..b394ad9cc635 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -12,7 +12,6 @@
#include <linux/log2.h>
#include <linux/zalloc.h>
#include <linux/err.h>
-#include <cpuid.h>
#include "../../../util/session.h"
#include "../../../util/event.h"
@@ -34,6 +33,7 @@
#include <internal/lib.h> // page_size
#include "../../../util/intel-pt.h"
#include <api/fs/fs.h>
+#include "cpuid.h"
#define KiB(x) ((x) * 1024)
#define MiB(x) ((x) * 1024 * 1024)
@@ -72,7 +72,7 @@ static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu,
int err;
parse_events_terms__init(&terms);
- err = parse_events_terms(&terms, str, /*input=*/ NULL);
+ err = parse_events_terms(&terms, str);
if (err)
goto out_free;
@@ -311,7 +311,7 @@ static void intel_pt_tsc_ctc_ratio(u32 *n, u32 *d)
{
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
- __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
+ cpuid(0x15, 0, &eax, &ebx, &ecx, &edx);
*n = ebx;
*d = eax;
}
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index 424716518b75..bff36f9345ea 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -3,9 +3,11 @@
#include <string.h>
#include "../../../util/kvm-stat.h"
#include "../../../util/evsel.h"
+#include "../../../util/env.h"
#include <asm/svm.h>
#include <asm/vmx.h>
#include <asm/kvm.h>
+#include <subcmd/parse-options.h>
define_exit_reasons_table(vmx_exit_reasons, VMX_EXIT_REASONS);
define_exit_reasons_table(svm_exit_reasons, SVM_EXIT_REASONS);
@@ -211,3 +213,52 @@ int cpu_isa_init(struct perf_kvm_stat *kvm, const char *cpuid)
return 0;
}
+
+/*
+ * Since KVM gained PEBS support for guests on Intel platforms
+ * (https://lore.kernel.org/all/20220411101946.20262-1-likexu@tencent.com/),
+ * the host can no longer sample a guest with PEBS: all PEBS-related MSRs are
+ * switched to their guest values at vm-entry (e.g. IA32_DS_AREA is switched
+ * to a guest virtual address). As a result "perf kvm record" fails to sample
+ * the guest on Intel platforms, because the "cycles:P" event is used to
+ * sample the guest by default.
+ *
+ * To avoid this, explicitly use the "cycles" event instead of "cycles:P" by
+ * default when sampling a guest on Intel platforms.
+ */
+int kvm_add_default_arch_event(int *argc, const char **argv)
+{
+ const char **tmp;
+ bool event = false;
+ int ret = 0, i, j = *argc;
+
+ const struct option event_options[] = {
+ OPT_BOOLEAN('e', "event", &event, NULL),
+ OPT_BOOLEAN(0, "pfm-events", &event, NULL),
+ OPT_END()
+ };
+
+ if (!x86__is_intel_cpu())
+ return 0;
+
+ tmp = calloc(j + 1, sizeof(char *));
+ if (!tmp)
+ return -ENOMEM;
+
+ for (i = 0; i < j; i++)
+ tmp[i] = argv[i];
+
+ parse_options(j, tmp, event_options, NULL, PARSE_OPT_KEEP_UNKNOWN);
+ if (!event) {
+ argv[j++] = STRDUP_FAIL_EXIT("-e");
+ argv[j++] = STRDUP_FAIL_EXIT("cycles");
+ *argc += 2;
+ }
+
+ free(tmp);
+ return 0;
+
+EXIT:
+ free(tmp);
+ return ret;
+}
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 58113482654b..a3f96221758d 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -273,7 +273,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
{
struct perf_pmu_caps *ldlat_cap;
-#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
pmu->auxtrace = true;
pmu->selectable = true;
@@ -283,7 +282,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
pmu->auxtrace = true;
pmu->selectable = true;
}
-#endif
if (x86__is_amd_cpu()) {
if (strcmp(pmu->name, "ibs_op"))
diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c
index 0d01b662627a..bafd285119d7 100644
--- a/tools/perf/arch/x86/util/topdown.c
+++ b/tools/perf/arch/x86/util/topdown.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include "util/evlist.h"
#include "util/pmu.h"
#include "util/pmus.h"
diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
index f657a77314f8..374e4cb788d8 100644
--- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl
@@ -438,3 +438,5 @@
465 common listxattrat sys_listxattrat
466 common removexattrat sys_removexattrat
467 common open_tree_attr sys_open_tree_attr
+468 common file_getattr sys_file_getattr
+469 common file_setattr sys_file_setattr
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 9f736423af53..8519eb5a42fa 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -28,6 +28,7 @@ int bench_syscall_fork(int argc, const char **argv);
int bench_syscall_execve(int argc, const char **argv);
int bench_mem_memcpy(int argc, const char **argv);
int bench_mem_memset(int argc, const char **argv);
+int bench_mem_mmap(int argc, const char **argv);
int bench_mem_find_bit(int argc, const char **argv);
int bench_futex_hash(int argc, const char **argv);
int bench_futex_wake(int argc, const char **argv);
diff --git a/tools/perf/bench/evlist-open-close.c b/tools/perf/bench/evlist-open-close.c
index bfaf50e4e519..faf9c34b4a5d 100644
--- a/tools/perf/bench/evlist-open-close.c
+++ b/tools/perf/bench/evlist-open-close.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
index 7e25b0e413f6..e697c20951bc 100644
--- a/tools/perf/bench/find-bit-bench.c
+++ b/tools/perf/bench/find-bit-bench.c
@@ -37,7 +37,7 @@ static noinline void workload(int val)
accumulator++;
}
-#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
+#if defined(__i386__) || defined(__x86_64__)
static bool asm_test_bit(long nr, const unsigned long *addr)
{
bool oldbit;
diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c
index 1481196a22f0..1968c9d00b5b 100644
--- a/tools/perf/bench/futex.c
+++ b/tools/perf/bench/futex.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <err.h>
+#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/prctl.h>
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
index dd295d27044a..fcb72d682cf8 100644
--- a/tools/perf/bench/futex.h
+++ b/tools/perf/bench/futex.h
@@ -8,6 +8,7 @@
#ifndef _FUTEX_H
#define _FUTEX_H
+#include <stdbool.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 19d45c377ac1..2908a3a796c9 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -22,27 +22,39 @@
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
+#include <sys/mman.h>
#include <errno.h>
#include <linux/time64.h>
-#include <linux/zalloc.h>
+#include <linux/log2.h>
#define K 1024
+#define PAGE_SHIFT_4KB 12
+#define PAGE_SHIFT_2MB 21
+#define PAGE_SHIFT_1GB 30
+
static const char *size_str = "1MB";
static const char *function_str = "all";
-static int nr_loops = 1;
+static const char *page_size_str = "4KB";
+static const char *chunk_size_str = "0";
+static unsigned int nr_loops = 1;
static bool use_cycles;
static int cycles_fd;
+static unsigned int seed;
-static const struct option options[] = {
+static const struct option bench_common_options[] = {
OPT_STRING('s', "size", &size_str, "1MB",
"Specify the size of the memory buffers. "
"Available units: B, KB, MB, GB and TB (case insensitive)"),
+ OPT_STRING('p', "page", &page_size_str, "4KB",
+ "Specify page-size for mapping memory buffers. "
+ "Available sizes: 4KB, 2MB, 1GB (case insensitive)"),
+
OPT_STRING('f', "function", &function_str, "all",
"Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
- OPT_INTEGER('l', "nr_loops", &nr_loops,
+ OPT_UINTEGER('l', "nr_loops", &nr_loops,
"Specify the number of loops to run. (default: 1)"),
OPT_BOOLEAN('c', "cycles", &use_cycles,
@@ -51,15 +63,56 @@ static const struct option options[] = {
OPT_END()
};
+static const struct option bench_mem_options[] = {
+ OPT_STRING('k', "chunk", &chunk_size_str, "0",
+ "Specify the chunk-size for each invocation. "
+ "Available units: B, KB, MB, GB and TB (case insensitive)"),
+ OPT_PARENT(bench_common_options),
+ OPT_END()
+};
+
+union bench_clock {
+ u64 cycles;
+ struct timeval tv;
+};
+
+struct bench_params {
+ size_t size;
+ size_t size_total;
+ size_t chunk_size;
+ unsigned int nr_loops;
+ unsigned int page_shift;
+ unsigned int seed;
+};
+
+struct bench_mem_info {
+ const struct function *functions;
+ int (*do_op)(const struct function *r, struct bench_params *p,
+ void *src, void *dst, union bench_clock *rt);
+ const char *const *usage;
+ const struct option *options;
+ bool alloc_src;
+};
+
+typedef bool (*mem_init_t)(struct bench_mem_info *, struct bench_params *,
+ void **, void **);
+typedef void (*mem_fini_t)(struct bench_mem_info *, struct bench_params *,
+ void **, void **);
typedef void *(*memcpy_t)(void *, const void *, size_t);
typedef void *(*memset_t)(void *, int, size_t);
+typedef void (*mmap_op_t)(void *, size_t, unsigned int, bool);
struct function {
const char *name;
const char *desc;
- union {
- memcpy_t memcpy;
- memset_t memset;
+ struct {
+ mem_init_t init;
+ mem_fini_t fini;
+ union {
+ memcpy_t memcpy;
+ memset_t memset;
+ mmap_op_t mmap_op;
+ };
} fn;
};
@@ -91,6 +144,34 @@ static u64 get_cycles(void)
return clk;
}
+static void clock_get(union bench_clock *t)
+{
+ if (use_cycles)
+ t->cycles = get_cycles();
+ else
+ BUG_ON(gettimeofday(&t->tv, NULL));
+}
+
+static union bench_clock clock_diff(union bench_clock *s, union bench_clock *e)
+{
+ union bench_clock t;
+
+ if (use_cycles)
+ t.cycles = e->cycles - s->cycles;
+ else
+ timersub(&e->tv, &s->tv, &t.tv);
+
+ return t;
+}
+
+static void clock_accum(union bench_clock *a, union bench_clock *b)
+{
+ if (use_cycles)
+ a->cycles += b->cycles;
+ else
+ timeradd(&a->tv, &b->tv, &a->tv);
+}
+
static double timeval2double(struct timeval *ts)
{
return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC;
@@ -107,54 +188,40 @@ static double timeval2double(struct timeval *ts)
printf(" %14lf GB/sec\n", x / K / K / K); \
} while (0)
-struct bench_mem_info {
- const struct function *functions;
- u64 (*do_cycles)(const struct function *r, size_t size, void *src, void *dst);
- double (*do_gettimeofday)(const struct function *r, size_t size, void *src, void *dst);
- const char *const *usage;
- bool alloc_src;
-};
-
-static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
+static void __bench_mem_function(struct bench_mem_info *info, struct bench_params *p,
+ int r_idx)
{
const struct function *r = &info->functions[r_idx];
double result_bps = 0.0;
- u64 result_cycles = 0;
- void *src = NULL, *dst = zalloc(size);
+ union bench_clock rt = { 0 };
+ void *src = NULL, *dst = NULL;
printf("# function '%s' (%s)\n", r->name, r->desc);
- if (dst == NULL)
- goto out_alloc_failed;
-
- if (info->alloc_src) {
- src = zalloc(size);
- if (src == NULL)
- goto out_alloc_failed;
- }
+ if (r->fn.init && r->fn.init(info, p, &src, &dst))
+ goto out_init_failed;
if (bench_format == BENCH_FORMAT_DEFAULT)
printf("# Copying %s bytes ...\n\n", size_str);
- if (use_cycles) {
- result_cycles = info->do_cycles(r, size, src, dst);
- } else {
- result_bps = info->do_gettimeofday(r, size, src, dst);
- }
+ if (info->do_op(r, p, src, dst, &rt))
+ goto out_test_failed;
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
if (use_cycles) {
- printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
+ printf(" %14lf cycles/byte\n", (double)rt.cycles/(double)p->size_total);
} else {
+ result_bps = (double)p->size_total/timeval2double(&rt.tv);
print_bps(result_bps);
}
break;
case BENCH_FORMAT_SIMPLE:
if (use_cycles) {
- printf("%lf\n", (double)result_cycles/size_total);
+ printf("%lf\n", (double)rt.cycles/(double)p->size_total);
} else {
+ result_bps = (double)p->size_total/timeval2double(&rt.tv);
printf("%lf\n", result_bps);
}
break;
@@ -164,22 +231,23 @@ static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t
break;
}
+out_test_failed:
out_free:
- free(src);
- free(dst);
+ if (r->fn.fini) r->fn.fini(info, p, &src, &dst);
return;
-out_alloc_failed:
- printf("# Memory allocation failed - maybe size (%s) is too large?\n", size_str);
+out_init_failed:
+ printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str,
+ p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large");
goto out_free;
}
static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
{
int i;
- size_t size;
- double size_total;
+ struct bench_params p = { 0 };
+ unsigned int page_size;
- argc = parse_options(argc, argv, options, info->usage, 0);
+ argc = parse_options(argc, argv, info->options, info->usage, 0);
if (use_cycles) {
i = init_cycles();
@@ -189,17 +257,37 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
}
}
- size = (size_t)perf_atoll((char *)size_str);
- size_total = (double)size * nr_loops;
+ p.nr_loops = nr_loops;
+ p.size = (size_t)perf_atoll((char *)size_str);
- if ((s64)size <= 0) {
+ if ((s64)p.size <= 0) {
fprintf(stderr, "Invalid size:%s\n", size_str);
return 1;
}
+ p.size_total = p.size * p.nr_loops;
+
+ p.chunk_size = (size_t)perf_atoll((char *)chunk_size_str);
+ if ((s64)p.chunk_size < 0 || (s64)p.chunk_size > (s64)p.size) {
+ fprintf(stderr, "Invalid chunk_size:%s\n", chunk_size_str);
+ return 1;
+ }
+ if (!p.chunk_size)
+ p.chunk_size = p.size;
+
+ page_size = (unsigned int)perf_atoll((char *)page_size_str);
+ if (page_size != (1 << PAGE_SHIFT_4KB) &&
+ page_size != (1 << PAGE_SHIFT_2MB) &&
+ page_size != (1 << PAGE_SHIFT_1GB)) {
+ fprintf(stderr, "Invalid page-size:%s\n", page_size_str);
+ return 1;
+ }
+ p.page_shift = ilog2(page_size);
+
+ p.seed = seed;
if (!strncmp(function_str, "all", 3)) {
for (i = 0; info->functions[i].name; i++)
- __bench_mem_function(info, i, size, size_total);
+ __bench_mem_function(info, &p, i);
return 0;
}
@@ -218,7 +306,7 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 1;
}
- __bench_mem_function(info, i, size, size_total);
+ __bench_mem_function(info, &p, i);
return 0;
}
@@ -235,47 +323,81 @@ static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
fn(dst, src, size);
}
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static int do_memcpy(const struct function *r, struct bench_params *p,
+ void *src, void *dst, union bench_clock *rt)
{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
+ union bench_clock start, end;
memcpy_t fn = r->fn.memcpy;
- int i;
- memcpy_prefault(fn, size, src, dst);
+ memcpy_prefault(fn, p->size, src, dst);
+
+ clock_get(&start);
+ for (unsigned int i = 0; i < p->nr_loops; ++i)
+ for (size_t off = 0; off < p->size; off += p->chunk_size)
+ fn(dst + off, src + off, min(p->chunk_size, p->size - off));
+ clock_get(&end);
- cycle_start = get_cycles();
- for (i = 0; i < nr_loops; ++i)
- fn(dst, src, size);
- cycle_end = get_cycles();
+ *rt = clock_diff(&start, &end);
- return cycle_end - cycle_start;
+ return 0;
}
-static double do_memcpy_gettimeofday(const struct function *r, size_t size, void *src, void *dst)
+static void *bench_mmap(size_t size, bool populate, unsigned int page_shift)
{
- struct timeval tv_start, tv_end, tv_diff;
- memcpy_t fn = r->fn.memcpy;
- int i;
+ void *p;
+ int extra = populate ? MAP_POPULATE : 0;
+
+ if (page_shift != PAGE_SHIFT_4KB)
+ extra |= MAP_HUGETLB | (page_shift << MAP_HUGE_SHIFT);
+
+ p = mmap(NULL, size, PROT_READ|PROT_WRITE,
+ extra | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+
+ return p == MAP_FAILED ? NULL : p;
+}
+
+static void bench_munmap(void *p, size_t size)
+{
+ if (p)
+ munmap(p, size);
+}
+
+static bool mem_alloc(struct bench_mem_info *info, struct bench_params *p,
+ void **src, void **dst)
+{
+ bool failed;
- memcpy_prefault(fn, size, src, dst);
+ *dst = bench_mmap(p->size, true, p->page_shift);
+ failed = *dst == NULL;
- BUG_ON(gettimeofday(&tv_start, NULL));
- for (i = 0; i < nr_loops; ++i)
- fn(dst, src, size);
- BUG_ON(gettimeofday(&tv_end, NULL));
+ if (info->alloc_src) {
+ *src = bench_mmap(p->size, true, p->page_shift);
+ failed = failed || *src == NULL;
+ }
+
+ return failed;
+}
- timersub(&tv_end, &tv_start, &tv_diff);
+static void mem_free(struct bench_mem_info *info __maybe_unused,
+ struct bench_params *p __maybe_unused,
+ void **src, void **dst)
+{
+ bench_munmap(*dst, p->size);
+ bench_munmap(*src, p->size);
- return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+ *dst = *src = NULL;
}
struct function memcpy_functions[] = {
{ .name = "default",
.desc = "Default memcpy() provided by glibc",
+ .fn.init = mem_alloc,
+ .fn.fini = mem_free,
.fn.memcpy = memcpy },
#ifdef HAVE_ARCH_X86_64_SUPPORT
-# define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
+# define MEMCPY_FN(_fn, _init, _fini, _name, _desc) \
+ {.name = _name, .desc = _desc, .fn.memcpy = _fn, .fn.init = _init, .fn.fini = _fini },
# include "mem-memcpy-x86-64-asm-def.h"
# undef MEMCPY_FN
#endif
@@ -292,55 +414,36 @@ int bench_mem_memcpy(int argc, const char **argv)
{
struct bench_mem_info info = {
.functions = memcpy_functions,
- .do_cycles = do_memcpy_cycles,
- .do_gettimeofday = do_memcpy_gettimeofday,
+ .do_op = do_memcpy,
.usage = bench_mem_memcpy_usage,
+ .options = bench_mem_options,
.alloc_src = true,
};
return bench_mem_common(argc, argv, &info);
}
-static u64 do_memset_cycles(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
-{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
- memset_t fn = r->fn.memset;
- int i;
-
- /*
- * We prefault the freshly allocated memory range here,
- * to not measure page fault overhead:
- */
- fn(dst, -1, size);
-
- cycle_start = get_cycles();
- for (i = 0; i < nr_loops; ++i)
- fn(dst, i, size);
- cycle_end = get_cycles();
-
- return cycle_end - cycle_start;
-}
-
-static double do_memset_gettimeofday(const struct function *r, size_t size, void *src __maybe_unused, void *dst)
+static int do_memset(const struct function *r, struct bench_params *p,
+ void *src __maybe_unused, void *dst, union bench_clock *rt)
{
- struct timeval tv_start, tv_end, tv_diff;
+ union bench_clock start, end;
memset_t fn = r->fn.memset;
- int i;
/*
* We prefault the freshly allocated memory range here,
* to not measure page fault overhead:
*/
- fn(dst, -1, size);
+ fn(dst, -1, p->size);
- BUG_ON(gettimeofday(&tv_start, NULL));
- for (i = 0; i < nr_loops; ++i)
- fn(dst, i, size);
- BUG_ON(gettimeofday(&tv_end, NULL));
+ clock_get(&start);
+ for (unsigned int i = 0; i < p->nr_loops; ++i)
+ for (size_t off = 0; off < p->size; off += p->chunk_size)
+ fn(dst + off, i, min(p->chunk_size, p->size - off));
+ clock_get(&end);
- timersub(&tv_end, &tv_start, &tv_diff);
+ *rt = clock_diff(&start, &end);
- return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
+ return 0;
}
static const char * const bench_mem_memset_usage[] = {
@@ -351,10 +454,13 @@ static const char * const bench_mem_memset_usage[] = {
static const struct function memset_functions[] = {
{ .name = "default",
.desc = "Default memset() provided by glibc",
+ .fn.init = mem_alloc,
+ .fn.fini = mem_free,
.fn.memset = memset },
#ifdef HAVE_ARCH_X86_64_SUPPORT
-# define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
+# define MEMSET_FN(_fn, _init, _fini, _name, _desc) \
+ {.name = _name, .desc = _desc, .fn.memset = _fn, .fn.init = _init, .fn.fini = _fini },
# include "mem-memset-x86-64-asm-def.h"
# undef MEMSET_FN
#endif
@@ -366,9 +472,91 @@ int bench_mem_memset(int argc, const char **argv)
{
struct bench_mem_info info = {
.functions = memset_functions,
- .do_cycles = do_memset_cycles,
- .do_gettimeofday = do_memset_gettimeofday,
+ .do_op = do_memset,
.usage = bench_mem_memset_usage,
+ .options = bench_mem_options,
+ };
+
+ return bench_mem_common(argc, argv, &info);
+}
+
+static void mmap_page_touch(void *dst, size_t size, unsigned int page_shift, bool random)
+{
+ unsigned long npages = size / (1 << page_shift);
+ unsigned long offset = 0, r = 0;
+
+ for (unsigned long i = 0; i < npages; i++) {
+ if (random)
+ r = rand() % (1 << page_shift);
+
+ *((char *)dst + offset + r) = *(char *)(dst + offset + r) + i;
+ offset += 1 << page_shift;
+ }
+}
+
+static int do_mmap(const struct function *r, struct bench_params *p,
+ void *src __maybe_unused, void *dst __maybe_unused,
+ union bench_clock *accum)
+{
+ union bench_clock start, end, diff;
+ mmap_op_t fn = r->fn.mmap_op;
+ bool populate = strcmp(r->name, "populate") == 0;
+
+ if (p->seed)
+ srand(p->seed);
+
+ for (unsigned int i = 0; i < p->nr_loops; i++) {
+ clock_get(&start);
+ dst = bench_mmap(p->size, populate, p->page_shift);
+ if (!dst)
+ goto out;
+
+ fn(dst, p->size, p->page_shift, p->seed);
+ clock_get(&end);
+ diff = clock_diff(&start, &end);
+ clock_accum(accum, &diff);
+
+ bench_munmap(dst, p->size);
+ }
+
+ return 0;
+out:
+ printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str,
+ p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large");
+ return -1;
+}
+
+static const char * const bench_mem_mmap_usage[] = {
+ "perf bench mem mmap <options>",
+ NULL
+};
+
+static const struct function mmap_functions[] = {
+ { .name = "demand",
+ .desc = "Demand loaded mmap()",
+ .fn.mmap_op = mmap_page_touch },
+
+ { .name = "populate",
+ .desc = "Eagerly populated mmap()",
+ .fn.mmap_op = mmap_page_touch },
+
+ { .name = NULL, }
+};
+
+int bench_mem_mmap(int argc, const char **argv)
+{
+ static const struct option bench_mmap_options[] = {
+ OPT_UINTEGER('r', "randomize", &seed,
+ "Seed to randomize page access offset."),
+ OPT_PARENT(bench_common_options),
+ OPT_END()
+ };
+
+ struct bench_mem_info info = {
+ .functions = mmap_functions,
+ .do_op = do_mmap,
+ .usage = bench_mem_mmap_usage,
+ .options = bench_mmap_options,
};
return bench_mem_common(argc, argv, &info);
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
index 5bcaec5601a8..852e48cfd8fe 100644
--- a/tools/perf/bench/mem-memcpy-arch.h
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -2,7 +2,7 @@
#ifdef HAVE_ARCH_X86_64_SUPPORT
-#define MEMCPY_FN(fn, name, desc) \
+#define MEMCPY_FN(fn, init, fini, name, desc) \
void *fn(void *, const void *, size_t);
#include "mem-memcpy-x86-64-asm-def.h"
diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
index 6188e19d3129..f43038f4448b 100644
--- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h
@@ -1,9 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
MEMCPY_FN(memcpy_orig,
+ mem_alloc,
+ mem_free,
"x86-64-unrolled",
"unrolled memcpy() in arch/x86/lib/memcpy_64.S")
MEMCPY_FN(__memcpy,
+ mem_alloc,
+ mem_free,
"x86-64-movsq",
"movsq-based memcpy() in arch/x86/lib/memcpy_64.S")
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
index 53f45482663f..278c5da12d63 100644
--- a/tools/perf/bench/mem-memset-arch.h
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -2,7 +2,7 @@
#ifdef HAVE_ARCH_X86_64_SUPPORT
-#define MEMSET_FN(fn, name, desc) \
+#define MEMSET_FN(fn, init, fini, name, desc) \
void *fn(void *, int, size_t);
#include "mem-memset-x86-64-asm-def.h"
diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h
index 247c72fdfb9d..80ad1b7ea770 100644
--- a/tools/perf/bench/mem-memset-x86-64-asm-def.h
+++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h
@@ -1,9 +1,13 @@
/* SPDX-License-Identifier: GPL-2.0 */
MEMSET_FN(memset_orig,
+ mem_alloc,
+ mem_free,
"x86-64-unrolled",
"unrolled memset() in arch/x86/lib/memset_64.S")
MEMSET_FN(__memset,
+ mem_alloc,
+ mem_free,
"x86-64-stosq",
"movsq-based memset() in arch/x86/lib/memset_64.S")
diff --git a/tools/perf/bench/pmu-scan.c b/tools/perf/bench/pmu-scan.c
index 9e4d36486f62..14a464ad8cea 100644
--- a/tools/perf/bench/pmu-scan.c
+++ b/tools/perf/bench/pmu-scan.c
@@ -4,6 +4,7 @@
*
* Copyright 2023 Google LLC.
*/
+#include <errno.h>
#include <stdio.h>
#include "bench.h"
#include "util/debug.h"
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index b3d493697675..265d49a913d9 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -6,6 +6,7 @@
*
* Copyright 2019 Google LLC.
*/
+#include <errno.h>
#include <stdio.h>
#include "bench.h"
#include "../util/debug.h"
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 5d57d2913f3d..9c27bb30b708 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -313,7 +313,8 @@ out_put:
return ret;
}
-static int process_feature_event(struct perf_session *session,
+static int process_feature_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
if (event->feat.feat_id < HEADER_LAST_FEATURE)
@@ -519,7 +520,7 @@ find_next:
/* skip missing symbols */
nd = rb_next(nd);
} else if (use_browser == 1) {
- key = hist_entry__tui_annotate(he, evsel, NULL);
+ key = hist_entry__tui_annotate(he, evsel, NULL, NO_ADDR);
switch (key) {
case -1:
@@ -917,11 +918,6 @@ int cmd_annotate(int argc, const char **argv)
symbol_conf.annotate_data_sample = true;
} else if (annotate_opts.code_with_type) {
symbol_conf.annotate_data_member = true;
-
- if (!annotate.use_stdio) {
- pr_err("--code-with-type only works with --stdio.\n");
- goto out_delete;
- }
}
setup_browser(true);
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 2c1a9f3d847a..02dea1b88228 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -65,6 +65,7 @@ static struct bench mem_benchmarks[] = {
{ "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy },
{ "memset", "Benchmark for memset() functions", bench_mem_memset },
{ "find_bit", "Benchmark for find_bit() functions", bench_mem_find_bit },
+ { "mmap", "Benchmark for mmap() mappings", bench_mem_mmap },
{ "all", "Run all memory access benchmarks", NULL },
{ NULL, NULL, NULL }
};
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 9e9ff471ddd1..d390ae4e3ec8 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -45,6 +45,8 @@
#include "pmus.h"
#include "string2.h"
#include "util/util.h"
+#include "util/symbol.h"
+#include "util/annotate.h"
struct c2c_hists {
struct hists hists;
@@ -62,6 +64,7 @@ struct compute_stats {
struct c2c_hist_entry {
struct c2c_hists *hists;
+ struct evsel *evsel;
struct c2c_stats stats;
unsigned long *cpuset;
unsigned long *nodeset;
@@ -225,6 +228,12 @@ he__get_c2c_hists(struct hist_entry *he,
return hists;
}
+static void c2c_he__set_evsel(struct c2c_hist_entry *c2c_he,
+ struct evsel *evsel)
+{
+ c2c_he->evsel = evsel;
+}
+
static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
struct perf_sample *sample)
{
@@ -275,6 +284,33 @@ static void compute_stats(struct c2c_hist_entry *c2c_he,
update_stats(&cstats->load, weight);
}
+/*
+ * Return true if annotation is possible, which always requires the
+ * TUI browser (use_browser == 1). When list is NULL, we are called
+ * at the c2c_browser level and annotation may be initialized. When
+ * list is non-NULL, we are called at the cacheline_browser level
+ * and annotation additionally requires symbol information
+ * (list->sym) to be available.
+ */
+static bool perf_c2c__has_annotation(struct perf_hpp_list *list)
+{
+ if (use_browser != 1)
+ return false;
+ return !list || list->sym;
+}
+
+static void perf_c2c__evsel_hists_inc_stats(struct evsel *evsel,
+ struct hist_entry *he,
+ struct perf_sample *sample)
+{
+ struct hists *evsel_hists = evsel__hists(evsel);
+
+ hists__inc_nr_samples(evsel_hists, he->filtered);
+ evsel_hists->stats.total_period += sample->period;
+ if (!he->filtered)
+ evsel_hists->stats.total_non_filtered_period += sample->period;
+}
+
static int process_sample_event(const struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -286,7 +322,7 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
struct c2c_stats stats = { .nr_entries = 0, };
struct hist_entry *he;
struct addr_location al;
- struct mem_info *mi, *mi_dup;
+ struct mem_info *mi = NULL;
struct callchain_cursor *cursor;
int ret;
@@ -313,20 +349,15 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
goto out;
}
- /*
- * The mi object is released in hists__add_entry_ops,
- * if it gets sorted out into existing data, so we need
- * to take the copy now.
- */
- mi_dup = mem_info__get(mi);
-
c2c_decode_stats(&stats, mi);
he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
&al, NULL, NULL, mi, NULL,
sample, true);
- if (he == NULL)
- goto free_mi;
+ if (he == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_add_stats(&c2c_he->stats, &stats);
@@ -334,8 +365,15 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
c2c_he__set_cpu(c2c_he, sample);
c2c_he__set_node(c2c_he, sample);
+ c2c_he__set_evsel(c2c_he, evsel);
hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
+
+ if (perf_c2c__has_annotation(NULL)) {
+ perf_c2c__evsel_hists_inc_stats(evsel, he, sample);
+ addr_map_symbol__inc_samples(mem_info__iaddr(mi), sample, evsel);
+ }
+
ret = hist_entry__append_callchain(he, sample);
if (!ret) {
@@ -350,17 +388,19 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu;
int node = c2c.cpu2node[cpu];
- mi = mi_dup;
-
c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2, machine->env);
- if (!c2c_hists)
- goto free_mi;
+ if (!c2c_hists) {
+ ret = -ENOMEM;
+ goto out;
+ }
he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops,
&al, NULL, NULL, mi, NULL,
sample, true);
- if (he == NULL)
- goto free_mi;
+ if (he == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
c2c_he = container_of(he, struct c2c_hist_entry, he);
c2c_add_stats(&c2c_he->stats, &stats);
@@ -371,20 +411,16 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused,
c2c_he__set_cpu(c2c_he, sample);
c2c_he__set_node(c2c_he, sample);
+ c2c_he__set_evsel(c2c_he, evsel);
hists__inc_nr_samples(&c2c_hists->hists, he->filtered);
ret = hist_entry__append_callchain(he, sample);
}
out:
+ mem_info__put(mi);
addr_location__exit(&al);
return ret;
-
-free_mi:
- mem_info__put(mi_dup);
- mem_info__put(mi);
- ret = -ENOMEM;
- goto out;
}
static const char * const c2c_usage[] = {
@@ -1997,6 +2033,9 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name, stru
if (dim == &dim_dso)
hpp_list->dso = 1;
+ if (dim == &dim_symbol || dim == &dim_iaddr)
+ hpp_list->sym = 1;
+
perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt);
return 0;
}
@@ -2550,6 +2589,44 @@ static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session)
}
#ifdef HAVE_SLANG_SUPPORT
+
+/* Annotate the selected entry in the TUI; returns 0 when annotation is not possible. */
+static int perf_c2c__toggle_annotation(struct hist_browser *browser)
+{
+	struct hist_entry *he = browser->he_selection;
+	struct symbol *sym = NULL;
+	struct c2c_hist_entry *c2c_he = NULL;
+	u64 al_addr = NO_ADDR;
+
+	/* Nothing may be selected: check he before reading he->hists below. */
+	if (he == NULL) {
+		ui_browser__help_window(&browser->b, "No entry selected for annotation");
+		return 0;
+	}
+
+	if (!perf_c2c__has_annotation(he->hists->hpp_list)) {
+		ui_browser__help_window(&browser->b, "No annotation support");
+		return 0;
+	}
+
+	sym = he->ms.sym;
+	if (sym == NULL) {
+		ui_browser__help_window(&browser->b, "Can not annotate, no symbol found");
+		return 0;
+	}
+
+	if (symbol__hists(sym, 0) == NULL) {
+		ui_browser__help_window(&browser->b, "Failed to initialize annotation source");
+		return 0;
+	}
+
+	if (he->mem_info)
+		al_addr = mem_info__iaddr(he->mem_info)->al_addr;
+
+	c2c_he = container_of(he, struct c2c_hist_entry, he);
+	return hist_entry__tui_annotate(he, c2c_he->evsel, NULL, al_addr);
+}
+
static void c2c_browser__update_nr_entries(struct hist_browser *hb)
{
u64 nr_entries = 0;
@@ -2617,6 +2694,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
" ENTER Toggle callchains (if present) \n"
" n Toggle Node details info \n"
" s Toggle full length of symbol and source line columns \n"
+ " a Toggle annotation view \n"
" q Return back to cacheline list \n";
if (!he)
@@ -2651,6 +2729,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he)
c2c.node_info = (c2c.node_info + 1) % 3;
setup_nodes_header();
break;
+ case 'a':
+ perf_c2c__toggle_annotation(browser);
+ break;
case 'q':
goto out;
case '?':
@@ -3006,6 +3087,7 @@ static int perf_c2c__report(int argc, const char **argv)
const char *display = NULL;
const char *coalesce = NULL;
bool no_source = false;
+ const char *disassembler_style = NULL, *objdump_path = NULL;
const struct option options[] = {
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
"file", "vmlinux pathname"),
@@ -3033,6 +3115,10 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr,
"Enable LBR callgraph stitching approach"),
OPT_BOOLEAN(0, "double-cl", &chk_double_cl, "Detect adjacent cacheline false sharing"),
+ OPT_STRING('M', "disassembler-style", &disassembler_style, "disassembler style",
+ "Specify disassembler style (e.g. -M intel for intel syntax)"),
+ OPT_STRING(0, "objdump", &objdump_path, "path",
+ "objdump binary to use for disassembly and annotations"),
OPT_PARENT(c2c_options),
OPT_END()
};
@@ -3040,6 +3126,12 @@ static int perf_c2c__report(int argc, const char **argv)
const char *output_str, *sort_str = NULL;
struct perf_env *env;
+ annotation_options__init();
+
+ err = hists__init();
+ if (err < 0)
+ goto out;
+
argc = parse_options(argc, argv, options, report_c2c_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (argc)
@@ -3052,6 +3144,27 @@ static int perf_c2c__report(int argc, const char **argv)
if (c2c.stats_only)
c2c.use_stdio = true;
+ /**
+ * Annotation related options disassembler_style, objdump_path are set
+ * in the c2c_options, so we can use them here.
+ */
+ if (disassembler_style) {
+ annotate_opts.disassembler_style = strdup(disassembler_style);
+ if (!annotate_opts.disassembler_style) {
+ err = -ENOMEM;
+ pr_err("Failed to allocate memory for annotation options\n");
+ goto out;
+ }
+ }
+ if (objdump_path) {
+ annotate_opts.objdump_path = strdup(objdump_path);
+ if (!annotate_opts.objdump_path) {
+ err = -ENOMEM;
+ pr_err("Failed to allocate memory for annotation options\n");
+ goto out;
+ }
+ }
+
err = symbol__validate_sym_arguments();
if (err)
goto out;
@@ -3126,6 +3239,38 @@ static int perf_c2c__report(int argc, const char **argv)
if (err)
goto out_mem2node;
+ if (c2c.use_stdio)
+ use_browser = 0;
+ else
+ use_browser = 1;
+
+ /*
+ * Only in the TUI browser we are doing integrated annotation,
+ * so don't allocate extra space that won't be used in the stdio
+ * implementation.
+ */
+	if (perf_c2c__has_annotation(NULL)) {
+		/* Keep the result in err so that a failure is returned to the caller. */
+		err = symbol__annotation_init();
+		if (err < 0)
+			goto out_mem2node;
+		/*
+		 * For searching by name on the "Browse map details".
+		 * providing it only in verbose mode not to bloat too
+		 * much struct symbol.
+		 */
+		if (verbose > 0) {
+			/*
+			 * XXX: Need to provide a less kludgy way to ask for
+			 * more space per symbol, the u32 is for the index on
+			 * the ui browser.
+			 * See symbol__browser_index.
+			 */
+			symbol_conf.priv_size += sizeof(u32);
+		}
+		annotation_config__init();
+	}
+
if (symbol__init(env) < 0)
goto out_mem2node;
@@ -3135,11 +3280,6 @@ static int perf_c2c__report(int argc, const char **argv)
goto out_mem2node;
}
- if (c2c.use_stdio)
- use_browser = 0;
- else
- use_browser = 1;
-
setup_browser(false);
err = perf_session__process_events(session);
@@ -3210,6 +3350,7 @@ out_mem2node:
out_session:
perf_session__delete(session);
out:
+ annotation_options__exit();
return err;
}
diff --git a/tools/perf/builtin-check.c b/tools/perf/builtin-check.c
index b1e205871ab1..d19769a8f689 100644
--- a/tools/perf/builtin-check.c
+++ b/tools/perf/builtin-check.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include "builtin.h"
#include "color.h"
+#include "util/bpf-utils.h"
#include "util/debug.h"
#include "util/header.h"
#include <tools/config.h>
@@ -41,15 +42,15 @@ struct feature_status supported_features[] = {
FEATURE_STATUS("dwarf", HAVE_LIBDW_SUPPORT),
FEATURE_STATUS("dwarf_getlocations", HAVE_LIBDW_SUPPORT),
FEATURE_STATUS("dwarf-unwind", HAVE_DWARF_UNWIND_SUPPORT),
- FEATURE_STATUS("auxtrace", HAVE_AUXTRACE_SUPPORT),
FEATURE_STATUS_TIP("libbfd", HAVE_LIBBFD_SUPPORT, "Deprecated, license incompatibility, use BUILD_NONDISTRO=1 and install binutils-dev[el]"),
FEATURE_STATUS("libbpf-strings", HAVE_LIBBPF_STRINGS_SUPPORT),
FEATURE_STATUS("libcapstone", HAVE_LIBCAPSTONE_SUPPORT),
FEATURE_STATUS("libdw-dwarf-unwind", HAVE_LIBDW_SUPPORT),
FEATURE_STATUS("libelf", HAVE_LIBELF_SUPPORT),
+ FEATURE_STATUS("libLLVM", HAVE_LIBLLVM_SUPPORT),
FEATURE_STATUS("libnuma", HAVE_LIBNUMA_SUPPORT),
FEATURE_STATUS("libopencsd", HAVE_CSTRACE_SUPPORT),
- FEATURE_STATUS("libperl", HAVE_LIBPERL_SUPPORT),
+ FEATURE_STATUS_TIP("libperl", HAVE_LIBPERL_SUPPORT, "Deprecated, use LIBPERL=1 and install perl-ExtUtils-Embed/libperl-dev to build with it"),
FEATURE_STATUS("libpfm4", HAVE_LIBPFM),
FEATURE_STATUS("libpython", HAVE_LIBPYTHON_SUPPORT),
FEATURE_STATUS("libslang", HAVE_SLANG_SUPPORT),
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index a9bd7bbef5a9..fb6e2c3c24c8 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -19,7 +19,8 @@
#include "util/tool.h"
#include "util/util.h"
-static int process_header_feature(struct perf_session *session __maybe_unused,
+static int process_header_feature(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event __maybe_unused)
{
session_done = 1;
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40ba6a94f719..aa7be4fb5838 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -197,18 +197,20 @@ static int perf_event__drop_oe(const struct perf_tool *tool __maybe_unused,
}
#endif
-static int perf_event__repipe_op2_synth(struct perf_session *session,
+static int perf_event__repipe_op2_synth(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- return perf_event__repipe_synth(session->tool, event);
+ return perf_event__repipe_synth(tool, event);
}
-static int perf_event__repipe_op4_synth(struct perf_session *session,
+static int perf_event__repipe_op4_synth(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event,
u64 data __maybe_unused,
const char *str __maybe_unused)
{
- return perf_event__repipe_synth(session->tool, event);
+ return perf_event__repipe_synth(tool, event);
}
static int perf_event__repipe_attr(const struct perf_tool *tool,
@@ -237,8 +239,6 @@ static int perf_event__repipe_event_update(const struct perf_tool *tool,
return perf_event__repipe_synth(tool, event);
}
-#ifdef HAVE_AUXTRACE_SUPPORT
-
static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t size)
{
char buf[4096];
@@ -258,12 +258,11 @@ static int copy_bytes(struct perf_inject *inject, struct perf_data *data, off_t
return 0;
}
-static s64 perf_event__repipe_auxtrace(struct perf_session *session,
+static s64 perf_event__repipe_auxtrace(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
- struct perf_inject *inject = container_of(tool, struct perf_inject,
- tool);
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
int ret;
inject->have_auxtrace = true;
@@ -296,18 +295,6 @@ static s64 perf_event__repipe_auxtrace(struct perf_session *session,
return event->auxtrace.size;
}
-#else
-
-static s64
-perf_event__repipe_auxtrace(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused)
-{
- pr_err("AUX area tracing not supported\n");
- return -EINVAL;
-}
-
-#endif
-
static int perf_event__repipe(const struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
@@ -661,12 +648,13 @@ static int perf_event__repipe_exit(const struct perf_tool *tool,
}
#ifdef HAVE_LIBTRACEEVENT
-static int perf_event__repipe_tracing_data(struct perf_session *session,
+static int perf_event__repipe_tracing_data(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- perf_event__repipe_synth(session->tool, event);
+ perf_event__repipe_synth(tool, event);
- return perf_event__process_tracing_data(session, event);
+ return perf_event__process_tracing_data(tool, session, event);
}
#endif
@@ -1348,7 +1336,7 @@ static int process_build_id(const struct perf_tool *tool,
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
- return perf_event__process_build_id(inject->session, event);
+ return perf_event__process_build_id(tool, inject->session, event);
}
static int synthesize_build_id(struct perf_inject *inject, struct dso *dso, pid_t machine_pid)
@@ -1780,9 +1768,10 @@ static int host__repipe(const struct perf_tool *tool,
return perf_event__repipe(tool, event, sample, machine);
}
-static int host__finished_init(struct perf_session *session, union perf_event *event)
+static int host__finished_init(const struct perf_tool *tool, struct perf_session *session,
+ union perf_event *event)
{
- struct perf_inject *inject = container_of(session->tool, struct perf_inject, tool);
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
struct guest_session *gs = &inject->guest_session;
int ret;
@@ -1829,7 +1818,7 @@ static int host__finished_init(struct perf_session *session, union perf_event *e
if (ret)
return ret;
- return perf_event__repipe_op2_synth(session, event);
+ return perf_event__repipe_op2_synth(tool, session, event);
}
/*
@@ -2538,6 +2527,7 @@ int cmd_inject(int argc, const char **argv)
inject.tool.auxtrace = perf_event__repipe_auxtrace;
inject.tool.bpf_metadata = perf_event__repipe_op2_synth;
inject.tool.dont_split_sample_group = true;
+ inject.tool.merge_deferred_callchains = false;
inject.session = __perf_session__new(&data, &inject.tool,
/*trace_event_repipe=*/inject.output.is_pipe,
/*host_env=*/NULL);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 7b15b4a705e4..c61369d54dd9 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1636,14 +1636,6 @@ exit:
return ret;
}
-#define STRDUP_FAIL_EXIT(s) \
- ({ char *_p; \
- _p = strdup(s); \
- if (!_p) \
- return -ENOMEM; \
- _p; \
- })
-
int __weak setup_kvm_events_tp(struct perf_kvm_stat *kvm __maybe_unused)
{
return 0;
@@ -1688,7 +1680,7 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
for (j = 0; j < events_tp_size; j++) {
- rec_argv[i++] = "-e";
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-e");
rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
}
@@ -1696,7 +1688,7 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
for (j = 1; j < (unsigned int)argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
set_option_flag(record_options, 'e', "event", PARSE_OPT_HIDDEN);
set_option_flag(record_options, 0, "filter", PARSE_OPT_HIDDEN);
@@ -1719,7 +1711,13 @@ kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
set_option_flag(record_options, 0, "transaction", PARSE_OPT_DISABLED);
record_usage = kvm_stat_record_usage;
- return cmd_record(i, rec_argv);
+ ret = cmd_record(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
static int
@@ -2000,58 +1998,122 @@ static int __cmd_record(const char *file_name, int argc, const char **argv)
int rec_argc, i = 0, j, ret;
const char **rec_argv;
- ret = kvm_add_default_arch_event(&argc, argv);
- if (ret)
- return -EINVAL;
-
- rec_argc = argc + 2;
+ /*
+ * Besides the 2 more options "-o" and "filename",
+ * kvm_add_default_arch_event() may add 2 extra options,
+ * so allocate 4 more items.
+ */
+ rec_argc = argc + 2 + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- rec_argv[i++] = strdup("record");
- rec_argv[i++] = strdup("-o");
- rec_argv[i++] = strdup(file_name);
+ if (!rec_argv)
+ return -ENOMEM;
+
+ rec_argv[i++] = STRDUP_FAIL_EXIT("record");
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
+ rec_argv[i++] = STRDUP_FAIL_EXIT(file_name);
for (j = 1; j < argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
- BUG_ON(i != rec_argc);
+ BUG_ON(i + 2 != rec_argc);
+
+ ret = kvm_add_default_arch_event(&i, rec_argv);
+ if (ret)
+ goto EXIT;
+
+ ret = cmd_record(i, rec_argv);
- return cmd_record(i, rec_argv);
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
static int __cmd_report(const char *file_name, int argc, const char **argv)
{
- int rec_argc, i = 0, j;
+ int rec_argc, i = 0, j, ret;
const char **rec_argv;
rec_argc = argc + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- rec_argv[i++] = strdup("report");
- rec_argv[i++] = strdup("-i");
- rec_argv[i++] = strdup(file_name);
+ if (!rec_argv)
+ return -ENOMEM;
+
+ rec_argv[i++] = STRDUP_FAIL_EXIT("report");
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-i");
+ rec_argv[i++] = STRDUP_FAIL_EXIT(file_name);
for (j = 1; j < argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
BUG_ON(i != rec_argc);
- return cmd_report(i, rec_argv);
+ ret = cmd_report(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
static int
__cmd_buildid_list(const char *file_name, int argc, const char **argv)
{
- int rec_argc, i = 0, j;
+ int rec_argc, i = 0, j, ret;
const char **rec_argv;
rec_argc = argc + 2;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- rec_argv[i++] = strdup("buildid-list");
- rec_argv[i++] = strdup("-i");
- rec_argv[i++] = strdup(file_name);
+ if (!rec_argv)
+ return -ENOMEM;
+
+ rec_argv[i++] = STRDUP_FAIL_EXIT("buildid-list");
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-i");
+ rec_argv[i++] = STRDUP_FAIL_EXIT(file_name);
for (j = 1; j < argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
BUG_ON(i != rec_argc);
- return cmd_buildid_list(i, rec_argv);
+ ret = cmd_buildid_list(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
+}
+
+static int __cmd_top(int argc, const char **argv)
+{
+ int rec_argc, i = 0, ret;
+ const char **rec_argv;
+
+ /*
+ * kvm_add_default_arch_event() may add 2 extra options, so
+ * allocate 2 more pointers in advance.
+ */
+ rec_argc = argc + 2;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_argv)
+ return -ENOMEM;
+
+ for (i = 0; i < argc; i++)
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[i]);
+
+ BUG_ON(i != argc);
+
+ ret = kvm_add_default_arch_event(&i, rec_argv);
+ if (ret)
+ goto EXIT;
+
+ ret = cmd_top(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
int cmd_kvm(int argc, const char **argv)
@@ -2114,7 +2176,7 @@ int cmd_kvm(int argc, const char **argv)
else if (strlen(argv[0]) > 2 && strstarts("diff", argv[0]))
return cmd_diff(argc, argv);
else if (!strcmp(argv[0], "top"))
- return cmd_top(argc, argv);
+ return __cmd_top(argc, argv);
else if (strlen(argv[0]) > 2 && strstarts("buildid-list", argv[0]))
return __cmd_buildid_list(file_name, argc, argv);
#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index d2e08de5976d..7f3068264568 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -2273,12 +2273,23 @@ static void setup_event_list(struct perf_kwork *kwork,
pr_debug("\n");
}
+#define STRDUP_FAIL_EXIT(s) \
+ ({ char *_p; \
+ _p = strdup(s); \
+ if (!_p) { \
+ ret = -ENOMEM; \
+ goto EXIT; \
+ } \
+ _p; \
+ })
+
static int perf_kwork__record(struct perf_kwork *kwork,
int argc, const char **argv)
{
const char **rec_argv;
unsigned int rec_argc, i, j;
struct kwork_class *class;
+ int ret;
const char *const record_args[] = {
"record",
@@ -2298,17 +2309,17 @@ static int perf_kwork__record(struct perf_kwork *kwork,
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(record_args); i++)
- rec_argv[i] = strdup(record_args[i]);
+ rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
list_for_each_entry(class, &kwork->class_list, list) {
for (j = 0; j < class->nr_tracepoints; j++) {
- rec_argv[i++] = strdup("-e");
- rec_argv[i++] = strdup(class->tp_handlers[j].name);
+ rec_argv[i++] = STRDUP_FAIL_EXIT("-e");
+ rec_argv[i++] = STRDUP_FAIL_EXIT(class->tp_handlers[j].name);
}
}
for (j = 1; j < (unsigned int)argc; j++, i++)
- rec_argv[i] = argv[j];
+ rec_argv[i] = STRDUP_FAIL_EXIT(argv[j]);
BUG_ON(i != rec_argc);
@@ -2317,7 +2328,13 @@ static int perf_kwork__record(struct perf_kwork *kwork,
pr_debug("%s ", rec_argv[j]);
pr_debug("\n");
- return cmd_record(i, rec_argv);
+ ret = cmd_record(i, rec_argv);
+
+EXIT:
+ for (i = 0; i < rec_argc; i++)
+ free((void *)rec_argv[i]);
+ free(rec_argv);
+ return ret;
}
int cmd_kwork(int argc, const char **argv)
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index caf42276bd0f..5cbca0bacd35 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -130,7 +130,7 @@ static void default_print_event(void *ps, const char *topic,
if (deprecated && !print_state->deprecated)
return;
- if (print_state->pmu_glob && pmu_name && !strglobmatch(pmu_name, print_state->pmu_glob))
+ if (print_state->pmu_glob && (!pmu_name || !strglobmatch(pmu_name, print_state->pmu_glob)))
return;
if (print_state->exclude_abi && pmu_type < PERF_TYPE_MAX && pmu_type != PERF_TYPE_RAW)
@@ -283,8 +283,8 @@ static void default_print_metric(void *ps,
}
struct json_print_state {
- /** @fp: File to write output to. */
- FILE *fp;
+ /** The shared print_state */
+ struct print_state common;
/** Should a separator be printed prior to the next item? */
bool need_sep;
};
@@ -292,7 +292,7 @@ struct json_print_state {
static void json_print_start(void *ps)
{
struct json_print_state *print_state = ps;
- FILE *fp = print_state->fp;
+ FILE *fp = print_state->common.fp;
fprintf(fp, "[\n");
}
@@ -300,7 +300,7 @@ static void json_print_start(void *ps)
static void json_print_end(void *ps)
{
struct json_print_state *print_state = ps;
- FILE *fp = print_state->fp;
+ FILE *fp = print_state->common.fp;
fprintf(fp, "%s]\n", print_state->need_sep ? "\n" : "");
}
@@ -370,9 +370,26 @@ static void json_print_event(void *ps, const char *topic,
{
struct json_print_state *print_state = ps;
bool need_sep = false;
- FILE *fp = print_state->fp;
+ FILE *fp = print_state->common.fp;
struct strbuf buf;
+ if (deprecated && !print_state->common.deprecated)
+ return;
+
+ if (print_state->common.pmu_glob &&
+ (!pmu_name || !strglobmatch(pmu_name, print_state->common.pmu_glob)))
+ return;
+
+ if (print_state->common.exclude_abi && pmu_type < PERF_TYPE_MAX &&
+ pmu_type != PERF_TYPE_RAW)
+ return;
+
+ if (print_state->common.event_glob &&
+ (!event_name || !strglobmatch(event_name, print_state->common.event_glob)) &&
+ (!event_alias || !strglobmatch(event_alias, print_state->common.event_glob)) &&
+ (!topic || !strglobmatch_nocase(topic, print_state->common.event_glob)))
+ return;
+
strbuf_init(&buf, 0);
fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : "");
print_state->need_sep = true;
@@ -446,9 +463,16 @@ static void json_print_metric(void *ps __maybe_unused, const char *group,
{
struct json_print_state *print_state = ps;
bool need_sep = false;
- FILE *fp = print_state->fp;
+ FILE *fp = print_state->common.fp;
struct strbuf buf;
+ if (print_state->common.event_glob &&
+ (!print_state->common.metrics || !name ||
+ !strglobmatch(name, print_state->common.event_glob)) &&
+ (!print_state->common.metricgroups || !group ||
+ !strglobmatch(group, print_state->common.event_glob)))
+ return;
+
strbuf_init(&buf, 0);
fprintf(fp, "%s{\n", print_state->need_sep ? ",\n" : "");
print_state->need_sep = true;
@@ -521,10 +545,12 @@ int cmd_list(int argc, const char **argv)
.fp = stdout,
.desc = true,
};
- struct print_state json_ps = {
- .fp = stdout,
+ struct json_print_state json_ps = {
+ .common = {
+ .fp = stdout,
+ },
};
- void *ps = &default_ps;
+ struct print_state *ps = &default_ps;
struct print_callbacks print_cb = {
.print_start = default_print_start,
.print_end = default_print_end,
@@ -572,9 +598,11 @@ int cmd_list(int argc, const char **argv)
argc = parse_options(argc, argv, list_options, list_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
+ if (json)
+ ps = &json_ps.common;
+
if (output_path) {
- default_ps.fp = fopen(output_path, "w");
- json_ps.fp = default_ps.fp;
+ ps->fp = fopen(output_path, "w");
}
setup_pager();
@@ -590,14 +618,13 @@ int cmd_list(int argc, const char **argv)
.print_metric = json_print_metric,
.skip_duplicate_pmus = json_skip_duplicate_pmus,
};
- ps = &json_ps;
} else {
- default_ps.last_topic = strdup("");
- assert(default_ps.last_topic);
- default_ps.visited_metrics = strlist__new(NULL, NULL);
- assert(default_ps.visited_metrics);
+ ps->last_topic = strdup("");
+ assert(ps->last_topic);
+ ps->visited_metrics = strlist__new(NULL, NULL);
+ assert(ps->visited_metrics);
if (unit_name)
- default_ps.pmu_glob = strdup(unit_name);
+ ps->pmu_glob = strdup(unit_name);
else if (cputype) {
const struct perf_pmu *pmu = perf_pmus__pmu_for_pmu_filter(cputype);
@@ -606,14 +633,16 @@ int cmd_list(int argc, const char **argv)
ret = -1;
goto out;
}
- default_ps.pmu_glob = strdup(pmu->name);
+ ps->pmu_glob = strdup(pmu->name);
}
}
print_cb.print_start(ps);
if (argc == 0) {
- default_ps.metrics = true;
- default_ps.metricgroups = true;
+ if (!unit_name) {
+ ps->metrics = true;
+ ps->metricgroups = true;
+ }
print_events(&print_cb, ps);
goto out;
}
@@ -633,41 +662,58 @@ int cmd_list(int argc, const char **argv)
zfree(&default_ps.pmu_glob);
default_ps.pmu_glob = old_pmu_glob;
} else if (strcmp(argv[i], "hw") == 0 ||
- strcmp(argv[i], "hardware") == 0)
- print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE,
- event_symbols_hw, PERF_COUNT_HW_MAX);
- else if (strcmp(argv[i], "sw") == 0 ||
+ strcmp(argv[i], "hardware") == 0) {
+ char *old_event_glob = ps->event_glob;
+
+ ps->event_glob = strdup("legacy hardware");
+ if (!ps->event_glob) {
+ ret = -1;
+ goto out;
+ }
+ perf_pmus__print_pmu_events(&print_cb, ps);
+ zfree(&ps->event_glob);
+ ps->event_glob = old_event_glob;
+ } else if (strcmp(argv[i], "sw") == 0 ||
strcmp(argv[i], "software") == 0) {
- char *old_pmu_glob = default_ps.pmu_glob;
+ char *old_pmu_glob = ps->pmu_glob;
static const char * const sw_globs[] = { "software", "tool" };
for (size_t j = 0; j < ARRAY_SIZE(sw_globs); j++) {
- default_ps.pmu_glob = strdup(sw_globs[j]);
- if (!default_ps.pmu_glob) {
+ ps->pmu_glob = strdup(sw_globs[j]);
+ if (!ps->pmu_glob) {
ret = -1;
goto out;
}
perf_pmus__print_pmu_events(&print_cb, ps);
- zfree(&default_ps.pmu_glob);
+ zfree(&ps->pmu_glob);
}
- default_ps.pmu_glob = old_pmu_glob;
+ ps->pmu_glob = old_pmu_glob;
} else if (strcmp(argv[i], "cache") == 0 ||
- strcmp(argv[i], "hwcache") == 0)
- print_hwcache_events(&print_cb, ps);
- else if (strcmp(argv[i], "pmu") == 0) {
- default_ps.exclude_abi = true;
+ strcmp(argv[i], "hwcache") == 0) {
+ char *old_event_glob = ps->event_glob;
+
+ ps->event_glob = strdup("legacy cache");
+ if (!ps->event_glob) {
+ ret = -1;
+ goto out;
+ }
perf_pmus__print_pmu_events(&print_cb, ps);
- default_ps.exclude_abi = false;
+ zfree(&ps->event_glob);
+ ps->event_glob = old_event_glob;
+ } else if (strcmp(argv[i], "pmu") == 0) {
+ ps->exclude_abi = true;
+ perf_pmus__print_pmu_events(&print_cb, ps);
+ ps->exclude_abi = false;
} else if (strcmp(argv[i], "sdt") == 0)
print_sdt_events(&print_cb, ps);
else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) {
- default_ps.metricgroups = false;
- default_ps.metrics = true;
+ ps->metricgroups = false;
+ ps->metrics = true;
metricgroup__print(&print_cb, ps);
} else if (strcmp(argv[i], "metricgroup") == 0 ||
strcmp(argv[i], "metricgroups") == 0) {
- default_ps.metricgroups = true;
- default_ps.metrics = false;
+ ps->metricgroups = true;
+ ps->metrics = false;
metricgroup__print(&print_cb, ps);
}
#ifdef HAVE_LIBPFM
@@ -675,43 +721,40 @@ int cmd_list(int argc, const char **argv)
print_libpfm_events(&print_cb, ps);
#endif
else if ((sep = strchr(argv[i], ':')) != NULL) {
- char *old_pmu_glob = default_ps.pmu_glob;
- char *old_event_glob = default_ps.event_glob;
+ char *old_pmu_glob = ps->pmu_glob;
+ char *old_event_glob = ps->event_glob;
- default_ps.event_glob = strdup(argv[i]);
- if (!default_ps.event_glob) {
+ ps->event_glob = strdup(argv[i]);
+ if (!ps->event_glob) {
ret = -1;
goto out;
}
- default_ps.pmu_glob = strdup("tracepoint");
- if (!default_ps.pmu_glob) {
- zfree(&default_ps.event_glob);
+ ps->pmu_glob = strdup("tracepoint");
+ if (!ps->pmu_glob) {
+ zfree(&ps->event_glob);
ret = -1;
goto out;
}
perf_pmus__print_pmu_events(&print_cb, ps);
- zfree(&default_ps.pmu_glob);
- default_ps.pmu_glob = old_pmu_glob;
+ zfree(&ps->pmu_glob);
+ ps->pmu_glob = old_pmu_glob;
print_sdt_events(&print_cb, ps);
- default_ps.metrics = true;
- default_ps.metricgroups = true;
+ ps->metrics = true;
+ ps->metricgroups = true;
metricgroup__print(&print_cb, ps);
- zfree(&default_ps.event_glob);
- default_ps.event_glob = old_event_glob;
+ zfree(&ps->event_glob);
+ ps->event_glob = old_event_glob;
} else {
if (asprintf(&s, "*%s*", argv[i]) < 0) {
printf("Critical: Not enough memory! Trying to continue...\n");
continue;
}
- default_ps.event_glob = s;
- print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE,
- event_symbols_hw, PERF_COUNT_HW_MAX);
- print_hwcache_events(&print_cb, ps);
+ ps->event_glob = s;
perf_pmus__print_pmu_events(&print_cb, ps);
print_sdt_events(&print_cb, ps);
- default_ps.metrics = true;
- default_ps.metricgroups = true;
+ ps->metrics = true;
+ ps->metricgroups = true;
metricgroup__print(&print_cb, ps);
free(s);
}
@@ -719,12 +762,12 @@ int cmd_list(int argc, const char **argv)
out:
print_cb.print_end(ps);
- free(default_ps.pmu_glob);
- free(default_ps.last_topic);
- free(default_ps.last_metricgroups);
- strlist__delete(default_ps.visited_metrics);
+ free(ps->pmu_glob);
+ free(ps->last_topic);
+ free(ps->last_metricgroups);
+ strlist__delete(ps->visited_metrics);
if (output_path)
- fclose(default_ps.fp);
+ fclose(ps->fp);
return ret;
}
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index fd49703021fd..e8962c985d34 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -1867,6 +1867,7 @@ static int __cmd_report(bool display_info)
eops.sample = process_sample_event;
eops.comm = perf_event__process_comm;
eops.mmap = perf_event__process_mmap;
+ eops.mmap2 = perf_event__process_mmap2;
eops.namespaces = perf_event__process_namespaces;
eops.tracing_data = perf_event__process_tracing_data;
session = perf_session__new(&data, &eops);
@@ -2009,6 +2010,7 @@ static int __cmd_contention(int argc, const char **argv)
.owner = show_lock_owner,
.cgroups = RB_ROOT,
};
+ struct perf_env host_env;
lockhash_table = calloc(LOCKHASH_SIZE, sizeof(*lockhash_table));
if (!lockhash_table)
@@ -2022,9 +2024,13 @@ static int __cmd_contention(int argc, const char **argv)
eops.sample = process_sample_event;
eops.comm = perf_event__process_comm;
eops.mmap = perf_event__process_mmap;
+ eops.mmap2 = perf_event__process_mmap2;
eops.tracing_data = perf_event__process_tracing_data;
- session = perf_session__new(use_bpf ? NULL : &data, &eops);
+ perf_env__init(&host_env);
+ session = __perf_session__new(use_bpf ? NULL : &data, &eops,
+ /*trace_event_repipe=*/false, &host_env);
+
if (IS_ERR(session)) {
pr_err("Initializing perf session failed\n");
err = PTR_ERR(session);
@@ -2142,6 +2148,7 @@ out_delete:
evlist__delete(con.evlist);
lock_contention_finish(&con);
perf_session__delete(session);
+ perf_env__exit(&host_env);
zfree(&lockhash_table);
return err;
}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index c6496adff3fe..d43500b92a7b 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/stat.h>
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7ea3a11aca70..2584d0d8bc82 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -730,8 +730,6 @@ static void record__sig_exit(void)
raise(signr);
}
-#ifdef HAVE_AUXTRACE_SUPPORT
-
static int record__process_auxtrace(const struct perf_tool *tool,
struct mmap *map,
union perf_event *event, void *data1,
@@ -889,40 +887,6 @@ static int record__auxtrace_init(struct record *rec)
return auxtrace_parse_filters(rec->evlist);
}
-#else
-
-static inline
-int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
- struct mmap *map __maybe_unused)
-{
- return 0;
-}
-
-static inline
-void record__read_auxtrace_snapshot(struct record *rec __maybe_unused,
- bool on_exit __maybe_unused)
-{
-}
-
-static inline
-int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
-{
- return 0;
-}
-
-static inline
-int record__auxtrace_snapshot_exit(struct record *rec __maybe_unused)
-{
- return 0;
-}
-
-static int record__auxtrace_init(struct record *rec __maybe_unused)
-{
- return 0;
-}
-
-#endif
-
static int record__config_text_poke(struct evlist *evlist)
{
struct evsel *evsel;
@@ -983,7 +947,6 @@ static int record__config_tracking_events(struct record *rec)
*/
if (opts->target.initial_delay || target__has_cpu(&opts->target) ||
perf_pmus__num_core_pmus() > 1) {
-
/*
* User space tasks can migrate between CPUs, so when tracing
* selected CPUs, sideband for all CPUs is still needed.
@@ -1388,10 +1351,27 @@ static int record__open(struct record *rec)
struct perf_session *session = rec->session;
struct record_opts *opts = &rec->opts;
int rc = 0;
+ bool skipped = false;
+ bool removed_tracking = false;
evlist__for_each_entry(evlist, pos) {
+ if (removed_tracking) {
+ /*
+ * Normally the head of the list has tracking enabled
+ * for sideband data like mmaps. If this event is
+ * removed, make sure to add tracking to the next
+ * processed event.
+ */
+ if (!pos->tracking) {
+ pos->tracking = true;
+ evsel__config(pos, opts, &callchain_param);
+ }
+ removed_tracking = false;
+ }
try_again:
if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
+ bool report_error = true;
+
if (evsel__fallback(pos, &opts->target, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
@@ -1403,15 +1383,72 @@ try_again:
pos = evlist__reset_weak_group(evlist, pos, true);
goto try_again;
}
- rc = -errno;
- evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
- ui__error("%s\n", msg);
- goto out;
+#if defined(__aarch64__) || defined(__arm__)
+ if (strstr(evsel__name(pos), "cycles")) {
+ struct evsel *pos2;
+ /*
+ * Unfortunately ARM has many events named
+ * "cycles" on PMUs like the system-level (L3)
+ * cache which don't support sampling. Only
+ * display such failures to open when there is
+ * only 1 cycles event or verbose is enabled.
+ */
+ evlist__for_each_entry(evlist, pos2) {
+ if (pos2 == pos)
+ continue;
+ if (strstr(evsel__name(pos2), "cycles")) {
+ report_error = false;
+ break;
+ }
+ }
+ }
+#endif
+ if (report_error || verbose > 0) {
+ ui__error("Failure to open event '%s' on PMU '%s' which will be "
+ "removed.\n%s\n",
+ evsel__name(pos), evsel__pmu_name(pos), msg);
+ }
+ if (pos->tracking)
+ removed_tracking = true;
+ pos->skippable = true;
+ skipped = true;
}
-
- pos->supported = true;
}
+ if (skipped) {
+ struct evsel *tmp;
+ int idx = 0;
+ bool evlist_empty = true;
+
+ /* Remove evsels that failed to open and update indices. */
+ evlist__for_each_entry_safe(evlist, tmp, pos) {
+ if (pos->skippable) {
+ evlist__remove(evlist, pos);
+ continue;
+ }
+
+ /*
+ * Note, dummy events may be command line parsed or
+ * added by the tool. We care about supporting `perf
+ * record -e dummy` which may be used as a permission
+ * check. Dummy events that are added to the command
+ * line and opened along with other events that fail,
+ * will still fail as if the dummy events were tool
+ * added events for the sake of code simplicity.
+ */
+ if (!evsel__is_dummy_event(pos))
+ evlist_empty = false;
+ }
+ evlist__for_each_entry(evlist, pos) {
+ pos->core.idx = idx++;
+ }
+ /* If list is empty then fail. */
+ if (evlist_empty) {
+ ui__error("Failure to open any events for recording.\n");
+ rc = -1;
+ goto out;
+ }
+ }
if (symbol_conf.kptr_restrict && !evlist__exclude_kernel(evlist)) {
pr_warning(
"WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n"
@@ -1817,15 +1854,14 @@ record__finish_output(struct record *rec)
}
/* Buildid scanning disabled or build ID in kernel and synthesized map events. */
- if (!rec->no_buildid) {
+ if (!rec->no_buildid || !rec->no_buildid_cache) {
process_buildids(rec);
if (rec->buildid_all)
perf_session__dsos_hit_all(rec->session);
}
perf_session__write_header(rec->session, rec->evlist, fd, true);
-
- return;
+ perf_session__cache_build_ids(rec->session);
}
static int record__synthesize_workload(struct record *rec, bool tail)
@@ -2885,11 +2921,11 @@ out_free_threads:
rec->bytes_written += off_cpu_write(rec->session);
record__read_lost_samples(rec);
- record__synthesize(rec, true);
/* this will be recalculated during process_buildids() */
rec->samples = 0;
if (!err) {
+ record__synthesize(rec, true);
if (!rec->timestamp_filename) {
record__finish_output(rec);
} else {
@@ -3010,7 +3046,7 @@ static int perf_record_config(const char *var, const char *value, void *cb)
else if (!strcmp(value, "no-cache"))
rec->no_buildid_cache = true;
else if (!strcmp(value, "skip"))
- rec->no_buildid = true;
+ rec->no_buildid = rec->no_buildid_cache = true;
else if (!strcmp(value, "mmap"))
rec->buildid_mmap = true;
else if (!strcmp(value, "no-mmap"))
@@ -4119,24 +4155,25 @@ int cmd_record(int argc, const char **argv)
record.opts.record_switch_events = true;
}
- if (!rec->buildid_mmap) {
- pr_debug("Disabling build id in synthesized mmap2 events.\n");
- symbol_conf.no_buildid_mmap2 = true;
- } else if (rec->buildid_mmap_set) {
- /*
- * Explicitly passing --buildid-mmap disables buildid processing
- * and cache generation.
- */
- rec->no_buildid = true;
- }
if (rec->buildid_mmap && !perf_can_record_build_id()) {
pr_warning("Missing support for build id in kernel mmap events.\n"
"Disable this warning with --no-buildid-mmap\n");
rec->buildid_mmap = false;
}
+
if (rec->buildid_mmap) {
/* Enable perf_event_attr::build_id bit. */
rec->opts.build_id = true;
+ /* Disable build-ID table in the header. */
+ rec->no_buildid = true;
+ } else {
+ pr_debug("Disabling build id in synthesized mmap2 events.\n");
+ symbol_conf.no_buildid_mmap2 = true;
+ }
+
+ if (rec->no_buildid_set && rec->no_buildid) {
+ /* -B implies -N for historic reasons. */
+ rec->no_buildid_cache = true;
}
if (rec->opts.record_cgroup && !perf_can_record_cgroup()) {
@@ -4233,7 +4270,7 @@ int cmd_record(int argc, const char **argv)
err = -ENOMEM;
- if (rec->no_buildid_cache || rec->no_buildid) {
+ if (rec->no_buildid_cache) {
disable_buildid_cache();
} else if (rec->switch_output.enabled) {
/*
@@ -4268,9 +4305,13 @@ int cmd_record(int argc, const char **argv)
record.opts.tail_synthesize = true;
if (rec->evlist->core.nr_entries == 0) {
- err = parse_event(rec->evlist, "cycles:P");
- if (err)
+ struct evlist *def_evlist = evlist__new_default();
+
+ if (!def_evlist)
goto out;
+
+ evlist__splice_list_tail(rec->evlist, &def_evlist->core.entries);
+ evlist__delete(def_evlist);
}
if (rec->opts.target.tid && !rec->opts.no_inherit_set)
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 35df04dad2fd..add6b1c2aaf0 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -240,10 +240,11 @@ static void setup_forced_leader(struct report *report,
evlist__force_leader(evlist);
}
-static int process_feature_event(struct perf_session *session,
+static int process_feature_event(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- struct report *rep = container_of(session->tool, struct report, tool);
+ struct report *rep = container_of(tool, struct report, tool);
if (event->feat.feat_id < HEADER_LAST_FEATURE)
return perf_event__process_feature(session, event);
@@ -1613,6 +1614,7 @@ repeat:
report.tool.event_update = perf_event__process_event_update;
report.tool.feature = process_feature_event;
report.tool.ordering_requires_timestamps = true;
+ report.tool.merge_deferred_callchains = !dump_trace;
session = perf_session__new(&data, &report.tool);
if (IS_ERR(session)) {
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index f166d6cbc083..eca3b1c58c4b 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1532,35 +1532,24 @@ static int process_sched_wakeup_ignore(const struct perf_tool *tool __maybe_unus
return 0;
}
-union map_priv {
- void *ptr;
- bool color;
-};
-
static bool thread__has_color(struct thread *thread)
{
- union map_priv priv = {
- .ptr = thread__priv(thread),
- };
-
- return priv.color;
+ return thread__priv(thread) != NULL;
}
static struct thread*
map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid, pid_t tid)
{
struct thread *thread = machine__findnew_thread(machine, pid, tid);
- union map_priv priv = {
- .color = false,
- };
+ bool color = false;
if (!sched->map.color_pids || !thread || thread__priv(thread))
return thread;
if (thread_map__has(sched->map.color_pids, tid))
- priv.color = true;
+ color = true;
- thread__set_priv(thread, priv.ptr);
+ thread__set_priv(thread, color ? ((void*)1) : NULL);
return thread;
}
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index d9fbdcf72f25..62e43d3c5ad7 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -33,6 +33,7 @@
#include "util/path.h"
#include "util/event.h"
#include "util/mem-info.h"
+#include "util/metricgroup.h"
#include "ui/ui.h"
#include "print_binary.h"
#include "print_insn.h"
@@ -43,6 +44,7 @@
#include <linux/stringify.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
+#include <linux/unaligned.h>
#include <sys/utsname.h>
#include "asm/bug.h"
#include "util/mem-events.h"
@@ -223,7 +225,7 @@ enum {
OUTPUT_TYPE_MAX
};
-// We need to refactor the evsel->priv use in in 'perf script' to allow for
+// We need to refactor the evsel->priv use in 'perf script' to allow for
// using that area, that is being used only in some cases.
#define OUTPUT_TYPE_UNSET -1
@@ -340,16 +342,8 @@ struct evsel_script {
char *filename;
FILE *fp;
u64 samples;
- /* For metric output */
- u64 val;
- int gnum;
};
-static inline struct evsel_script *evsel_script(struct evsel *evsel)
-{
- return (struct evsel_script *)evsel->priv;
-}
-
static struct evsel_script *evsel_script__new(struct evsel *evsel, struct perf_data *data)
{
struct evsel_script *es = zalloc(sizeof(*es));
@@ -1224,7 +1218,6 @@ static int any_dump_insn(struct evsel *evsel __maybe_unused,
u8 *inbuf, int inlen, int *lenp,
FILE *fp)
{
-#ifdef HAVE_LIBCAPSTONE_SUPPORT
if (PRINT_FIELD(BRSTACKDISASM)) {
int printed = fprintf_insn_asm(x->machine, x->thread, x->cpumode, x->is64bit,
(uint8_t *)inbuf, inlen, ip, lenp,
@@ -1233,7 +1226,6 @@ static int any_dump_insn(struct evsel *evsel __maybe_unused,
if (printed > 0)
return printed;
}
-#endif
return fprintf(fp, "%s", dump_insn(x, ip, inbuf, inlen, lenp));
}
@@ -2003,6 +1995,25 @@ static int perf_sample__fprintf_synth_iflag_chg(struct perf_sample *sample, FILE
return len + perf_sample__fprintf_pt_spacing(len, fp);
}
+static int perf_sample__fprintf_synth_vpadtl(struct perf_sample *data, FILE *fp)
+{
+ struct powerpc_vpadtl_entry *dtl = (struct powerpc_vpadtl_entry *)data->raw_data;
+ int len;
+
+ len = fprintf(fp, "timebase: %" PRIu64 " dispatch_reason:%s, preempt_reason:%s,\n"
+ "enqueue_to_dispatch_time:%d, ready_to_enqueue_time:%d,"
+ "waiting_to_ready_time:%d, processor_id: %d",
+ get_unaligned_be64(&dtl->timebase),
+ dispatch_reasons[dtl->dispatch_reason],
+ preempt_reasons[dtl->preempt_reason],
+ be32_to_cpu(dtl->enqueue_to_dispatch_time),
+ be32_to_cpu(dtl->ready_to_enqueue_time),
+ be32_to_cpu(dtl->waiting_to_ready_time),
+ be16_to_cpu(dtl->processor_id));
+
+ return len;
+}
+
static int perf_sample__fprintf_synth(struct perf_sample *sample,
struct evsel *evsel, FILE *fp)
{
@@ -2025,6 +2036,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
return perf_sample__fprintf_synth_evt(sample, fp);
case PERF_SYNTH_INTEL_IFLAG_CHG:
return perf_sample__fprintf_synth_iflag_chg(sample, fp);
+ case PERF_SYNTH_POWERPC_VPA_DTL:
+ return perf_sample__fprintf_synth_vpadtl(sample, fp);
default:
break;
}
@@ -2104,13 +2117,161 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
fputs("\tmetric: ", mctx->fp);
}
-static void perf_sample__fprint_metric(struct perf_script *script,
- struct thread *thread,
+struct script_find_metrics_args {
+ struct evlist *evlist;
+ bool system_wide;
+};
+
+static struct evsel *map_metric_evsel_to_script_evsel(struct evlist *script_evlist,
+ struct evsel *metric_evsel)
+{
+ struct evsel *script_evsel;
+
+ evlist__for_each_entry(script_evlist, script_evsel) {
+ /* Skip if perf_event_attr differ. */
+ if (metric_evsel->core.attr.type != script_evsel->core.attr.type)
+ continue;
+ if (metric_evsel->core.attr.config != script_evsel->core.attr.config)
+ continue;
+ /* Skip if the script event has a metric_id that doesn't match. */
+ if (script_evsel->metric_id &&
+ strcmp(evsel__metric_id(metric_evsel), evsel__metric_id(script_evsel))) {
+ pr_debug("Skipping matching evsel due to differing metric ids '%s' vs '%s'\n",
+ evsel__metric_id(metric_evsel), evsel__metric_id(script_evsel));
+ continue;
+ }
+ return script_evsel;
+ }
+ return NULL;
+}
+
+static int script_find_metrics(const struct pmu_metric *pm,
+ const struct pmu_metrics_table *table __maybe_unused,
+ void *data)
+{
+ struct script_find_metrics_args *args = data;
+ struct evlist *script_evlist = args->evlist;
+ struct evlist *metric_evlist = evlist__new();
+ struct evsel *metric_evsel;
+ int ret = metricgroup__parse_groups(metric_evlist,
+ /*pmu=*/"all",
+ pm->metric_name,
+ /*metric_no_group=*/false,
+ /*metric_no_merge=*/false,
+ /*metric_no_threshold=*/true,
+ /*user_requested_cpu_list=*/NULL,
+ args->system_wide,
+ /*hardware_aware_grouping=*/false);
+
+ if (ret) {
+ /* Metric parsing failed but continue the search. */
+ goto out;
+ }
+
+ /*
+ * Check the script_evlist has an entry for each metric_evlist entry. If
+ * the script evsel was already set up avoid changing data that may
+ * break it.
+ */
+ evlist__for_each_entry(metric_evlist, metric_evsel) {
+ struct evsel *script_evsel =
+ map_metric_evsel_to_script_evsel(script_evlist, metric_evsel);
+ struct evsel *new_metric_leader;
+
+ if (!script_evsel) {
+ pr_debug("Skipping metric '%s' as evsel '%s' / '%s' is missing\n",
+ pm->metric_name, evsel__name(metric_evsel),
+ evsel__metric_id(metric_evsel));
+ goto out;
+ }
+
+ if (script_evsel->metric_leader == NULL)
+ continue;
+
+ if (metric_evsel->metric_leader == metric_evsel) {
+ new_metric_leader = script_evsel;
+ } else {
+ new_metric_leader =
+ map_metric_evsel_to_script_evsel(script_evlist,
+ metric_evsel->metric_leader);
+ }
+ /* Mismatching evsel leaders. */
+ if (script_evsel->metric_leader != new_metric_leader) {
+ pr_debug("Skipping metric '%s' due to mismatching evsel metric leaders '%s' vs '%s'\n",
+ pm->metric_name, evsel__metric_id(metric_evsel),
+ evsel__metric_id(script_evsel));
+ goto out;
+ }
+ }
+ /*
+ * Metric events match those in the script evlist, copy metric evsel
+ * data into the script evlist.
+ */
+ evlist__for_each_entry(metric_evlist, metric_evsel) {
+ struct evsel *script_evsel =
+ map_metric_evsel_to_script_evsel(script_evlist, metric_evsel);
+ struct metric_event *metric_me = metricgroup__lookup(&metric_evlist->metric_events,
+ metric_evsel,
+ /*create=*/false);
+
+ if (script_evsel->metric_id == NULL) {
+ script_evsel->metric_id = metric_evsel->metric_id;
+ metric_evsel->metric_id = NULL;
+ }
+
+ if (script_evsel->metric_leader == NULL) {
+ if (metric_evsel->metric_leader == metric_evsel) {
+ script_evsel->metric_leader = script_evsel;
+ } else {
+ script_evsel->metric_leader =
+ map_metric_evsel_to_script_evsel(script_evlist,
+ metric_evsel->metric_leader);
+ }
+ }
+
+ if (metric_me) {
+ struct metric_expr *expr;
+ struct metric_event *script_me =
+ metricgroup__lookup(&script_evlist->metric_events,
+ script_evsel,
+ /*create=*/true);
+
+ if (!script_me) {
+ /*
+ * As the metric_event is created, the only
+ * failure is a lack of memory.
+ */
+ goto out;
+ }
+ list_splice_init(&metric_me->head, &script_me->head);
+ list_for_each_entry(expr, &script_me->head, nd) {
+ for (int i = 0; expr->metric_events[i]; i++) {
+ expr->metric_events[i] =
+ map_metric_evsel_to_script_evsel(script_evlist,
+ expr->metric_events[i]);
+ }
+ }
+ }
+ }
+ pr_debug("Found metric '%s' whose evsels match those of in the perf data\n",
+ pm->metric_name);
+ evlist__delete(metric_evlist);
+out:
+ return 0;
+}
+
+static struct aggr_cpu_id script_aggr_cpu_id_get(struct perf_stat_config *config __maybe_unused,
+ struct perf_cpu cpu)
+{
+ return aggr_cpu_id__global(cpu, /*data=*/NULL);
+}
+
+static void perf_sample__fprint_metric(struct thread *thread,
struct evsel *evsel,
struct perf_sample *sample,
FILE *fp)
{
- struct evsel *leader = evsel__leader(evsel);
+ static bool init_metrics;
struct perf_stat_output_ctx ctx = {
.print_metric = script_print_metric,
.new_line = script_new_line,
@@ -2122,23 +2283,84 @@ static void perf_sample__fprint_metric(struct perf_script *script,
},
.force_header = false,
};
- struct evsel *ev2;
- u64 val;
+ struct perf_counts_values *count, *old_count;
+ int cpu_map_idx, thread_map_idx, aggr_idx;
+ struct evsel *pos;
+
+ if (!init_metrics) {
+ /* One time initialization of stat_config and metric data. */
+ struct script_find_metrics_args args = {
+ .evlist = evsel->evlist,
+ .system_wide = perf_thread_map__pid(evsel->core.threads, /*idx=*/0) == -1,
+
+ };
+ if (!stat_config.output)
+ stat_config.output = stdout;
+
+ if (!stat_config.aggr_map) {
+ /* TODO: currently only global aggregation is supported. */
+ assert(stat_config.aggr_mode == AGGR_GLOBAL);
+ stat_config.aggr_get_id = script_aggr_cpu_id_get;
+ stat_config.aggr_map =
+ cpu_aggr_map__new(evsel->evlist->core.user_requested_cpus,
+ aggr_cpu_id__global, /*data=*/NULL,
+ /*needs_sort=*/false);
+ }
+
+ metricgroup__for_each_metric(pmu_metrics_table__find(), script_find_metrics, &args);
+ init_metrics = true;
+ }
- if (!evsel->stats)
- evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false);
- if (evsel_script(leader)->gnum++ == 0)
- perf_stat__reset_shadow_stats();
- val = sample->period * evsel->scale;
- evsel_script(evsel)->val = val;
- if (evsel_script(leader)->gnum == leader->core.nr_members) {
- for_each_group_member (ev2, leader) {
- perf_stat__print_shadow_stats(&stat_config, ev2,
- evsel_script(ev2)->val,
- sample->cpu,
- &ctx);
+ if (!evsel->stats) {
+ if (evlist__alloc_stats(&stat_config, evsel->evlist, /*alloc_raw=*/true) < 0)
+ return;
+ }
+ if (!evsel->stats->aggr) {
+ if (evlist__alloc_aggr_stats(evsel->evlist, stat_config.aggr_map->nr) < 0)
+ return;
+ }
+
+ /* Update the evsel's count using the sample's data. */
+ cpu_map_idx = perf_cpu_map__idx(evsel->core.cpus, (struct perf_cpu){sample->cpu});
+ if (cpu_map_idx < 0) {
+ /* Missing CPU, check for any CPU. */
+ if (perf_cpu_map__cpu(evsel->core.cpus, /*idx=*/0).cpu == -1 ||
+ sample->cpu == (u32)-1) {
+ /* Place the counts in whichever CPU is first in the map. */
+ cpu_map_idx = 0;
+ } else {
+ pr_info("Missing CPU map entry for CPU %d\n", sample->cpu);
+ return;
+ }
+ }
+ thread_map_idx = perf_thread_map__idx(evsel->core.threads, sample->tid);
+ if (thread_map_idx < 0) {
+ /* Missing thread, check for any thread. */
+ if (perf_thread_map__pid(evsel->core.threads, /*idx=*/0) == -1 ||
+ sample->tid == (u32)-1) {
+ /* Place the counts in whichever thread is first in the map. */
+ thread_map_idx = 0;
+ } else {
+ pr_info("Missing thread map entry for thread %d\n", sample->tid);
+ return;
+ }
+ }
+ count = perf_counts(evsel->counts, cpu_map_idx, thread_map_idx);
+ old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread_map_idx);
+ count->val = old_count->val + sample->period;
+ count->run = old_count->run + 1;
+ count->ena = old_count->ena + 1;
+
+ /* Update the aggregated stats. */
+ perf_stat_process_counter(&stat_config, evsel);
+
+ /* Display all metrics. */
+ evlist__for_each_entry(evsel->evlist, pos) {
+ cpu_aggr_map__for_each_idx(aggr_idx, stat_config.aggr_map) {
+ perf_stat__print_shadow_stats(&stat_config, pos,
+ aggr_idx,
+ &ctx);
}
- evsel_script(leader)->gnum = 0;
}
}
@@ -2320,7 +2542,7 @@ static void process_event(struct perf_script *script,
}
if (PRINT_FIELD(METRIC))
- perf_sample__fprint_metric(script, thread, evsel, sample, fp);
+ perf_sample__fprint_metric(thread, evsel, sample, fp);
if (verbose > 0)
fflush(fp);
@@ -2484,6 +2706,94 @@ out_put:
return ret;
}
+static int process_deferred_sample_event(const struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_script *scr = container_of(tool, struct perf_script, tool);
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel_script *es = evsel->priv;
+ unsigned int type = output_type(attr->type);
+ struct addr_location al;
+ FILE *fp = es->fp;
+ int ret = 0;
+
+ if (output[type].fields == 0)
+ return 0;
+
+ /* Set thread to NULL to indicate al is not initialized */
+ addr_location__init(&al);
+
+ if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num,
+ sample->time)) {
+ goto out_put;
+ }
+
+ if (debug_mode) {
+ if (sample->time < last_timestamp) {
+ pr_err("Samples misordered, previous: %" PRIu64
+ " this: %" PRIu64 "\n", last_timestamp,
+ sample->time);
+ nr_unordered++;
+ }
+ last_timestamp = sample->time;
+ goto out_put;
+ }
+
+ if (filter_cpu(sample))
+ goto out_put;
+
+ if (machine__resolve(machine, &al, sample) < 0) {
+ pr_err("problem processing %d event, skipping it.\n",
+ event->header.type);
+ ret = -1;
+ goto out_put;
+ }
+
+ if (al.filtered)
+ goto out_put;
+
+ if (!show_event(sample, evsel, al.thread, &al, NULL))
+ goto out_put;
+
+ if (evswitch__discard(&scr->evswitch, evsel))
+ goto out_put;
+
+ perf_sample__fprintf_start(scr, sample, al.thread, evsel,
+ PERF_RECORD_CALLCHAIN_DEFERRED, fp);
+ fprintf(fp, "DEFERRED CALLCHAIN [cookie: %llx]",
+ (unsigned long long)event->callchain_deferred.cookie);
+
+ if (PRINT_FIELD(IP)) {
+ struct callchain_cursor *cursor = NULL;
+
+ if (symbol_conf.use_callchain && sample->callchain) {
+ cursor = get_tls_callchain_cursor();
+ if (thread__resolve_callchain(al.thread, cursor, evsel,
+ sample, NULL, NULL,
+ scripting_max_stack)) {
+ pr_info("cannot resolve deferred callchains\n");
+ cursor = NULL;
+ }
+ }
+
+ fputc(cursor ? '\n' : ' ', fp);
+ sample__fprintf_sym(sample, &al, 0, output[type].print_ip_opts,
+ cursor, symbol_conf.bt_stop_list, fp);
+ }
+
+ fprintf(fp, "\n");
+
+ if (verbose > 0)
+ fflush(fp);
+
+out_put:
+ addr_location__exit(&al);
+ return ret;
+}
+
// Used when scr->per_event_dump is not set
static struct evsel_script es_stdout;
@@ -2701,7 +3011,8 @@ static int process_switch_event(const struct perf_tool *tool,
sample->tid);
}
-static int process_auxtrace_error(struct perf_session *session,
+static int process_auxtrace_error(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
if (scripting_ops && scripting_ops->process_auxtrace_error) {
@@ -2709,7 +3020,7 @@ static int process_auxtrace_error(struct perf_session *session,
return 0;
}
- return perf_event__process_auxtrace_error(session, event);
+ return perf_event__process_auxtrace_error(tool, session, event);
}
static int
@@ -2757,7 +3068,8 @@ process_bpf_events(const struct perf_tool *tool __maybe_unused,
}
static int
-process_bpf_metadata_event(struct perf_session *session __maybe_unused,
+process_bpf_metadata_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
perf_event__fprintf(event, NULL, stdout);
@@ -3516,7 +3828,8 @@ static void script__setup_sample_type(struct perf_script *script)
}
}
-static int process_stat_round_event(struct perf_session *session,
+static int process_stat_round_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
struct perf_record_stat_round *round = &event->stat_round;
@@ -3531,7 +3844,8 @@ static int process_stat_round_event(struct perf_session *session,
return 0;
}
-static int process_stat_config_event(struct perf_session *session __maybe_unused,
+static int process_stat_config_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
perf_event__read_stat_config(&stat_config, &event->stat_config);
@@ -3565,10 +3879,10 @@ static int set_maps(struct perf_script *script)
}
static
-int process_thread_map_event(struct perf_session *session,
+int process_thread_map_event(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
if (dump_trace)
@@ -3587,10 +3901,10 @@ int process_thread_map_event(struct perf_session *session,
}
static
-int process_cpu_map_event(struct perf_session *session,
+int process_cpu_map_event(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
if (dump_trace)
@@ -3608,7 +3922,8 @@ int process_cpu_map_event(struct perf_session *session,
return set_maps(script);
}
-static int process_feature_event(struct perf_session *session,
+static int process_feature_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
if (event->feat.feat_id < HEADER_LAST_FEATURE)
@@ -3616,14 +3931,13 @@ static int process_feature_event(struct perf_session *session,
return 0;
}
-#ifdef HAVE_AUXTRACE_SUPPORT
-static int perf_script__process_auxtrace_info(struct perf_session *session,
+static int perf_script__process_auxtrace_info(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- int ret = perf_event__process_auxtrace_info(session, event);
+ int ret = perf_event__process_auxtrace_info(tool, session, event);
if (ret == 0) {
- const struct perf_tool *tool = session->tool;
struct perf_script *script = container_of(tool, struct perf_script, tool);
ret = perf_script__setup_per_event_dump(script);
@@ -3631,9 +3945,6 @@ static int perf_script__process_auxtrace_info(struct perf_session *session,
return ret;
}
-#else
-#define perf_script__process_auxtrace_info 0
-#endif
static int parse_insn_trace(const struct option *opt __maybe_unused,
const char *str, int unset __maybe_unused)
@@ -3698,6 +4009,7 @@ int cmd_script(int argc, const char **argv)
bool header_only = false;
bool script_started = false;
bool unsorted_dump = false;
+ bool merge_deferred_callchains = true;
char *rec_script_path = NULL;
char *rep_script_path = NULL;
struct perf_session *session;
@@ -3851,6 +4163,8 @@ int cmd_script(int argc, const char **argv)
"Guest code can be found in hypervisor process"),
OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
"Enable LBR callgraph stitching approach"),
+ OPT_BOOLEAN('\0', "merge-callchains", &merge_deferred_callchains,
+ "Enable merge deferred user callchains"),
OPTS_EVSWITCH(&script.evswitch),
OPT_END()
};
@@ -4080,6 +4394,7 @@ script_found:
perf_tool__init(&script.tool, !unsorted_dump);
script.tool.sample = process_sample_event;
+ script.tool.callchain_deferred = process_deferred_sample_event;
script.tool.mmap = perf_event__process_mmap;
script.tool.mmap2 = perf_event__process_mmap2;
script.tool.comm = perf_event__process_comm;
@@ -4106,6 +4421,7 @@ script_found:
script.tool.throttle = process_throttle_event;
script.tool.unthrottle = process_throttle_event;
script.tool.ordering_requires_timestamps = true;
+ script.tool.merge_deferred_callchains = merge_deferred_callchains;
session = perf_session__new(&data, &script.tool);
if (IS_ERR(session))
return PTR_ERR(session);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2c38dd98f6ca..ab40d85fb125 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -74,6 +74,7 @@
#include "util/intel-tpebs.h"
#include "asm/bug.h"
+#include <linux/list_sort.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>
@@ -96,9 +97,18 @@
#include <perf/evlist.h>
#include <internal/threadmap.h>
+#ifdef HAVE_BPF_SKEL
+#include "util/bpf_skel/bperf_cgroup.h"
+#endif
+
#define DEFAULT_SEPARATOR " "
#define FREEZE_ON_SMI_PATH "bus/event_source/devices/cpu/freeze_on_smi"
+struct rusage_stats {
+ struct stats ru_utime_usec_stat;
+ struct stats ru_stime_usec_stat;
+};
+
static void print_counters(struct timespec *ts, int argc, const char **argv);
static struct evlist *evsel_list;
@@ -128,6 +138,7 @@ static bool interval_count;
static const char *output_name;
static int output_fd;
static char *metrics;
+static struct rusage_stats ru_stats;
struct perf_stat {
bool record;
@@ -228,7 +239,7 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a,
static void perf_stat__reset_stats(void)
{
evlist__reset_stats(evsel_list);
- perf_stat__reset_shadow_stats();
+ memset(stat_config.walltime_nsecs_stats, 0, sizeof(*stat_config.walltime_nsecs_stats));
}
static int process_synthesized_event(const struct perf_tool *tool __maybe_unused,
@@ -278,17 +289,27 @@ static int read_single_counter(struct evsel *counter, int cpu_map_idx, int threa
if (err && cpu_map_idx == 0 &&
(evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME ||
evsel__tool_event(counter) == TOOL_PMU__EVENT_SYSTEM_TIME)) {
- u64 val, *start_time;
struct perf_counts_values *count =
perf_counts(counter->counts, cpu_map_idx, thread);
+ struct perf_counts_values *old_count = NULL;
+ u64 val;
+
+ if (counter->prev_raw_counts)
+ old_count = perf_counts(counter->prev_raw_counts, cpu_map_idx, thread);
- start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread);
if (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME)
val = ru_stats.ru_utime_usec_stat.mean;
else
val = ru_stats.ru_stime_usec_stat.mean;
- count->ena = count->run = *start_time + val;
+
count->val = val;
+ if (old_count) {
+ count->run = old_count->run + 1;
+ count->ena = old_count->ena + 1;
+ } else {
+ count->run++;
+ count->ena++;
+ }
return 0;
}
return err;
@@ -345,7 +366,7 @@ static int read_counter_cpu(struct evsel *counter, int cpu_map_idx)
return 0;
}
-static int read_affinity_counters(void)
+static int read_counters_with_affinity(void)
{
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity saved_affinity, *affinity;
@@ -366,6 +387,9 @@ static int read_affinity_counters(void)
if (evsel__is_bpf(counter))
continue;
+ if (evsel__is_tool(counter))
+ continue;
+
if (!counter->err)
counter->err = read_counter_cpu(counter, evlist_cpu_itr.cpu_map_idx);
}
@@ -391,16 +415,46 @@ static int read_bpf_map_counters(void)
return 0;
}
-static int read_counters(void)
+static int read_tool_counters(void)
{
- if (!stat_config.stop_read_counter) {
- if (read_bpf_map_counters() ||
- read_affinity_counters())
- return -1;
+ struct evsel *counter;
+
+ evlist__for_each_entry(evsel_list, counter) {
+ int idx;
+
+ if (!evsel__is_tool(counter))
+ continue;
+
+ perf_cpu_map__for_each_idx(idx, counter->core.cpus) {
+ if (!counter->err)
+ counter->err = read_counter_cpu(counter, idx);
+ }
}
return 0;
}
+/* Read all counters in order: BPF maps, the remaining non-tool counters, tool counters. */
+static int read_counters(void)
+{
+	int ret = 0;
+
+	if (!stat_config.stop_read_counter) {
+		/* BPF counters go first. */
+		ret = read_bpf_map_counters();
+		/* Then every counter that is neither BPF nor a tool event. */
+		if (!ret)
+			ret = read_counters_with_affinity();
+
+		/*
+		 * Tool counters go last. This way the duration_time counter
+		 * should always be greater than any other counter's enabled time.
+		 */
+		if (!ret)
+			ret = read_tool_counters();
+	}
+	return ret;
+}
+
static void process_counters(void)
{
struct evsel *counter;
@@ -434,8 +488,8 @@ static void process_interval(void)
pr_err("failed to write stat round event\n");
}
- init_stats(&walltime_nsecs_stats);
- update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
+ init_stats(stat_config.walltime_nsecs_stats);
+ update_stats(stat_config.walltime_nsecs_stats, stat_config.interval * 1000000ULL);
print_counters(&rs, 0, NULL);
}
@@ -610,38 +664,34 @@ static int dispatch_events(bool forks, int timeout, int interval, int *times)
enum counter_recovery {
COUNTER_SKIP,
COUNTER_RETRY,
- COUNTER_FATAL,
};
-static enum counter_recovery stat_handle_error(struct evsel *counter)
+static enum counter_recovery stat_handle_error(struct evsel *counter, int err)
{
char msg[BUFSIZ];
+
+ assert(!counter->supported);
+
/*
* PPC returns ENXIO for HW counters until 2.6.37
* (behavior changed with commit b0a873e).
*/
- if (errno == EINVAL || errno == ENOSYS ||
- errno == ENOENT || errno == ENXIO) {
- if (verbose > 0)
- ui__warning("%s event is not supported by the kernel.\n",
- evsel__name(counter));
- counter->supported = false;
- /*
- * errored is a sticky flag that means one of the counter's
- * cpu event had a problem and needs to be reexamined.
- */
- counter->errored = true;
-
- if ((evsel__leader(counter) != counter) ||
- !(counter->core.leader->nr_members > 1))
- return COUNTER_SKIP;
- } else if (evsel__fallback(counter, &target, errno, msg, sizeof(msg))) {
+ if (err == EINVAL || err == ENOSYS || err == ENOENT || err == ENXIO) {
+ if (verbose > 0) {
+ evsel__open_strerror(counter, &target, err, msg, sizeof(msg));
+ ui__warning("%s event is not supported by the kernel.\n%s\n",
+ evsel__name(counter), msg);
+ }
+ return COUNTER_SKIP;
+ }
+ if (evsel__fallback(counter, &target, err, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
+ counter->supported = true;
return COUNTER_RETRY;
- } else if (target__has_per_thread(&target) && errno != EOPNOTSUPP &&
- evsel_list->core.threads &&
- evsel_list->core.threads->err_thread != -1) {
+ }
+ if (target__has_per_thread(&target) && err != EOPNOTSUPP &&
+ evsel_list->core.threads && evsel_list->core.threads->err_thread != -1) {
/*
* For global --per-thread case, skip current
* error thread.
@@ -649,37 +699,85 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
if (!thread_map__remove(evsel_list->core.threads,
evsel_list->core.threads->err_thread)) {
evsel_list->core.threads->err_thread = -1;
+ counter->supported = true;
return COUNTER_RETRY;
}
- } else if (counter->skippable) {
- if (verbose > 0)
- ui__warning("skipping event %s that kernel failed to open .\n",
- evsel__name(counter));
- counter->supported = false;
- counter->errored = true;
- return COUNTER_SKIP;
}
+ if (verbose > 0) {
+ evsel__open_strerror(counter, &target, err, msg, sizeof(msg));
+ ui__warning(err == EOPNOTSUPP
+ ? "%s event is not supported by the kernel.\n%s\n"
+ : "skipping event %s that kernel failed to open.\n%s\n",
+ evsel__name(counter), msg);
+ }
+ return COUNTER_SKIP;
+}
- if (errno == EOPNOTSUPP) {
- if (verbose > 0) {
- ui__warning("%s event is not supported by the kernel.\n",
- evsel__name(counter));
- }
- counter->supported = false;
- counter->errored = true;
+static int create_perf_stat_counter(struct evsel *evsel,
+ struct perf_stat_config *config,
+ int cpu_map_idx)
+{
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel *leader = evsel__leader(evsel);
+
+ /* Always read the total enabled and running times along with the count. */
+ attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
+ PERF_FORMAT_TOTAL_TIME_RUNNING;
- if ((evsel__leader(counter) != counter) ||
- !(counter->core.leader->nr_members > 1))
- return COUNTER_SKIP;
+ /*
+ * The event is part of non trivial group, let's enable
+ * the group read (for leader) and ID retrieval for all
+ * members.
+ */
+ if (leader->core.nr_members > 1)
+ attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
+
+ attr->inherit = !config->no_inherit && list_empty(&evsel->bpf_counter_list);
+
+ /*
+ * Some events get initialized with sample_(period/type) set,
+ * like tracepoints. Clear it up for counting.
+ */
+ attr->sample_period = 0;
+
+ if (config->identifier)
+ attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+
+ if (config->all_user) {
+ attr->exclude_kernel = 1;
+ attr->exclude_user = 0;
}
- evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
- ui__error("%s\n", msg);
+ if (config->all_kernel) {
+ attr->exclude_kernel = 0;
+ attr->exclude_user = 1;
+ }
- if (child_pid != -1)
- kill(child_pid, SIGTERM);
+ /*
+ * Disabling all counters initially, they will be enabled
+ * either manually by us or by kernel via enable_on_exec
+ * set later.
+ */
+ if (evsel__is_group_leader(evsel)) {
+ attr->disabled = 1;
+
+ if (target__enable_on_exec(&target))
+ attr->enable_on_exec = 1;
+ }
+
+ return evsel__open_per_cpu_and_thread(evsel, evsel__cpus(evsel), cpu_map_idx,
+ evsel->core.threads);
+}
+
+static void update_rusage_stats(const struct rusage *rusage)
+{
+ const u64 us_to_ns = 1000;
+ const u64 s_to_ns = 1000000000;
- return COUNTER_FATAL;
+ update_stats(&ru_stats.ru_utime_usec_stat,
+ (rusage->ru_utime.tv_usec * us_to_ns + rusage->ru_utime.tv_sec * s_to_ns));
+ update_stats(&ru_stats.ru_stime_usec_stat,
+ (rusage->ru_stime.tv_usec * us_to_ns + rusage->ru_stime.tv_sec * s_to_ns));
}
static int __run_perf_stat(int argc, const char **argv, int run_idx)
@@ -696,8 +794,8 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
bool is_pipe = STAT_RECORD ? perf_stat.data.is_pipe : false;
struct evlist_cpu_iterator evlist_cpu_itr;
struct affinity saved_affinity, *affinity = NULL;
- int err;
- bool second_pass = false;
+ int err, open_err = 0;
+ bool second_pass = false, has_supported_counters;
if (forks) {
if (evlist__prepare_workload(evsel_list, &target, argv, is_pipe, workload_exec_failed_signal) < 0) {
@@ -737,14 +835,17 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
if (target.use_bpf)
break;
- if (counter->reset_group || counter->errored)
+ if (counter->reset_group || !counter->supported)
continue;
if (evsel__is_bperf(counter))
continue;
-try_again:
- if (create_perf_stat_counter(counter, &stat_config, &target,
- evlist_cpu_itr.cpu_map_idx) < 0) {
+ while (true) {
+ if (create_perf_stat_counter(counter, &stat_config,
+ evlist_cpu_itr.cpu_map_idx) == 0)
+ break;
+
+ open_err = errno;
/*
* Weak group failed. We cannot just undo this here
* because earlier CPUs might be in group mode, and the kernel
@@ -752,29 +853,19 @@ try_again:
* it to later.
* Don't close here because we're in the wrong affinity.
*/
- if ((errno == EINVAL || errno == EBADF) &&
+ if ((open_err == EINVAL || open_err == EBADF) &&
evsel__leader(counter) != counter &&
counter->weak_group) {
evlist__reset_weak_group(evsel_list, counter, false);
assert(counter->reset_group);
+ counter->supported = true;
second_pass = true;
- continue;
- }
-
- switch (stat_handle_error(counter)) {
- case COUNTER_FATAL:
- err = -1;
- goto err_out;
- case COUNTER_RETRY:
- goto try_again;
- case COUNTER_SKIP:
- continue;
- default:
break;
}
+ if (stat_handle_error(counter, open_err) != COUNTER_RETRY)
+ break;
}
- counter->supported = true;
}
if (second_pass) {
@@ -787,7 +878,7 @@ try_again:
evlist__for_each_cpu(evlist_cpu_itr, evsel_list, affinity) {
counter = evlist_cpu_itr.evsel;
- if (!counter->reset_group && !counter->errored)
+ if (!counter->reset_group && counter->supported)
continue;
perf_evsel__close_cpu(&counter->core, evlist_cpu_itr.cpu_map_idx);
@@ -798,34 +889,29 @@ try_again:
if (!counter->reset_group)
continue;
-try_again_reset:
- pr_debug2("reopening weak %s\n", evsel__name(counter));
- if (create_perf_stat_counter(counter, &stat_config, &target,
- evlist_cpu_itr.cpu_map_idx) < 0) {
-
- switch (stat_handle_error(counter)) {
- case COUNTER_FATAL:
- err = -1;
- goto err_out;
- case COUNTER_RETRY:
- goto try_again_reset;
- case COUNTER_SKIP:
- continue;
- default:
+
+ while (true) {
+ pr_debug2("reopening weak %s\n", evsel__name(counter));
+ if (create_perf_stat_counter(counter, &stat_config,
+ evlist_cpu_itr.cpu_map_idx) == 0)
+ break;
+
+ open_err = errno;
+ if (stat_handle_error(counter, open_err) != COUNTER_RETRY)
break;
- }
}
- counter->supported = true;
}
}
affinity__cleanup(affinity);
affinity = NULL;
+ has_supported_counters = false;
evlist__for_each_entry(evsel_list, counter) {
if (!counter->supported) {
perf_evsel__free_fd(&counter->core);
continue;
}
+ has_supported_counters = true;
l = strlen(counter->unit);
if (l > stat_config.unit_width)
@@ -837,6 +923,18 @@ try_again_reset:
goto err_out;
}
}
+	if (!has_supported_counters && !stat_config.null_run) {
+		/* msg is only filled in when an open error was recorded. */
+		if (open_err)
+			evsel__open_strerror(evlist__first(evsel_list), &target, open_err,
+					     msg, sizeof(msg));
+		ui__error("No supported events found.\n%s\n", open_err ? msg : "");
+
+		if (child_pid != -1)
+			kill(child_pid, SIGTERM);
+		err = -1;
+		goto err_out;
+	}
if (evlist__apply_filters(evsel_list, &counter, &target)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
@@ -909,10 +1007,20 @@ try_again_reset:
goto err_out;
}
- if (WIFSIGNALED(status))
+ if (WIFSIGNALED(status)) {
+ /*
+ * We want to indicate failure to stop a repeat run,
+ * hence negative. We want the value to be the exit code
+ * of perf, which for termination by a signal is 128
+ * plus the signal number.
+ */
+ err = 0 - (128 + WTERMSIG(status));
psignal(WTERMSIG(status), argv[0]);
+ } else {
+ err = WEXITSTATUS(status);
+ }
} else {
- status = dispatch_events(forks, timeout, interval, &times);
+ err = dispatch_events(forks, timeout, interval, &times);
}
disable_counters();
@@ -925,15 +1033,15 @@ try_again_reset:
if (interval && stat_config.summary) {
stat_config.interval = 0;
stat_config.stop_read_counter = true;
- init_stats(&walltime_nsecs_stats);
- update_stats(&walltime_nsecs_stats, t1 - t0);
+ init_stats(stat_config.walltime_nsecs_stats);
+ update_stats(stat_config.walltime_nsecs_stats, t1 - t0);
evlist__copy_prev_raw_counts(evsel_list);
evlist__reset_prev_raw_counts(evsel_list);
evlist__reset_aggr_stats(evsel_list);
} else {
- update_stats(&walltime_nsecs_stats, t1 - t0);
- update_rusage_stats(&ru_stats, &stat_config.ru_data);
+ update_stats(stat_config.walltime_nsecs_stats, t1 - t0);
+ update_rusage_stats(&stat_config.ru_data);
}
/*
@@ -952,7 +1060,7 @@ try_again_reset:
if (!STAT_RECORD)
evlist__close(evsel_list);
- return WEXITSTATUS(status);
+ return err;
err_out:
if (forks)
@@ -1822,6 +1930,35 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
return 0;
}
+/* list_sort() comparator used to order the default event list. */
+static int default_evlist_evsel_cmp(void *priv __maybe_unused,
+				    const struct list_head *l,
+				    const struct list_head *r)
+{
+	const struct perf_evsel *a_core = container_of(l, struct perf_evsel, node);
+	const struct perf_evsel *b_core = container_of(r, struct perf_evsel, node);
+	const struct evsel *a = container_of(a_core, struct evsel, core);
+	const struct evsel *b = container_of(b_core, struct evsel, core);
+
+	/* Members of the same group keep their original relative order. */
+	if (evsel__leader(a) == evsel__leader(b))
+		return a_core->idx - b_core->idx;
+
+	/* Default metric group evsels first; on a tie, default_show_events evsels first. */
+	if (a->default_metricgroup != b->default_metricgroup)
+		return a->default_metricgroup ? -1 : 1;
+
+	if (a->default_show_events != b->default_show_events)
+		return a->default_show_events ? -1 : 1;
+
+	/* Next by PMU type (prefers legacy types first). */
+	if (a->pmu != b->pmu)
+		return a->pmu->type - b->pmu->type;
+
+	/* Finally by name; the casts drop const for evsel__name(). */
+	return strcmp(evsel__name((struct evsel *)a), evsel__name((struct evsel *)b));
+}
+
/*
* Add default events, if there were no attributes specified or
* if -d/--detailed, -d -d or -d -d -d is used:
@@ -1944,48 +2081,39 @@ static int add_default_events(void)
stat_config.topdown_level = 1;
if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) {
- /* No events so add defaults. */
- if (target__has_cpu(&target))
- ret = parse_events(evlist, "cpu-clock", &err);
- else
- ret = parse_events(evlist, "task-clock", &err);
- if (ret)
- goto out;
-
- ret = parse_events(evlist,
- "context-switches,"
- "cpu-migrations,"
- "page-faults,"
- "instructions,"
- "cycles,"
- "stalled-cycles-frontend,"
- "stalled-cycles-backend,"
- "branches,"
- "branch-misses",
- &err);
- if (ret)
- goto out;
-
/*
- * Add TopdownL1 metrics if they exist. To minimize
- * multiplexing, don't request threshold computation.
+ * Add Default metrics. To minimize multiplexing, don't request
+ * threshold computation, but it will be computed if the events
+ * are present.
*/
- if (metricgroup__has_metric_or_groups(pmu, "Default")) {
- struct evlist *metric_evlist = evlist__new();
+ const char *default_metricgroup_names[] = {
+ "Default", "Default2", "Default3", "Default4",
+ };
+ for (size_t i = 0; i < ARRAY_SIZE(default_metricgroup_names); i++) {
+ struct evlist *metric_evlist;
+
+ if (!metricgroup__has_metric_or_groups(pmu, default_metricgroup_names[i]))
+ continue;
+
+ if ((int)i > detailed_run)
+ break;
+
+ metric_evlist = evlist__new();
if (!metric_evlist) {
ret = -ENOMEM;
- goto out;
+ break;
}
- if (metricgroup__parse_groups(metric_evlist, pmu, "Default",
+ if (metricgroup__parse_groups(metric_evlist, pmu, default_metricgroup_names[i],
/*metric_no_group=*/false,
/*metric_no_merge=*/false,
/*metric_no_threshold=*/true,
stat_config.user_requested_cpu_list,
stat_config.system_wide,
stat_config.hardware_aware_grouping) < 0) {
+ evlist__delete(metric_evlist);
ret = -1;
- goto out;
+ break;
}
evlist__for_each_entry(metric_evlist, evsel)
@@ -1997,44 +2125,8 @@ static int add_default_events(void)
&metric_evlist->metric_events);
evlist__delete(metric_evlist);
}
- }
+ list_sort(/*priv=*/NULL, &evlist->core.entries, default_evlist_evsel_cmp);
- /* Detailed events get appended to the event list: */
-
- if (!ret && detailed_run >= 1) {
- /*
- * Detailed stats (-d), covering the L1 and last level data
- * caches:
- */
- ret = parse_events(evlist,
- "L1-dcache-loads,"
- "L1-dcache-load-misses,"
- "LLC-loads,"
- "LLC-load-misses",
- &err);
- }
- if (!ret && detailed_run >= 2) {
- /*
- * Very detailed stats (-d -d), covering the instruction cache
- * and the TLB caches:
- */
- ret = parse_events(evlist,
- "L1-icache-loads,"
- "L1-icache-load-misses,"
- "dTLB-loads,"
- "dTLB-load-misses,"
- "iTLB-loads,"
- "iTLB-load-misses",
- &err);
- }
- if (!ret && detailed_run >= 3) {
- /*
- * Very, very detailed stats (-d -d -d), adding prefetch events:
- */
- ret = parse_events(evlist,
- "L1-dcache-prefetches,"
- "L1-dcache-prefetch-misses",
- &err);
}
out:
if (!ret) {
@@ -2043,7 +2135,7 @@ out:
* Make at least one event non-skippable so fatal errors are visible.
* 'cycles' always used to be default and non-skippable, so use that.
*/
- if (strcmp("cycles", evsel__name(evsel)))
+ if (!evsel__match(evsel, HARDWARE, HW_CPU_CYCLES))
evsel->skippable = true;
}
}
@@ -2107,7 +2199,8 @@ static int __cmd_record(const struct option stat_options[], struct opt_aggr_mode
return argc;
}
-static int process_stat_round_event(struct perf_session *session,
+static int process_stat_round_event(const struct perf_tool *tool __maybe_unused,
+ struct perf_session *session,
union perf_event *event)
{
struct perf_record_stat_round *stat_round = &event->stat_round;
@@ -2119,7 +2212,7 @@ static int process_stat_round_event(struct perf_session *session,
process_counters();
if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
- update_stats(&walltime_nsecs_stats, stat_round->time);
+ update_stats(stat_config.walltime_nsecs_stats, stat_round->time);
if (stat_config.interval && stat_round->time) {
tsh.tv_sec = stat_round->time / NSEC_PER_SEC;
@@ -2132,10 +2225,10 @@ static int process_stat_round_event(struct perf_session *session,
}
static
-int process_stat_config_event(struct perf_session *session,
+int process_stat_config_event(const struct perf_tool *tool,
+ struct perf_session *session,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
perf_event__read_stat_config(&stat_config, &event->stat_config);
@@ -2181,10 +2274,10 @@ static int set_maps(struct perf_stat *st)
}
static
-int process_thread_map_event(struct perf_session *session,
+int process_thread_map_event(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
if (st->threads) {
@@ -2200,10 +2293,10 @@ int process_thread_map_event(struct perf_session *session,
}
static
-int process_cpu_map_event(struct perf_session *session,
+int process_cpu_map_event(const struct perf_tool *tool,
+ struct perf_session *session __maybe_unused,
union perf_event *event)
{
- const struct perf_tool *tool = session->tool;
struct perf_stat *st = container_of(tool, struct perf_stat, tool);
struct perf_cpu_map *cpus;
@@ -2511,6 +2604,7 @@ int cmd_stat(int argc, const char **argv)
unsigned int interval, timeout;
const char * const stat_subcommands[] = { "record", "report" };
char errbuf[BUFSIZ];
+ struct evsel *counter;
setlocale(LC_ALL, "");
@@ -2765,9 +2859,28 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
}
-
+#ifdef HAVE_BPF_SKEL
+ if (target.use_bpf && nr_cgroups &&
+ (evsel_list->core.nr_entries / nr_cgroups) > BPERF_CGROUP__MAX_EVENTS) {
+ pr_warning("Disabling BPF counters due to more events (%d) than the max (%d)\n",
+ evsel_list->core.nr_entries / nr_cgroups, BPERF_CGROUP__MAX_EVENTS);
+ target.use_bpf = false;
+ }
+#endif // HAVE_BPF_SKEL
evlist__warn_user_requested_cpus(evsel_list, target.cpu_list);
+ evlist__for_each_entry(evsel_list, counter) {
+ /*
+ * Setup BPF counters to require CPUs as any(-1) isn't
+ * supported. evlist__create_maps below will propagate this
+ * information to the evsels. Note, evsel__is_bperf isn't yet
+ * set up, and this change must happen early, so directly use
+ * the bpf_counter variable and target information.
+ */
+ if ((counter->bpf_counter || target.use_bpf) && !target__has_cpu(&target))
+ counter->core.requires_cpu = true;
+ }
+
if (evlist__create_maps(evsel_list, &target) < 0) {
if (target__has_task(&target)) {
pr_err("Problems finding threads of monitor\n");
@@ -2866,7 +2979,7 @@ int cmd_stat(int argc, const char **argv)
evlist__reset_prev_raw_counts(evsel_list);
status = run_perf_stat(argc, argv, run_idx);
- if (status == -1)
+ if (status < 0)
break;
if (forever && !interval) {
@@ -2907,7 +3020,7 @@ int cmd_stat(int argc, const char **argv)
}
if (!interval) {
- if (WRITE_STAT_ROUND_EVENT(walltime_nsecs_stats.max, FINAL))
+ if (WRITE_STAT_ROUND_EVENT(stat_config.walltime_nsecs_stats->max, FINAL))
pr_err("failed to write stat round event\n");
}
@@ -2936,5 +3049,6 @@ out:
evlist__close_control(stat_config.ctl_fd, stat_config.ctl_fd_ack, &stat_config.ctl_fd_close);
- return status;
+ /* Only the low byte of status becomes the exit code. */
+ return abs(status);
}
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 22050c640dfa..f8b49d69e9a5 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -1651,7 +1651,7 @@ out_delete:
return ret;
}
-static int timechart__io_record(int argc, const char **argv)
+static int timechart__io_record(int argc, const char **argv, const char *output_data)
{
unsigned int rec_argc, i;
const char **rec_argv;
@@ -1659,7 +1659,7 @@ static int timechart__io_record(int argc, const char **argv)
char *filter = NULL;
const char * const common_args[] = {
- "record", "-a", "-R", "-c", "1",
+ "record", "-a", "-R", "-c", "1", "-o", output_data,
};
unsigned int common_args_nr = ARRAY_SIZE(common_args);
@@ -1786,7 +1786,8 @@ static int timechart__io_record(int argc, const char **argv)
}
-static int timechart__record(struct timechart *tchart, int argc, const char **argv)
+static int timechart__record(struct timechart *tchart, int argc, const char **argv,
+ const char *output_data)
{
unsigned int rec_argc, i, j;
const char **rec_argv;
@@ -1794,7 +1795,7 @@ static int timechart__record(struct timechart *tchart, int argc, const char **ar
unsigned int record_elems;
const char * const common_args[] = {
- "record", "-a", "-R", "-c", "1",
+ "record", "-a", "-R", "-c", "1", "-o", output_data,
};
unsigned int common_args_nr = ARRAY_SIZE(common_args);
@@ -1934,6 +1935,7 @@ int cmd_timechart(int argc, const char **argv)
.merge_dist = 1000,
};
const char *output_name = "output.svg";
+ const char *output_record_data = "perf.data";
const struct option timechart_common_options[] = {
OPT_BOOLEAN('P', "power-only", &tchart.power_only, "output power data only"),
OPT_BOOLEAN('T', "tasks-only", &tchart.tasks_only, "output processes data only"),
@@ -1976,6 +1978,7 @@ int cmd_timechart(int argc, const char **argv)
OPT_BOOLEAN('I', "io-only", &tchart.io_only,
"record only IO data"),
OPT_BOOLEAN('g', "callchain", &tchart.with_backtrace, "record callchain"),
+ OPT_STRING('o', "output", &output_record_data, "file", "output data file name"),
OPT_PARENT(timechart_common_options),
};
const char * const timechart_record_usage[] = {
@@ -2024,9 +2027,9 @@ int cmd_timechart(int argc, const char **argv)
}
if (tchart.io_only)
- ret = timechart__io_record(argc, argv);
+ ret = timechart__io_record(argc, argv, output_record_data);
else
- ret = timechart__record(&tchart, argc, argv);
+ ret = timechart__record(&tchart, argc, argv, output_record_data);
goto out;
} else if (argc)
usage_with_options(timechart_usage, timechart_options);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a11f629c7d76..710604c4f6f6 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1695,11 +1695,13 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
if (!top.evlist->core.nr_entries) {
- bool can_profile_kernel = perf_event_paranoid_check(1);
- int err = parse_event(top.evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu");
+ struct evlist *def_evlist = evlist__new_default();
- if (err)
+ if (!def_evlist)
goto out_delete_evlist;
+
+ evlist__splice_list_tail(top.evlist, &def_evlist->core.entries);
+ evlist__delete(def_evlist);
}
status = evswitch__init(&top.evswitch, top.evlist, stderr);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index fe737b3ac6e6..baee1f695600 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -196,6 +196,7 @@ struct trace {
unsigned int max_stack;
unsigned int min_stack;
enum trace_summary_mode summary_mode;
+ int max_summary;
int raw_augmented_syscalls_args_size;
bool raw_augmented_syscalls;
bool fd_path_disabled;
@@ -2004,7 +2005,9 @@ static int trace__symbols_init(struct trace *trace, int argc, const char **argv,
err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
evlist->core.threads, trace__tool_process,
- true, false, 1);
+ /*needs_mmap=*/callchain_param.enabled,
+ /*mmap_data=*/false,
+ /*nr_threads_synthesize=*/1);
out:
if (err) {
perf_env__exit(&trace->host_env);
@@ -2066,6 +2069,15 @@ static const struct syscall_arg_fmt *syscall_arg_fmt__find_by_name(const char *n
return __syscall_arg_fmt__find_by_name(syscall_arg_fmts__by_name, nmemb, name);
}
+/*
+ * The v6.19 kernel added new fields to read userspace memory for event tracing.
+ * perf does not use them and they confuse the syscall parameters.
+ */
+static bool is_internal_field(struct tep_format_field *field)
+{
+	return strcmp(field->type, "__data_loc char[]") == 0;
+}
+
static struct tep_format_field *
syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field *field,
bool *use_btf)
@@ -2074,6 +2086,10 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
int len;
for (; field; field = field->next, ++arg) {
+ /* assume it's the last argument */
+ if (is_internal_field(field))
+ continue;
+
last_field = field;
if (arg->scnprintf)
@@ -2142,6 +2158,7 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
{
char tp_name[128];
const char *name;
+ struct tep_format_field *field;
int err;
if (sc->nonexistent)
@@ -2198,6 +2215,13 @@ static int syscall__read_info(struct syscall *sc, struct trace *trace)
--sc->nr_args;
}
+ field = sc->args;
+ while (field) {
+ if (is_internal_field(field))
+ --sc->nr_args;
+ field = field->next;
+ }
+
sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit");
sc->is_open = !strcmp(name, "open") || !strcmp(name, "openat");
@@ -4440,7 +4464,7 @@ create_maps:
if (trace->summary_mode == SUMMARY__BY_TOTAL && !trace->summary_bpf) {
trace->syscall_stats = alloc_syscall_stats();
- if (trace->syscall_stats == NULL)
+ if (IS_ERR(trace->syscall_stats))
goto out_delete_evlist;
}
@@ -4599,7 +4623,7 @@ out_disable:
if (!err) {
if (trace->summary) {
if (trace->summary_bpf)
- trace_print_bpf_summary(trace->output);
+ trace_print_bpf_summary(trace->output, trace->max_summary);
else if (trace->summary_mode == SUMMARY__BY_TOTAL)
trace__fprintf_total_summary(trace, trace->output);
else
@@ -4748,7 +4772,7 @@ static int trace__replay(struct trace *trace)
if (trace->summary_mode == SUMMARY__BY_TOTAL) {
trace->syscall_stats = alloc_syscall_stats();
- if (trace->syscall_stats == NULL)
+ if (IS_ERR(trace->syscall_stats))
goto out;
}
@@ -4822,6 +4846,7 @@ static size_t syscall__dump_stats(struct trace *trace, int e_machine, FILE *fp,
struct hashmap *syscall_stats)
{
size_t printed = 0;
+ int lines = 0;
struct syscall *sc;
struct syscall_entry *entries;
@@ -4866,7 +4891,11 @@ static size_t syscall__dump_stats(struct trace *trace, int e_machine, FILE *fp,
fprintf(fp, "\t\t\t\t%s: %d\n", perf_env__arch_strerrno(trace->host->env, e + 1), stats->errnos[e]);
}
}
+ lines++;
}
+
+ if (trace->max_summary && trace->max_summary <= lines)
+ break;
}
free(entries);
@@ -5443,6 +5472,8 @@ int cmd_trace(int argc, const char **argv)
OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer"
"to customized ones"),
OPT_BOOLEAN(0, "bpf-summary", &trace.summary_bpf, "Summary syscall stats in BPF"),
+ OPT_INTEGER(0, "max-summary", &trace.max_summary,
+ "Max number of entries in the summary."),
OPTS_EVSWITCH(&trace.evswitch),
OPT_END()
};
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index be519c433ce4..e0537f275da2 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -11,10 +11,16 @@ declare -a FILES=(
"include/uapi/linux/bits.h"
"include/uapi/linux/fadvise.h"
"include/uapi/linux/fscrypt.h"
+ "include/uapi/linux/genetlink.h"
+ "include/uapi/linux/if_addr.h"
+ "include/uapi/linux/in.h"
"include/uapi/linux/kcmp.h"
"include/uapi/linux/kvm.h"
- "include/uapi/linux/in.h"
+ "include/uapi/linux/neighbour.h"
+ "include/uapi/linux/netfilter.h"
+ "include/uapi/linux/netfilter_arp.h"
"include/uapi/linux/perf_event.h"
+ "include/uapi/linux/rtnetlink.h"
"include/uapi/linux/seccomp.h"
"include/uapi/linux/stat.h"
"include/linux/bits.h"
@@ -23,6 +29,7 @@ declare -a FILES=(
"include/linux/const.h"
"include/vdso/const.h"
"include/vdso/unaligned.h"
+ "include/linux/gfp_types.h"
"include/linux/hash.h"
"include/linux/list-sort.h"
"include/uapi/linux/hw_breakpoint.h"
@@ -40,15 +47,12 @@ declare -a FILES=(
"arch/s390/include/uapi/asm/perf_regs.h"
"arch/x86/include/uapi/asm/perf_regs.h"
"arch/x86/include/uapi/asm/kvm.h"
- "arch/x86/include/uapi/asm/kvm_perf.h"
"arch/x86/include/uapi/asm/svm.h"
"arch/x86/include/uapi/asm/unistd.h"
"arch/x86/include/uapi/asm/vmx.h"
"arch/powerpc/include/uapi/asm/kvm.h"
"arch/s390/include/uapi/asm/kvm.h"
- "arch/s390/include/uapi/asm/kvm_perf.h"
"arch/s390/include/uapi/asm/sie.h"
- "arch/arm/include/uapi/asm/kvm.h"
"arch/arm64/include/uapi/asm/kvm.h"
"arch/arm64/include/uapi/asm/unistd.h"
"arch/alpha/include/uapi/asm/errno.h"
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 3cb40965549f..e004178472d9 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -2,9 +2,7 @@
#ifndef _PERF_PERF_H
#define _PERF_PERF_H
-#ifndef MAX_NR_CPUS
#define MAX_NR_CPUS 4096
-#endif
enum perf_affinity {
PERF_AFFINITY_SYS = 0,
diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 32f387d48908..a46ab7b612df 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -1,7 +1,5 @@
pmu-events-y += pmu-events.o
-JDIR = pmu-events/arch/$(SRCARCH)
-JSON = $(shell [ -d $(JDIR) ] && \
- find $(JDIR) -name '*.json' -o -name 'mapfile.csv')
+JSON = $(shell find pmu-events/arch -name '*.json' -o -name '*.csv')
JDIR_TEST = pmu-events/arch/test
JSON_TEST = $(shell [ -d $(JDIR_TEST) ] && \
find $(JDIR_TEST) -name '*.json')
@@ -13,6 +11,8 @@ PMU_EVENTS_C = $(OUTPUT)pmu-events/pmu-events.c
METRIC_TEST_LOG = $(OUTPUT)pmu-events/metric_test.log
TEST_EMPTY_PMU_EVENTS_C = $(OUTPUT)pmu-events/test-empty-pmu-events.c
EMPTY_PMU_EVENTS_TEST_LOG = $(OUTPUT)pmu-events/empty-pmu-events.log
+LEGACY_CACHE_PY = pmu-events/make_legacy_cache.py
+LEGACY_CACHE_JSON = $(OUTPUT)pmu-events/arch/common/common/legacy-cache.json
ifeq ($(JEVENTS_ARCH),)
JEVENTS_ARCH=$(SRCARCH)
@@ -29,13 +29,26 @@ $(PMU_EVENTS_C): $(EMPTY_PMU_EVENTS_C)
$(call rule_mkdir)
$(Q)$(call echo-cmd,gen)cp $< $@
else
+# Copy checked-in json to OUTPUT for generation if it's an out of source build
+ifneq ($(OUTPUT),)
+$(OUTPUT)pmu-events/arch/%: pmu-events/arch/%
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)cp $< $@
+endif
+
+$(LEGACY_CACHE_JSON): $(LEGACY_CACHE_PY)
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)$(PYTHON) $(LEGACY_CACHE_PY) > $@
+
+GEN_JSON = $(patsubst %,$(OUTPUT)%,$(JSON)) $(LEGACY_CACHE_JSON)
+
$(METRIC_TEST_LOG): $(METRIC_TEST_PY) $(METRIC_PY)
$(call rule_mkdir)
$(Q)$(call echo-cmd,test)$(PYTHON) $< 2> $@ || (cat $@ && false)
-$(TEST_EMPTY_PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG)
+$(TEST_EMPTY_PMU_EVENTS_C): $(GEN_JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG)
$(call rule_mkdir)
- $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) none none pmu-events/arch $@
+ $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) none none $(OUTPUT)pmu-events/arch $@
$(EMPTY_PMU_EVENTS_TEST_LOG): $(EMPTY_PMU_EVENTS_C) $(TEST_EMPTY_PMU_EVENTS_C)
$(call rule_mkdir)
@@ -63,10 +76,10 @@ $(OUTPUT)%.pylint_log: %
$(call rule_mkdir)
$(Q)$(call echo-cmd,test)pylint "$<" > $@ || (cat $@ && rm $@ && false)
-$(PMU_EVENTS_C): $(JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG) \
+$(PMU_EVENTS_C): $(GEN_JSON) $(JSON_TEST) $(JEVENTS_PY) $(METRIC_PY) $(METRIC_TEST_LOG) \
$(EMPTY_PMU_EVENTS_TEST_LOG) $(PMU_EVENTS_MYPY_TEST_LOGS) $(PMU_EVENTS_PYLINT_TEST_LOGS)
$(call rule_mkdir)
- $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) pmu-events/arch $@
+ $(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) $(JEVENTS_MODEL) $(OUTPUT)pmu-events/arch $@
endif
# pmu-events.c file is generated in the OUTPUT directory so it needs a
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
index afcdad58ef89..324104438e78 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereone/metrics.json
@@ -113,7 +113,7 @@
{
"MetricName": "load_store_spec_rate",
"MetricExpr": "((LDST_SPEC / INST_SPEC) * 100)",
- "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speclatively executed",
+ "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speculatively executed",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
},
@@ -132,7 +132,7 @@
{
"MetricName": "pc_write_spec_rate",
"MetricExpr": "((PC_WRITE_SPEC / INST_SPEC) * 100)",
- "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speclatively executed",
+ "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speculatively executed",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "1percent of operations"
},
@@ -195,14 +195,14 @@
{
"MetricName": "stall_frontend_cache_rate",
"MetricExpr": "((STALL_FRONTEND_CACHE / CPU_CYCLES) * 100)",
- "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss",
+ "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and cache miss",
"MetricGroup": "Stall",
"ScaleUnit": "1percent of cycles"
},
{
"MetricName": "stall_frontend_tlb_rate",
"MetricExpr": "((STALL_FRONTEND_TLB / CPU_CYCLES) * 100)",
- "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss",
+ "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and TLB miss",
"MetricGroup": "Stall",
"ScaleUnit": "1percent of cycles"
},
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json
index 5228f94a793f..a29aadc9b2e3 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/ampereonex/metrics.json
@@ -113,7 +113,7 @@
{
"MetricName": "load_store_spec_rate",
"MetricExpr": "LDST_SPEC / INST_SPEC",
- "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speclatively executed",
+ "BriefDescription": "The rate of load or store instructions speculatively executed to overall instructions speculatively executed",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "100percent of operations"
},
@@ -132,7 +132,7 @@
{
"MetricName": "pc_write_spec_rate",
"MetricExpr": "PC_WRITE_SPEC / INST_SPEC",
- "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speclatively executed",
+ "BriefDescription": "The rate of software change of the PC speculatively executed to overall instructions speculatively executed",
"MetricGroup": "Operation_Mix",
"ScaleUnit": "100percent of operations"
},
@@ -195,14 +195,14 @@
{
"MetricName": "stall_frontend_cache_rate",
"MetricExpr": "STALL_FRONTEND_CACHE / CPU_CYCLES",
- "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and cache miss",
+ "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and cache miss",
"MetricGroup": "Stall",
"ScaleUnit": "100percent of cycles"
},
{
"MetricName": "stall_frontend_tlb_rate",
"MetricExpr": "STALL_FRONTEND_TLB / CPU_CYCLES",
- "BriefDescription": "Proportion of cycles stalled and no ops delivered from frontend and TLB miss",
+ "BriefDescription": "Proportion of cycles stalled and no operations delivered from frontend and TLB miss",
"MetricGroup": "Stall",
"ScaleUnit": "100percent of cycles"
},
@@ -388,55 +388,55 @@
"MetricExpr": "L1D_CACHE_RW / L1D_CACHE",
"BriefDescription": "L1D cache access - demand",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
- "MetricName": "l1d_cache_access_prefetces",
+ "MetricName": "l1d_cache_access_prefetches",
"MetricExpr": "L1D_CACHE_PRFM / L1D_CACHE",
"BriefDescription": "L1D cache access - prefetch",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1d_cache_demand_misses",
"MetricExpr": "L1D_CACHE_REFILL_RW / L1D_CACHE",
"BriefDescription": "L1D cache demand misses",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1d_cache_demand_misses_read",
"MetricExpr": "L1D_CACHE_REFILL_RD / L1D_CACHE",
"BriefDescription": "L1D cache demand misses - read",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1d_cache_demand_misses_write",
"MetricExpr": "L1D_CACHE_REFILL_WR / L1D_CACHE",
"BriefDescription": "L1D cache demand misses - write",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "l1d_cache_prefetch_misses",
"MetricExpr": "L1D_CACHE_REFILL_PRFM / L1D_CACHE",
"BriefDescription": "L1D cache prefetch misses",
"MetricGroup": "Cache",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "ase_scalar_mix",
"MetricExpr": "ASE_SCALAR_SPEC / OP_SPEC",
"BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) scalar operations",
"MetricGroup": "Instructions",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
},
{
"MetricName": "ase_vector_mix",
"MetricExpr": "ASE_VECTOR_SPEC / OP_SPEC",
"BriefDescription": "Proportion of advanced SIMD data processing operations (excluding DP_SPEC/LD_SPEC) vector operations",
"MetricGroup": "Instructions",
- "ScaleUnit": "100percent of cache acceses"
+ "ScaleUnit": "100percent of cache accesses"
}
]
diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
index 4cc50b7da526..4001cc5753a7 100644
--- a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
+++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json
@@ -81,7 +81,7 @@
"BriefDescription": "L2D TLB access"
},
{
- "PublicDescription": "Level 2 access to instruciton TLB that caused a page table walk. This event counts on any instruciton access which causes L2I_TLB_REFILL to count",
+ "PublicDescription": "Level 2 access to instruction TLB that caused a page table walk. This event counts on any instruction access which causes L2I_TLB_REFILL to count",
"EventCode": "0x35",
"EventName": "L2I_TLB_ACCESS",
"BriefDescription": "L2I TLB access"
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/ddrc.json
new file mode 100644
index 000000000000..aa7b58721dc7
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/ddrc.json
@@ -0,0 +1,9 @@
+[
+ {
+ "BriefDescription": "ddr cycles event",
+ "EventCode": "0x00",
+ "EventName": "imx94_ddr.cycles",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/metrics.json
new file mode 100644
index 000000000000..629f1f52761e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/arm64/freescale/imx94/sys/metrics.json
@@ -0,0 +1,450 @@
+[
+ {
+ "BriefDescription": "bandwidth usage for lpddr5 evk board",
+ "MetricName": "imx94_bandwidth_usage.lpddr5",
+ "MetricExpr": "(( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ + imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32 / duration_time) / (4266 * 1000000 * 4)",
+ "ScaleUnit": "1e2%",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bandwidth usage for lpddr4 evk board",
+ "MetricName": "imx94_bandwidth_usage.lpddr4",
+ "MetricExpr": "(( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ + imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32 / duration_time) / (4266 * 1000000 * 4)",
+ "ScaleUnit": "1e2%",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of all masters read from ddr",
+ "MetricName": "imx94_ddr_read.all",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of all masters write to ddr",
+ "MetricName": "imx94_ddr_write.all",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of all a55 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_all",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x3fc\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of all a55 write to ddr",
+ "MetricName": "imx94_ddr_write.a55_all",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3fc\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 0 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x3ff\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 0 write to ddr",
+ "MetricName": "imx94_ddr_write.a55_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3ff\\,axi_id\\=0x000@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 1 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x001@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 1 write to ddr",
+ "MetricName": "imx94_ddr_write.a55_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x001@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 2 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x00f\\,axi_id\\=0x002@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 2 write to ddr",
+ "MetricName": "imx94_ddr_write.a55_2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x002@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 3 read from ddr",
+ "MetricName": "imx94_ddr_read.a55_3",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x003@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of a55 core 3 write to ddr",
+ "MetricName": "imx94_ddr_write.a55_3",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x003@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m7 core1 read from ddr",
+ "MetricName": "imx94_ddr_read.m7_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x004@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m7 core1 write to ddr",
+ "MetricName": "imx94_ddr_write.m7_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x004@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m33 core1 (in netc) read from ddr",
+ "MetricName": "imx94_ddr_read.m33_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x00f\\,axi_id\\=0x005@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m33 core1 (in netc) write to ddr",
+ "MetricName": "imx94_ddr_write.m33_1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x005@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pcie2 read from ddr",
+ "MetricName": "imx94_ddr_read.pcie2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x006@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pcie2 write to ddr",
+ "MetricName": "imx94_ddr_write.pcie2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x006@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of Cortex-A DSU L3 evicted/ACP transactions read from ddr",
+ "MetricName": "imx94_ddr_read.cortex_a_dsu",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x007@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of Cortex-A DSU L3 evicted/ACP transactions write to ddr",
+ "MetricName": "imx94_ddr_write.cortex_a_dsu",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x007@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m33 core0 read from ddr",
+ "MetricName": "imx94_ddr_read.m33_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x00f\\,axi_id\\=0x008@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m33 core0 write to ddr",
+ "MetricName": "imx94_ddr_write.m33_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x008@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m7 core0 read from ddr",
+ "MetricName": "imx94_ddr_read.m7_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x009@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of m7 core0 write to ddr",
+ "MetricName": "imx94_ddr_write.m7_0",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x009@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of sentinel read from ddr",
+ "MetricName": "imx94_ddr_read.sentinel",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x00a@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of sentinel write to ddr",
+ "MetricName": "imx94_ddr_write.sentinel",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00a@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of edma1 read from ddr",
+ "MetricName": "imx94_ddr_read.edma1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x00f\\,axi_id\\=0x00b@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of edma1 write to ddr",
+ "MetricName": "imx94_ddr_write.edma1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00b@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of edma2 read from ddr",
+ "MetricName": "imx94_ddr_read.edma2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x00c@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of edma2 write to ddr",
+ "MetricName": "imx94_ddr_write.edma2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00c@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of netc read from ddr",
+ "MetricName": "imx94_ddr_read.netc",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x00f\\,axi_id\\=0x00d@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of netc write to ddr",
+ "MetricName": "imx94_ddr_write.netc",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00d@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of aonmix read from ddr",
+ "MetricName": "imx94_ddr_read.aonmix",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x00f\\,axi_id\\=0x00f@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of aonmix write to ddr",
+ "MetricName": "imx94_ddr_write.aonmix",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x00f\\,axi_id\\=0x00f@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of npumix read from ddr",
+ "MetricName": "imx94_ddr_read.npumix",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x3f0\\,axi_id\\=0x010@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of npumix write to ddr",
+ "MetricName": "imx94_ddr_write.npumix",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x010@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc1 read from ddr",
+ "MetricName": "imx94_ddr_read.usdhc1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x3f0\\,axi_id\\=0x0b0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc1 write to ddr",
+ "MetricName": "imx94_ddr_write.usdhc1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x0b0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc2 read from ddr",
+ "MetricName": "imx94_ddr_read.usdhc2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x3f0\\,axi_id\\=0x0c0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc2 write to ddr",
+ "MetricName": "imx94_ddr_write.usdhc2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x0c0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc3 read from ddr",
+ "MetricName": "imx94_ddr_read.usdhc3",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x3f0\\,axi_id\\=0x0d0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usdhc3 write to ddr",
+ "MetricName": "imx94_ddr_write.usdhc3",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x0d0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of xspi read from ddr",
+ "MetricName": "imx94_ddr_read.xspi",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x3f0\\,axi_id\\=0x0f0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of xspi write to ddr",
+ "MetricName": "imx94_ddr_write.xspi",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x0f0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pcie1 read from ddr",
+ "MetricName": "imx94_ddr_read.pcie1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x3f0\\,axi_id\\=0x100@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pcie1 write to ddr",
+ "MetricName": "imx94_ddr_write.pcie1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x100@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usb1 read from ddr",
+ "MetricName": "imx94_ddr_read.usb1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x3f0\\,axi_id\\=0x140@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usb1 write to ddr",
+ "MetricName": "imx94_ddr_write.usb1",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x140@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usb2 read from ddr",
+ "MetricName": "imx94_ddr_read.usb2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt2\\,axi_mask\\=0x3f0\\,axi_id\\=0x150@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of usb2 write to ddr",
+ "MetricName": "imx94_ddr_write.usb2",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x150@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pxp read from ddr",
+ "MetricName": "imx94_ddr_read.pxp",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x3f0\\,axi_id\\=0x2a0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of pxp write to ddr",
+ "MetricName": "imx94_ddr_write.pxp",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x2a0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of dcif read from ddr",
+ "MetricName": "imx94_ddr_read.dcif",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt1\\,axi_mask\\=0x3f0\\,axi_id\\=0x2b0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ },
+ {
+ "BriefDescription": "bytes of dcif write to ddr",
+ "MetricName": "imx94_ddr_write.dcif",
+ "MetricExpr": "( imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x3f0\\,axi_id\\=0x2b0@ ) * 32",
+ "ScaleUnit": "9.765625e-4KB",
+ "Unit": "imx9_ddr",
+ "Compat": "imx94"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/common/common/legacy-hardware.json b/tools/perf/pmu-events/arch/common/common/legacy-hardware.json
new file mode 100644
index 000000000000..71700647f19b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/common/common/legacy-hardware.json
@@ -0,0 +1,72 @@
+[
+ {
+ "EventName": "cpu-cycles",
+ "BriefDescription": "Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cycles].",
+ "LegacyConfigCode": "0"
+ },
+ {
+ "EventName": "cycles",
+ "BriefDescription": "Total cycles. Be wary of what happens during CPU frequency scaling [This event is an alias of cpu-cycles].",
+ "LegacyConfigCode": "0"
+ },
+ {
+ "EventName": "instructions",
+ "BriefDescription": "Retired instructions. Be careful, these can be affected by various issues, most notably hardware interrupt counts.",
+ "LegacyConfigCode": "1"
+ },
+ {
+ "EventName": "cache-references",
+ "BriefDescription": "Cache accesses. Usually this indicates Last Level Cache accesses but this may vary depending on your CPU. This may include prefetches and coherency messages; again this depends on the design of your CPU.",
+ "LegacyConfigCode": "2"
+ },
+ {
+ "EventName": "cache-misses",
+ "BriefDescription": "Cache misses. Usually this indicates Last Level Cache misses; this is intended to be used in conjunction with the PERF_COUNT_HW_CACHE_REFERENCES event to calculate cache miss rates.",
+ "LegacyConfigCode": "3"
+ },
+ {
+ "EventName": "branches",
+ "BriefDescription": "Retired branch instructions [This event is an alias of branch-instructions].",
+ "LegacyConfigCode": "4"
+ },
+ {
+ "EventName": "branch-instructions",
+ "BriefDescription": "Retired branch instructions [This event is an alias of branches].",
+ "LegacyConfigCode": "4"
+ },
+ {
+ "EventName": "branch-misses",
+ "BriefDescription": "Mispredicted branch instructions.",
+ "LegacyConfigCode": "5"
+ },
+ {
+ "EventName": "bus-cycles",
+ "BriefDescription": "Bus cycles, which can be different from total cycles.",
+ "LegacyConfigCode": "6"
+ },
+ {
+ "EventName": "stalled-cycles-frontend",
+ "BriefDescription": "Stalled cycles during issue [This event is an alias of idle-cycles-frontend].",
+ "LegacyConfigCode": "7"
+ },
+ {
+ "EventName": "idle-cycles-frontend",
+ "BriefDescription": "Stalled cycles during issue [This event is an alias of stalled-cycles-frontend].",
+ "LegacyConfigCode": "7"
+ },
+ {
+ "EventName": "stalled-cycles-backend",
+ "BriefDescription": "Stalled cycles during retirement [This event is an alias of idle-cycles-backend].",
+ "LegacyConfigCode": "8"
+ },
+ {
+ "EventName": "idle-cycles-backend",
+ "BriefDescription": "Stalled cycles during retirement [This event is an alias of stalled-cycles-backend].",
+ "LegacyConfigCode": "8"
+ },
+ {
+ "EventName": "ref-cycles",
+ "BriefDescription": "Total cycles; not affected by CPU frequency scaling.",
+ "LegacyConfigCode": "9"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/common/common/metrics.json b/tools/perf/pmu-events/arch/common/common/metrics.json
new file mode 100644
index 000000000000..0d010b3ebc6d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/common/common/metrics.json
@@ -0,0 +1,151 @@
+[
+ {
+ "BriefDescription": "Average CPU utilization",
+ "MetricExpr": "(software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@) / (duration_time * 1e9)",
+ "MetricGroup": "Default",
+ "MetricName": "CPUs_utilized",
+ "ScaleUnit": "1CPUs",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Context switches per CPU second",
+ "MetricExpr": "(software@context\\-switches\\,name\\=context\\-switches@ * 1e9) / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "cs_per_second",
+ "ScaleUnit": "1cs/sec",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Process migrations to a new CPU per CPU second",
+ "MetricExpr": "(software@cpu\\-migrations\\,name\\=cpu\\-migrations@ * 1e9) / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "migrations_per_second",
+ "ScaleUnit": "1migrations/sec",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Page faults per CPU second",
+ "MetricExpr": "(software@page\\-faults\\,name\\=page\\-faults@ * 1e9) / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "page_faults_per_second",
+ "ScaleUnit": "1faults/sec",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Instructions Per Cycle",
+ "MetricExpr": "instructions / cpu\\-cycles",
+ "MetricGroup": "Default",
+ "MetricName": "insn_per_cycle",
+ "MetricThreshold": "insn_per_cycle < 1",
+ "ScaleUnit": "1instructions",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Max front or backend stalls per instruction",
+ "MetricExpr": "max(stalled\\-cycles\\-frontend, stalled\\-cycles\\-backend) / instructions",
+ "MetricGroup": "Default",
+ "MetricName": "stalled_cycles_per_instruction",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Frontend stalls per cycle",
+ "MetricExpr": "stalled\\-cycles\\-frontend / cpu\\-cycles",
+ "MetricGroup": "Default",
+ "MetricName": "frontend_cycles_idle",
+ "MetricThreshold": "frontend_cycles_idle > 0.1",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Backend stalls per cycle",
+ "MetricExpr": "stalled\\-cycles\\-backend / cpu\\-cycles",
+ "MetricGroup": "Default",
+ "MetricName": "backend_cycles_idle",
+ "MetricThreshold": "backend_cycles_idle > 0.2",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Cycles per CPU second",
+ "MetricExpr": "cpu\\-cycles / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "cycles_frequency",
+ "ScaleUnit": "1GHz",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Branches per CPU second",
+ "MetricExpr": "branches / (software@cpu\\-clock\\,name\\=cpu\\-clock@ if #target_cpu else software@task\\-clock\\,name\\=task\\-clock@)",
+ "MetricGroup": "Default",
+ "MetricName": "branch_frequency",
+ "ScaleUnit": "1000M/sec",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "Branch miss rate",
+ "MetricExpr": "branch\\-misses / branches",
+ "MetricGroup": "Default",
+ "MetricName": "branch_miss_rate",
+ "MetricThreshold": "branch_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "L1D miss rate",
+ "MetricExpr": "L1\\-dcache\\-load\\-misses / L1\\-dcache\\-loads",
+ "MetricGroup": "Default2",
+ "MetricName": "l1d_miss_rate",
+ "MetricThreshold": "l1d_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "LLC miss rate",
+ "MetricExpr": "LLC\\-load\\-misses / LLC\\-loads",
+ "MetricGroup": "Default2",
+ "MetricName": "llc_miss_rate",
+ "MetricThreshold": "llc_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "L1I miss rate",
+ "MetricExpr": "L1\\-icache\\-load\\-misses / L1\\-icache\\-loads",
+ "MetricGroup": "Default3",
+ "MetricName": "l1i_miss_rate",
+ "MetricThreshold": "l1i_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "dTLB miss rate",
+ "MetricExpr": "dTLB\\-load\\-misses / dTLB\\-loads",
+ "MetricGroup": "Default3",
+ "MetricName": "dtlb_miss_rate",
+ "MetricThreshold": "dtlb_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "iTLB miss rate",
+ "MetricExpr": "iTLB\\-load\\-misses / iTLB\\-loads",
+ "MetricGroup": "Default3",
+ "MetricName": "itlb_miss_rate",
+ "MetricThreshold": "itlb_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ },
+ {
+ "BriefDescription": "L1 prefetch miss rate",
+ "MetricExpr": "L1\\-dcache\\-prefetch\\-misses / L1\\-dcache\\-prefetches",
+ "MetricGroup": "Default4",
+ "MetricName": "l1_prefetch_miss_rate",
+ "MetricThreshold": "l1_prefetch_miss_rate > 0.05",
+ "ScaleUnit": "100%",
+ "DefaultShowEvents": "1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/common/common/software.json b/tools/perf/pmu-events/arch/common/common/software.json
index f2551f1107fd..e6819ae219bb 100644
--- a/tools/perf/pmu-events/arch/common/common/software.json
+++ b/tools/perf/pmu-events/arch/common/common/software.json
@@ -3,13 +3,15 @@
"Unit": "software",
"EventName": "cpu-clock",
"BriefDescription": "Per-CPU high-resolution timer based event",
- "ConfigCode": "0"
+ "ConfigCode": "0",
+ "ScaleUnit": "1e-6msec"
},
{
"Unit": "software",
"EventName": "task-clock",
"BriefDescription": "Per-task high-resolution timer based event",
- "ConfigCode": "1"
+ "ConfigCode": "1",
+ "ScaleUnit": "1e-6msec"
},
{
"Unit": "software",
diff --git a/tools/perf/pmu-events/arch/common/common/tool.json b/tools/perf/pmu-events/arch/common/common/tool.json
index 12f2ef1813a6..14d0d60a1976 100644
--- a/tools/perf/pmu-events/arch/common/common/tool.json
+++ b/tools/perf/pmu-events/arch/common/common/tool.json
@@ -70,5 +70,17 @@
"EventName": "system_tsc_freq",
"BriefDescription": "The amount a Time Stamp Counter (TSC) increases per second",
"ConfigCode": "12"
+ },
+ {
+ "Unit": "tool",
+ "EventName": "core_wide",
+ "BriefDescription": "1 if SMT is not enabled; if SMT is enabled, 1 if events are being gathered on all SMT threads, otherwise 0",
+ "ConfigCode": "13"
+ },
+ {
+ "Unit": "tool",
+ "EventName": "target_cpu",
+ "BriefDescription": "1 if CPUs are being analyzed, 0 if threads/processes are being analyzed",
+ "ConfigCode": "14"
}
]
diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv
index 0a7e7dcc81be..d5eea7f9aa9a 100644
--- a/tools/perf/pmu-events/arch/riscv/mapfile.csv
+++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv
@@ -20,5 +20,6 @@
0x489-0x8000000000000008-0x[[:xdigit:]]+,v1,sifive/p550,core
0x489-0x8000000000000[1-6]08-0x[9b][[:xdigit:]]+,v1,sifive/p650,core
0x5b7-0x0-0x0,v1,thead/c900-legacy,core
+0x5b7-0x80000000090c0d00-0x2047000,v1,thead/c900-legacy,core
0x67e-0x80000000db0000[89]0-0x[[:xdigit:]]+,v1,starfive/dubhe-80,core
0x31e-0x8000000000008a45-0x[[:xdigit:]]+,v1,andes/ax45,core
diff --git a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json
index 3ab1d3a6638c..57b785307a85 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z16/transaction.json
@@ -7,17 +7,17 @@
{
"BriefDescription": "Cycles per Instruction",
"MetricName": "cpi",
- "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(CPU_CYCLES) else 0"
},
{
"BriefDescription": "Problem State Instruction Ratio",
"MetricName": "prbstate",
- "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(PROBLEM_STATE_INSTRUCTIONS) else 0"
},
{
"BriefDescription": "Level One Miss per 100 Instructions",
"MetricName": "l1mp",
- "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(L1I_DIR_WRITES) else 0"
},
{
"BriefDescription": "Percentage sourced from Level 2 cache",
@@ -52,7 +52,7 @@
{
"BriefDescription": "Estimated Instruction Complexity CPI infinite Level 1",
"MetricName": "est_cpi",
- "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(CPU_CYCLES) else 0"
},
{
"BriefDescription": "Estimated Sourcing Cycles per Level 1 Miss",
diff --git a/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json b/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json
index 74df533c8b6f..7ded6a5a76c0 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z17/transaction.json
@@ -7,17 +7,17 @@
{
"BriefDescription": "Cycles per Instruction",
"MetricName": "cpi",
- "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "CPU_CYCLES / INSTRUCTIONS if has_event(CPU_CYCLES) else 0"
},
{
"BriefDescription": "Problem State Instruction Ratio",
"MetricName": "prbstate",
- "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "(PROBLEM_STATE_INSTRUCTIONS / INSTRUCTIONS) * 100 if has_event(PROBLEM_STATE_INSTRUCTIONS) else 0"
},
{
"BriefDescription": "Level One Miss per 100 Instructions",
"MetricName": "l1mp",
- "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "((L1I_DIR_WRITES + L1D_DIR_WRITES) / INSTRUCTIONS) * 100 if has_event(L1I_DIR_WRITES) else 0"
},
{
"BriefDescription": "Percentage sourced from Level 2 cache",
@@ -52,7 +52,7 @@
{
"BriefDescription": "Estimated Instruction Complexity CPI infinite Level 1",
"MetricName": "est_cpi",
- "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(INSTRUCTIONS) else 0"
+ "MetricExpr": "(CPU_CYCLES / INSTRUCTIONS) - (L1C_TLB2_MISSES / INSTRUCTIONS) if has_event(L1C_TLB2_MISSES) else 0"
},
{
"BriefDescription": "Estimated Sourcing Cycles per Level 1 Miss",
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index 377dfecd96bd..cae7c0cf02f2 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -552,7 +552,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_atom@",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
@@ -751,7 +751,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -790,11 +790,20 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy.",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
@@ -802,23 +811,14 @@
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20",
@@ -826,7 +826,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -862,7 +862,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -879,7 +879,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -992,7 +992,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(25 * tma_info_system_core_frequency * (cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD@ * (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ / (cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM@ + cpu_core@OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD@))) + 24 * tma_info_system_core_frequency * cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -1109,7 +1108,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -1238,7 +1237,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1851,7 +1850,7 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
"MetricExpr": "cpu_core@UOPS_EXECUTED.THREAD@ / (cpu_core@UOPS_EXECUTED.CORE_CYCLES_GE_1@ / 2 if #SMT_on else cpu_core@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute",
@@ -1912,7 +1911,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc\\,cpu=cpu_core@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency",
"Unit": "cpu_core"
@@ -1926,7 +1925,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_core@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized",
"Unit": "cpu_core"
@@ -1936,7 +1935,7 @@
"MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / tma_info_system_time / 1e3",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
{
@@ -1980,7 +1979,6 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(UNC_ARB_TRK_OCCUPANCY.RD + UNC_ARB_DAT_OCCUPANCY.RD) / UNC_ARB_TRK_REQUESTS.RD",
"MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "tma_info_system_mem_read_latency",
@@ -2032,6 +2030,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+ "MetricExpr": "tma_info_system_socket_clks / 1e9 / tma_info_system_time",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_uncore_frequency",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
"MetricGroup": "Pipeline",
@@ -2150,12 +2155,12 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (cpu_core@MEM_INST_RETIRED.ALL_LOADS@ - cpu_core@MEM_LOAD_RETIRED.FB_HIT@ - cpu_core@MEM_LOAD_RETIRED.L1_MISS@) * 20 / 100, max(cpu_core@CYCLE_ACTIVITY.CYCLES_MEM_ANY@ - cpu_core@MEMORY_ACTIVITY.CYCLES_L1D_MISS@, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2171,7 +2176,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "3 * tma_info_system_core_frequency * cpu_core@MEM_LOAD_RETIRED.L2_HIT@ * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -2192,12 +2196,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "9 * tma_info_system_core_frequency * (cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2279,6 +2282,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(16 * max(0, cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ - cpu_core@L2_RQSTS.ALL_RFO@) + cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ / cpu_core@MEM_INST_RETIRED.ALL_STORES@ * (10 * cpu_core@L2_RQSTS.RFO_HIT@ + min(cpu_core@CPU_CLK_UNHALTED.THREAD@, cpu_core@OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO@))) / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -2314,7 +2318,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2324,13 +2328,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -2341,7 +2345,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2400,7 +2403,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "max(cpu_core@IDQ.MS_CYCLES_ANY@, cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@ / (cpu_core@UOPS_RETIRED.SLOTS@ / cpu_core@UOPS_ISSUED.ANY@)) / tma_info_core_core_clks / 2",
+ "MetricExpr": "max(cpu_core@IDQ.MS_CYCLES_ANY@, cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@ / (cpu_core@UOPS_RETIRED.SLOTS@ / cpu_core@UOPS_ISSUED.ANY@)) / tma_info_core_core_clks / 2.4",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2439,6 +2442,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2507,6 +2511,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2517,6 +2522,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "(cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ + max(cpu_core@RS.EMPTY_RESOURCE@ - cpu_core@RESOURCE_STALLS.SCOREBOARD@, 0)) / tma_info_thread_clks * (cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2527,6 +2533,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2537,7 +2544,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2548,7 +2554,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2560,7 +2565,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2591,7 +2596,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2626,7 +2630,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
index 5461576dafc7..be15a7f83717 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
@@ -4,7 +4,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PublicDescription": "L1D.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace. Available PDIST counters: 0",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -24,7 +23,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -36,7 +35,7 @@
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -47,7 +46,6 @@
"Deprecated": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALL",
- "PublicDescription": "This event is deprecated. Refer to new event L1D_PEND_MISS.L2_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -57,7 +55,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -67,7 +65,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -78,7 +76,7 @@
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles. Available PDIST counters: 0",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -88,7 +86,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
"UMask": "0x1f",
"Unit": "cpu_core"
@@ -98,7 +96,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3 Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -108,7 +106,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
"SampleAfterValue": "200003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -118,7 +116,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -137,7 +135,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
"UMask": "0xff",
"Unit": "cpu_core"
@@ -167,7 +165,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f",
"Unit": "cpu_core"
@@ -177,7 +175,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
"UMask": "0xe4",
"Unit": "cpu_core"
@@ -187,7 +185,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0xe1",
"Unit": "cpu_core"
@@ -197,7 +195,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PublicDescription": "Counts demand requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x27",
"Unit": "cpu_core"
@@ -207,7 +205,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PublicDescription": "L2_RQSTS.ALL_HWPF Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0xf0",
"Unit": "cpu_core"
@@ -217,7 +214,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
"UMask": "0xe2",
"Unit": "cpu_core"
@@ -227,7 +224,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
"UMask": "0xc4",
"Unit": "cpu_core"
@@ -237,7 +234,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
"UMask": "0x24",
"Unit": "cpu_core"
@@ -247,7 +244,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc1",
"Unit": "cpu_core"
@@ -257,7 +254,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -267,7 +264,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PublicDescription": "L2_RQSTS.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x30",
"Unit": "cpu_core"
@@ -277,7 +273,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f",
"Unit": "cpu_core"
@@ -287,7 +283,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
"UMask": "0xff",
"Unit": "cpu_core"
@@ -297,7 +293,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc2",
"Unit": "cpu_core"
@@ -307,7 +303,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x22",
"Unit": "cpu_core"
@@ -317,7 +313,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0xc8",
"Unit": "cpu_core"
@@ -327,7 +323,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0x28",
"Unit": "cpu_core"
@@ -337,7 +333,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -357,7 +353,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x41",
"Unit": "cpu_core"
@@ -377,7 +373,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x4f",
"Unit": "cpu_core"
@@ -552,7 +548,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss) Available PDIST counters: 0",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
"UMask": "0xfd",
"Unit": "cpu_core"
@@ -853,7 +849,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PublicDescription": "MEM_STORE_RETIRED.L2_HIT Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -882,7 +877,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -895,7 +890,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -908,7 +903,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -921,7 +916,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -934,7 +929,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -947,7 +942,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -960,7 +955,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -973,7 +968,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
@@ -1050,7 +1045,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses Available PDIST counters: 0",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -1068,6 +1063,30 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_M.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1F803C0008",
+ "PublicDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_NONM.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1F803C1000",
+ "PublicDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any type of response.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
@@ -1308,6 +1327,18 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x1F803C4477",
+ "PublicDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts L1 data cache software prefetches which include T0/T1/T2 and NTA (except PREFETCHW) that have any type of response.",
"Counter": "0,1,2,3,4,5",
"EventCode": "0xB7",
@@ -1372,7 +1403,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "OFFCORE_REQUESTS.ALL_REQUESTS Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -1382,7 +1412,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulting from any request type.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1392,7 +1422,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and non-cacheable code read requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1402,7 +1432,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore. Available PDIST counters: 0",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1412,7 +1442,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1424,7 +1454,6 @@
"Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
- "PublicDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1436,7 +1465,6 @@
"Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1447,7 +1475,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1458,7 +1486,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "PublicDescription": "Cycles where at least 1 outstanding demand data read request is pending. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1469,7 +1496,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1480,7 +1507,6 @@
"Errata": "ADL038",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1490,7 +1516,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1500,7 +1526,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1510,7 +1536,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
- "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory. Available PDIST counters: 0",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1520,7 +1546,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.ANY",
- "PublicDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0xf",
"Unit": "cpu_core"
@@ -1530,7 +1555,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1540,7 +1565,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1550,7 +1575,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1560,7 +1585,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
index d01f1b163ed8..62fd70f220e5 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
@@ -14,7 +14,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PublicDescription": "ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -33,7 +32,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PublicDescription": "Counts all microcode Floating Point assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -43,7 +42,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PublicDescription": "ASSISTS.SSE_AVX_MIX Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -53,7 +51,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -63,7 +60,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -73,7 +69,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -83,7 +78,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V0",
- "PublicDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -93,7 +87,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V1",
- "PublicDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -103,7 +96,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V2",
- "PublicDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -113,7 +105,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -123,7 +115,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -133,7 +125,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -143,7 +135,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -153,7 +145,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 and/or 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x18",
"Unit": "cpu_core"
@@ -163,7 +155,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -173,7 +165,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -183,7 +175,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -193,7 +185,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.VECTOR",
- "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0xfc",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
index dae3174a74fb..ff3b30c2619a 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
@@ -14,7 +14,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "BACLEARS.ANY",
- "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -24,7 +24,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles in which Instruction Length Decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -34,7 +34,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PublicDescription": "Cycles the Microcode Sequencer is busy. Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -44,7 +43,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE. Available PDIST counters: 0",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -302,7 +301,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -314,7 +313,6 @@
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
- "PublicDescription": "ICACHE_DATA.STALL_PERIODS Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -324,7 +322,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -335,7 +333,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -346,7 +344,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -356,7 +354,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -367,7 +365,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -378,7 +376,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -388,7 +386,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -399,7 +397,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -411,7 +409,7 @@
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer. Available PDIST counters: 0",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -421,7 +419,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS).",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -431,7 +429,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -442,7 +440,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -454,7 +452,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -464,7 +462,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -475,7 +473,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -487,7 +485,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/memory.json b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
index 07f5786bdbc0..a0260d5b8619 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
@@ -5,7 +5,6 @@
"CounterMask": "6",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x6",
"Unit": "cpu_core"
@@ -79,7 +78,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -90,7 +89,6 @@
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -101,7 +99,6 @@
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -112,7 +109,7 @@
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_core"
@@ -123,7 +120,7 @@
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -417,7 +414,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Counts demand data read requests that miss the L3 cache. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -427,7 +423,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/other.json b/tools/perf/pmu-events/arch/x86/alderlake/other.json
index 5f64138edfe4..af46cde26b54 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/other.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.HARDWARE",
- "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. the event also counts for Machine Ordering count. Available PDIST counters: 0",
+ "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. the event also counts for Machine Ordering count.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -14,7 +14,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
- "PublicDescription": "ASSISTS.PAGE_FAULT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -24,7 +23,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_1",
- "PublicDescription": "CORE_POWER.LICENSE_1 Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -34,7 +32,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_2",
- "PublicDescription": "CORE_POWER.LICENSE_2 Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -44,7 +41,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x28",
"EventName": "CORE_POWER.LICENSE_3",
- "PublicDescription": "CORE_POWER.LICENSE_3 Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -113,7 +109,7 @@
"CounterMask": "1",
"EventCode": "0x2d",
"EventName": "XQ.FULL_CYCLES",
- "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache). Available PDIST counters: 0",
+ "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
index 48ef2a8cc49a..57a8c78cdc49 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
@@ -6,7 +6,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -27,14 +26,15 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIV_ACTIVE",
- "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts the number of active floating point and integer dividers per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_OCCUPANCY",
"SampleAfterValue": "1000003",
@@ -42,8 +42,9 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of floating point and integer divider uops executed per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_UOPS",
"SampleAfterValue": "1000003",
@@ -57,7 +58,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FP_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -78,7 +78,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.IDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the integer divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -108,7 +107,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.INT_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -118,7 +116,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.ANY",
- "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
"SampleAfterValue": "100003",
"UMask": "0x1b",
"Unit": "cpu_core"
@@ -549,7 +547,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C01",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -559,7 +557,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C02",
- "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -569,7 +567,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C0_WAIT",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
"SampleAfterValue": "2000003",
"UMask": "0x70",
"Unit": "cpu_core"
@@ -597,7 +595,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -607,7 +605,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted. Available PDIST counters: 0",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
"SampleAfterValue": "25003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -617,7 +615,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -629,7 +626,6 @@
"EdgeDetect": "1",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -649,7 +645,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
- "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -687,7 +683,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -724,7 +720,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. Available PDIST counters: 0",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
@@ -734,7 +730,6 @@
"CounterMask": "8",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -745,7 +740,6 @@
"CounterMask": "1",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "PublicDescription": "Cycles while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -756,7 +750,6 @@
"CounterMask": "16",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "PublicDescription": "Cycles while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -767,7 +760,6 @@
"CounterMask": "12",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xc",
"Unit": "cpu_core"
@@ -778,7 +770,6 @@
"CounterMask": "5",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_core"
@@ -789,7 +780,6 @@
"CounterMask": "4",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "PublicDescription": "Total execution stalls. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -799,7 +789,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -809,7 +799,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 2 or 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0xc",
"Unit": "cpu_core"
@@ -819,7 +808,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -829,7 +818,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -839,7 +828,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -850,7 +839,6 @@
"CounterMask": "5",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
- "PublicDescription": "Execution stalls while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -861,7 +849,7 @@
"CounterMask": "2",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -871,7 +859,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load. Available PDIST counters: 0",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
"SampleAfterValue": "1000003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -881,7 +869,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "INST_DECODED.DECODERS",
- "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -927,7 +915,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -937,7 +924,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -956,7 +943,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -968,7 +955,7 @@
"EdgeDetect": "1",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEARS_COUNT",
- "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -978,7 +965,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path. Available PDIST counters: 0",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"SampleAfterValue": "500009",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -988,7 +975,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event. Available PDIST counters: 0",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1000,7 +987,6 @@
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
"MSRValue": "0x7",
- "PublicDescription": "Bubble cycles of BAClear (Unknown Branch). Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -1010,7 +996,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UOP_DROPPING",
- "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons Available PDIST counters: 0",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1020,7 +1006,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.128BIT",
- "PublicDescription": "INT_VEC_RETIRED.128BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x13",
"Unit": "cpu_core"
@@ -1030,7 +1015,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.256BIT",
- "PublicDescription": "INT_VEC_RETIRED.256BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xac",
"Unit": "cpu_core"
@@ -1040,7 +1024,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_128",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -1050,7 +1034,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_256",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0xc",
"Unit": "cpu_core"
@@ -1060,7 +1044,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.MUL_256",
- "PublicDescription": "INT_VEC_RETIRED.MUL_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -1070,7 +1053,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.SHUFFLES",
- "PublicDescription": "INT_VEC_RETIRED.SHUFFLES Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -1080,7 +1062,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_128",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_128 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1090,7 +1071,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_256",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1119,7 +1099,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1138,7 +1118,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
"UMask": "0x88",
"Unit": "cpu_core"
@@ -1148,7 +1128,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
"UMask": "0x82",
"Unit": "cpu_core"
@@ -1158,7 +1138,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1169,7 +1149,7 @@
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1180,7 +1160,7 @@
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1190,7 +1170,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa8",
"EventName": "LSD.UOPS",
- "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1202,7 +1182,7 @@
"EdgeDetect": "1",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.COUNT",
- "PublicDescription": "Counts the number of machine clears (nukes) of any type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1258,7 +1238,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear. Available PDIST counters: 0",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1268,7 +1248,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
- "PublicDescription": "number of LFENCE retired instructions Available PDIST counters: 0",
+ "PublicDescription": "number of LFENCE retired instructions",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1288,7 +1268,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcc",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT. Available PDIST counters: 0",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1298,7 +1278,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end. Available PDIST counters: 0",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1308,7 +1288,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
- "PublicDescription": "Counts cycles where the pipeline is stalled due to serializing operations. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1318,7 +1297,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses) Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
"SampleAfterValue": "1000003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1331,7 +1310,7 @@
"EventCode": "0xa5",
"EventName": "RS.EMPTY_COUNT",
"Invert": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events) Available PDIST counters: 0",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
"SampleAfterValue": "100003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1341,7 +1320,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY_RESOURCE",
- "PublicDescription": "Cycles when Reservation Station (RS) is empty due to a resource in the back-end Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1355,7 +1333,6 @@
"EventCode": "0xa5",
"EventName": "RS_EMPTY.COUNT",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1366,7 +1343,6 @@
"Deprecated": "1",
"EventCode": "0xa5",
"EventName": "RS_EMPTY.CYCLES",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x7",
"Unit": "cpu_core"
@@ -1395,7 +1371,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
"SampleAfterValue": "10000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1405,7 +1381,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
- "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
"SampleAfterValue": "10000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1415,7 +1391,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction. Available PDIST counters: 0",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1425,7 +1401,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "PublicDescription": "TOPDOWN.MEMORY_BOUND_SLOTS Available PDIST counters: 0",
"SampleAfterValue": "10000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1444,7 +1419,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1661,7 +1636,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
- "PublicDescription": "UOPS_DECODED.DEC0_UOPS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1671,7 +1645,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_0",
- "PublicDescription": "Number of uops dispatch to execution port 0. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1681,7 +1655,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_1",
- "PublicDescription": "Number of uops dispatch to execution port 1. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 1.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1691,7 +1665,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
- "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1701,7 +1675,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_4_9",
- "PublicDescription": "Number of uops dispatch to execution ports 4 and 9 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1711,7 +1685,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_5_11",
- "PublicDescription": "Number of uops dispatch to execution ports 5 and 11 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1721,7 +1695,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_6",
- "PublicDescription": "Number of uops dispatch to execution port 6. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 6.",
"SampleAfterValue": "2000003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -1731,7 +1705,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_7_8",
- "PublicDescription": "Number of uops dispatch to execution ports 7 and 8. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.",
"SampleAfterValue": "2000003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -1742,7 +1716,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1753,7 +1727,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1764,7 +1738,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1775,7 +1749,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1786,7 +1760,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1797,7 +1771,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1808,7 +1782,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1819,7 +1793,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1831,7 +1805,7 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALLS",
"Invert": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1844,7 +1818,6 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_EXECUTED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1854,7 +1827,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Counts the number of uops to be executed per-thread each cycle. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1864,7 +1836,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1883,7 +1855,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1894,7 +1866,6 @@
"CounterMask": "1",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.CYCLES",
- "PublicDescription": "UOPS_ISSUED.CYCLES Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1913,7 +1884,7 @@
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.CYCLES",
- "PublicDescription": "Counts cycles where at least one uop has retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1923,7 +1894,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
- "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1954,7 +1925,6 @@
"EventName": "UOPS_RETIRED.MS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "UOPS_RETIRED.MS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1964,7 +1934,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "Counts the retirement slots used each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the retirement slots used each cycle.",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1976,7 +1946,7 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALLS",
"Invert": "1",
- "PublicDescription": "This event counts cycles without actually retired uops. Available PDIST counters: 0",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1989,7 +1959,6 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_RETIRED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json
index 7c0779c74154..b5604c7534e1 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-interconnect.json
@@ -65,7 +65,6 @@
"Counter": "0,1",
"EventCode": "0x81",
"EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "ARB"
@@ -103,7 +102,6 @@
"Counter": "0,1",
"EventCode": "0x81",
"EventName": "UNC_ARB_TRK_REQUESTS.RD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "ARB"
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
index ffbbd08acc68..132ce48af6d9 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -15,7 +15,7 @@
"CounterMask": "1",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -35,7 +35,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
@@ -45,7 +45,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -55,7 +55,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -65,7 +65,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -75,7 +75,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -85,7 +85,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -96,7 +96,7 @@
"CounterMask": "1",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -116,7 +116,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
@@ -126,7 +126,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -136,7 +136,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -146,7 +146,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -156,7 +156,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -184,7 +184,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.STLB_HIT",
- "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -195,7 +195,7 @@
"CounterMask": "1",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -215,7 +215,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe",
"Unit": "cpu_core"
@@ -225,7 +225,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -235,7 +235,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -245,7 +245,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
index ce93648043ef..0f72c9192df6 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -460,12 +460,12 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricName": "tma_info_system_cpu_utilization"
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE_P@k / CPU_CLK_UNHALTED.CORE",
+ "MetricExpr": "CPU_CLK_UNHALTED.CORE_P:k / CPU_CLK_UNHALTED.CORE",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_kernel_utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
index 669f4979b651..76a841675337 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
@@ -247,7 +247,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -259,7 +259,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -271,7 +271,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -283,7 +283,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -295,7 +295,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -307,7 +307,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -319,7 +319,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -331,7 +331,7 @@
},
{
"BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
- "Counter": "0,1,2,3,4,5",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
index 1dd61baec1a9..d650cbd48c1f 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
@@ -9,16 +9,18 @@
"UMask": "0x3"
},
{
- "BriefDescription": "Counts the number of active floating point and integer dividers per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_OCCUPANCY",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
{
- "BriefDescription": "Counts the number of floating point and integer divider uops executed per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_UOPS",
"SampleAfterValue": "1000003",
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json
index 7c0779c74154..b5604c7534e1 100644
--- a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-interconnect.json
@@ -65,7 +65,6 @@
"Counter": "0,1",
"EventCode": "0x81",
"EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "ARB"
@@ -103,7 +102,6 @@
"Counter": "0,1",
"EventCode": "0x81",
"EventName": "UNC_ARB_TRK_REQUESTS.RD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x2",
"Unit": "ARB"
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json b/tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json
index b22a02450e6c..4f1f77404943 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/arl-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -567,7 +567,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_atom@",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
@@ -774,7 +774,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -786,7 +786,7 @@
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-bad\\-spec@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-bad\\-spec@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -813,35 +813,35 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_capacity / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + 
tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_capacity / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + 
tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20",
@@ -849,7 +849,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_microcode_sequencer + tma_few_uops_instructions) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_microcode_sequencer + tma_few_uops_instructions) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -858,7 +858,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -885,7 +885,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -902,7 +902,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -1042,7 +1042,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@R, 24 * tma_info_system_core_frequency) + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM@R, 25 * tma_info_system_core_frequency)) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -1095,7 +1094,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ + cpu_core@IDQ.DSB_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.DSB_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.STARVATION_CYCLES@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
"MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
@@ -1149,7 +1148,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -1166,7 +1165,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -1216,7 +1215,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_scalar",
"MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1226,7 +1225,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
"MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1236,7 +1235,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_128b",
"MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1246,7 +1245,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR\\,umask\\=0x30@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.VECTOR\\,umask\\=0x30@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_256b",
"MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1257,7 +1256,7 @@
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvFB;BvIO;Default;PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -1278,7 +1277,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1456,7 +1455,7 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@) / tma_info_thread_clks",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc",
"Unit": "cpu_core"
@@ -1597,7 +1596,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + cpu_core@FP_ARITH_INST_RETIRED.VECTOR@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
"MetricThreshold": "tma_info_inst_mix_iparith < 10",
@@ -1606,7 +1605,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx128",
"MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
@@ -1615,7 +1614,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx256",
"MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
@@ -1624,7 +1623,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_OPS_RETIRED.SCALAR_DOUBLE@",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_dp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
@@ -1633,7 +1632,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_OPS_RETIRED.SCALAR_SINGLE@",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_sp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
@@ -1658,7 +1657,7 @@
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_ipflop",
"MetricThreshold": "tma_info_inst_mix_ipflop < 10",
@@ -1713,7 +1712,7 @@
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * cpu_core@L1D.REPLACEMENT@ / 1e9 / tma_info_system_time",
+ "MetricExpr": "64 * cpu_core@L1D.L1_REPLACEMENT@ / 1e9 / tma_info_system_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "tma_info_memory_l1d_cache_fill_bw",
"Unit": "cpu_core"
@@ -1726,6 +1725,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "L0 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * (cpu_core@MEM_LOAD_RETIRED.L1_MISS@ + cpu_core@MEM_LOAD_RETIRED.L1_HIT_L1@) / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_l1dl0_mpki",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / cpu_core@INST_RETIRED.ANY@",
"MetricGroup": "CacheHits;Mem",
@@ -1941,6 +1947,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "cpu_core@IDQ.MS_UOPS@ / cpu_core@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY@",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1974,7 +1987,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc\\,cpu=cpu_core@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency",
"Unit": "cpu_core"
@@ -1988,14 +2001,22 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_core@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "32 * UNC_M_TOTAL_DATA / 1e9 / tma_info_system_time",
+ "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / 1e9 / tma_info_system_time",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@) / 1e9 / tma_info_system_time",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_system_gflops",
"PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width",
@@ -2062,6 +2083,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+ "MetricExpr": "tma_info_system_socket_clks / 1e9 / tma_info_system_time",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_uncore_frequency",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
"MetricGroup": "Pipeline",
@@ -2182,12 +2210,12 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
- "MetricExpr": "4 * cpu_core@DEPENDENT_LOADS.ANY@ / tma_info_thread_clks",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "MetricExpr": "4 * cpu_core@DEPENDENT_LOADS.ANY\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2203,7 +2231,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L2_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L2_HIT@R, 3 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -2224,12 +2251,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L3_HIT@R, 9 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2311,6 +2337,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ * cpu_core@MEM_INST_RETIRED.LOCK_LOADS@R / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -2321,7 +2348,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit",
- "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks / 2",
"MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_lsd",
"MetricThreshold": "tma_lsd > 0.15 & tma_fetch_bandwidth > 0.2",
@@ -2346,7 +2373,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2356,13 +2383,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -2373,7 +2400,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2412,7 +2438,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.MITE_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.MITE_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.STARVATION_CYCLES@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
"MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
@@ -2432,7 +2458,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu_core@IDQ.MS_CYCLES_ANY@ / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@IDQ.MS_CYCLES_ANY@ / tma_info_thread_clks / 1.8",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2471,7 +2497,8 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
- "MetricExpr": "max(0, tma_light_operations - (tma_x87_use + (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + cpu_core@FP_ARITH_INST_RETIRED.VECTOR@) / (tma_retiring * tma_info_thread_slots) + (cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@ + cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots) + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "max(0, tma_light_operations - (tma_x87_use + (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@) / (tma_retiring * tma_info_thread_slots) + (cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@ + cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots) + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
"MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
@@ -2509,6 +2536,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2519,6 +2547,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2529,6 +2558,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2539,7 +2569,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2550,7 +2579,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2571,7 +2599,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2586,7 +2614,7 @@
"MetricGroup": "BvIO;PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group;tma_issueSO",
"MetricName": "tma_serializing_operation",
"MetricThreshold": "tma_serializing_operation > 0.1 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
- "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: RESOURCE_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
+ "PublicDescription": "This metric represents fraction of cycles the CPU issue-pipeline was stalled due to serializing operations. Instructions like CPUID; WRMSR or LFENCE serialize the out-of-order execution which may limit performance. Sample with: PARTIAL_RAT_STALLS.SCOREBOARD. Related metrics: tma_ms_switches",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2602,7 +2630,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2637,7 +2664,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2652,6 +2679,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "This metric estimates clocks wasted due to loads blocked due to unknown store address (did not do memory disambiguation) or due to unknown store data",
+ "MetricExpr": "7 * cpu_core@LD_BLOCKS.STORE_EARLY\\,cmask\\=1@ / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_store_early_blk",
+ "MetricThreshold": "tma_store_early_blk > 0.2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
"MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/cache.json b/tools/perf/pmu-events/arch/x86/arrowlake/cache.json
index 91929d8bcf47..fba4a0672f6c 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/cache.json
@@ -16,6 +16,16 @@
"PublicDescription": "Counts the number of L1D cacheline (dirty) evictions caused by load misses, stores, and prefetches. Does not count evictions or dirty writebacks caused by snoops. Does not count a replacement unless a (dirty) line was written back.",
"SampleAfterValue": "200003",
"UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L1D cacheline (dirty) evictions caused by load misses, stores, and prefetches.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x51",
+ "EventName": "DL1.DIRTY_EVICTION",
+ "PublicDescription": "Counts the number of L1D cacheline (dirty) evictions caused by load misses, stores, and prefetches. Does not count evictions or dirty writebacks caused by snoops. Does not count a replacement unless a (dirty) line was written back.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
"Unit": "cpu_lowpower"
},
{
@@ -29,6 +39,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cachelines replaced into the L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x51",
+ "EventName": "L1D.L1_REPLACEMENT",
+ "PublicDescription": "Counts cachelines replaced into the L1 d-cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Cachelines replaced into the L0 and L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x51",
@@ -104,6 +124,15 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.E",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Exclusive state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.E",
"PublicDescription": "Counts the number of cache lines filled into the L2 cache that are in Exclusive state. Counts on a per core basis.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
@@ -114,12 +143,40 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.F",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Forward state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.F",
"PublicDescription": "Counts the number of cache lines filled into the L2 cache that are in Forward state. Counts on a per core basis.",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Invalid state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.I",
+ "PublicDescription": "Counts the number of cache lines filled into the L2 cache that are in Invalid state, does not count lines that go Invalid due to an eviction",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Modified state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.M",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Modified state",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x25",
@@ -134,6 +191,15 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.S",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cache lines filled into the L2 cache that are in Shared state",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x25",
+ "EventName": "L2_LINES_IN.S",
"PublicDescription": "Counts the number of cache lines filled into the L2 cache that are in Shared state. Counts on a per core basis.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
@@ -180,6 +246,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of L2 cache lines that have been L2 hardware prefetched but not used by demand accesses",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x26",
+ "EventName": "L2_LINES_OUT.USELESS_HWPF",
+ "PublicDescription": "Counts the number of L2 cache lines that have been L2 hardware prefetched but not used by demand accesses. Increments on the core that brought the line in originally.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x26",
@@ -190,6 +266,42 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of L2 prefetches initiated by either the L2 Stream or AMP that were throttled due to Dynamic Prefetch Throttling. The throttle requestor/source could be from the uncore/SOC or the Dead Block Predictor. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "L2_PREFETCHES_THROTTLED.DPT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L2 prefetches initiated by the L2 Stream that were throttled due to Demand Throttle Prefetcher. DTP Global Triggered with no Local Override. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "L2_PREFETCHES_THROTTLED.DTP",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L2 prefetches initiated by the L2 Stream and not throttled by DTP due to local override. These prefetches may still be throttled due to another throttler mechanism besides DTP. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "L2_PREFETCHES_THROTTLED.DTP_OVERRIDE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L2 prefetches initiated by either the L2 Stream or AMP that were throttled due to exceeding the XQ threshold set by either XQ_THRESHOLD_DTP or XQ_THRESHOLD. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x28",
+ "EventName": "L2_PREFETCHES_THROTTLED.XQ_THRESH",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of demand and prefetch transactions that the External Queue (XQ) rejects due to a full or near full condition.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x30",
@@ -199,6 +311,16 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the total number of L2 Cache Accesses - sum of hits, misses, rejects front door requests for CRd/DRd/RFO/ItoM/L2 Prefetches only, per core event",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.ALL",
+ "PublicDescription": "Counts the total number of L2 Cache Accesses - sum of hits, misses, rejects front door requests for CRd/DRd/RFO/ItoM/L2 Prefetches only.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "All accesses to L2 cache [This event is alias to L2_RQSTS.REFERENCES, L2_RQSTS.ANY]",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x24",
@@ -213,11 +335,29 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x24",
"EventName": "L2_REQUEST.HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of L2 Cache Accesses that resulted in a Hit from a front door request only (does not include rejects or recycles), per core event",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.HIT",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of total L2 Cache Accesses that resulted in a Miss from a front door request only (does not include rejects or recycles), per core event",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Read requests with true-miss in L2 cache [This event is alias to L2_RQSTS.MISS]",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x24",
@@ -237,6 +377,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects (includes those counted in L2_reject_XQ.any), per core event",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x24",
+ "EventName": "L2_REQUEST.REJECTS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of L2 Cache Accesses that miss the L2 and get BBL reject short and long rejects, per core event",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x24",
@@ -356,6 +505,51 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of LLC prefetches that were throttled due to Dynamic Prefetch Throttling. The throttle requestor/source could be from the uncore/SOC or the Dead Block Predictor. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.DPT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LLC prefetches throttled due to Demand Throttle Prefetcher. DTP Global Triggered with no Local Override. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.DTP",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LLC prefetches not throttled by DTP due to local override. These prefetches may still be throttled due to another throttler mechanism. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.DTP_OVERRIDE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LLC prefetches throttled due to LLC hit rate. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.HIT_RATE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LLC prefetches throttled due to exceeding the XQ threshold set by either XQ_THRESHOLD_DTP or LLC_XQ_THRESHOLD. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x29",
+ "EventName": "LLC_PREFETCHES_THROTTLED.XQ_THRESH",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles when L1D is locked",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x42",
@@ -366,6 +560,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Core-originated cacheable requests that missed L3 (Except hardware prefetches to the L3)",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2e",
@@ -376,6 +580,26 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.MISS",
+ "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x41",
+ "Unit": "cpu_lowpower"
+ },
+ {
+ "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x2e",
+ "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+ "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the core has access to an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2e",
@@ -532,6 +756,15 @@
"EventName": "MEM_BOUND_STALLS_LOAD.SBFULL",
"SampleAfterValue": "1000003",
"UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles when the core is stalled due to a store buffer full condition",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x34",
+ "EventName": "MEM_BOUND_STALLS_LOAD.SBFULL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
"Unit": "cpu_lowpower"
},
{
@@ -540,7 +773,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_LOADS",
- "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x81",
"Unit": "cpu_core"
@@ -551,7 +784,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_STORES",
- "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x82",
"Unit": "cpu_core"
@@ -561,7 +794,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_SWPF",
- "PublicDescription": "Counts all retired software prefetch instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired software prefetch instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x84",
"Unit": "cpu_core"
@@ -572,7 +805,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ANY",
- "PublicDescription": "Counts all retired memory instructions - loads and stores. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x87",
"Unit": "cpu_core"
@@ -583,7 +816,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.LOCK_LOADS",
- "PublicDescription": "Counts retired load instructions with locked access. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with locked access. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -594,7 +827,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
- "PublicDescription": "Counts retired load instructions that split across a cacheline boundary. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x41",
"Unit": "cpu_core"
@@ -605,18 +838,29 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_STORES",
- "PublicDescription": "Counts retired store instructions that split across a cacheline boundary. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x42",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_ANY",
+ "PublicDescription": "Number of retired instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0xf",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions that hit the STLB.",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_HIT_LOADS",
- "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -627,18 +871,39 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_HIT_STORES",
- "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0xa",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired SWPF instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_ANY",
+ "PublicDescription": "Retired instructions that miss the STLB. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x17",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions that miss the STLB.",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
- "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x11",
"Unit": "cpu_core"
@@ -649,18 +914,28 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
- "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x12",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired SWPF instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x14",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$)",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
- "PublicDescription": "Counts retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$) Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data source was a cross-core snoop that hit and forwarded data from an on-package core cache (induced by NI$). Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -671,7 +946,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
- "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -682,7 +957,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
- "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -693,7 +968,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
- "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -704,7 +979,7 @@
"Data_LA": "1",
"EventCode": "0xd4",
"EventName": "MEM_LOAD_MISC_RETIRED.UC",
- "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock). Available PDIST counters: 0",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock). Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -715,7 +990,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.FB_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in L1 but hit FB (Fill Buffers) due to a preceding miss to the same cache line with data not ready. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -726,7 +1001,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -737,7 +1012,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT_L0",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -747,7 +1022,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT_L1",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"Unit": "cpu_core"
},
@@ -757,7 +1032,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "200003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -768,7 +1043,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_HIT",
- "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources. Available PDIST counters: 0,1",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -779,7 +1054,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_MISS",
- "PublicDescription": "Counts retired load instructions missed L2 cache as data sources. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources. Available PDIST counters: 0,1",
"SampleAfterValue": "100021",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -790,7 +1065,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "100021",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -801,12 +1076,21 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "50021",
"UMask": "0x20",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of load ops retired that miss the L3 cache and hit in DRAM",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd4",
+ "EventName": "MEM_LOAD_UOPS_MISC_RETIRED.LOCAL_DRAM",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_lowpower"
+ },
+ {
"BriefDescription": "Counts the number of load ops retired that hit the L1 data cache",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd1",
@@ -889,6 +1173,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of load ops retired that hit in the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1c",
+ "Unit": "cpu_lowpower"
+ },
+ {
"BriefDescription": "Counts the number of loads that hit in a write combining buffer (WCB), excluding the first load that caused the WCB to allocate.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xd1",
@@ -988,6 +1281,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of memory uops retired. A single uop that performs both a load AND a store will be counted as 1, not 2 (e.g. ADD [mem], CONST)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.ALL",
+ "SampleAfterValue": "200003",
+ "UMask": "0x83",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of load uops retired.",
"Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
@@ -1041,7 +1344,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -1065,7 +1368,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -1101,7 +1404,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -1125,7 +1428,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -1149,7 +1452,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -1173,7 +1476,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -1197,7 +1500,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -1221,7 +1524,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
@@ -1264,6 +1567,26 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of memory renamed load uops retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.MRN_LOADS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x9",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of memory renamed store uops retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.MRN_STORES",
+ "SampleAfterValue": "200003",
+ "UMask": "0xa",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of memory uops retired that were splits.",
"Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
@@ -1331,9 +1654,29 @@
"EventName": "MEM_UOPS_RETIRED.STLB_MISS",
"SampleAfterValue": "200003",
"UMask": "0x13",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of memory uops retired that missed in the second level TLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x13",
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of load ops retired that filled the STLB - includes those in DTLB_LOAD_MISSES submasks",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x11",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of load uops retired that miss in the second Level TLB.",
"Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
@@ -1344,6 +1687,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of store ops retired (store STLB miss)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES",
+ "SampleAfterValue": "200003",
+ "UMask": "0x12",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of store uops retired that miss in the second level TLB.",
"Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
@@ -1384,8 +1737,32 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_M.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E00008",
+ "PublicDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_NONM.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E01000",
+ "PublicDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand data reads that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -1397,7 +1774,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1409,7 +1786,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches which forwarded the unmodified data to the requesting core.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -1421,7 +1798,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -1433,7 +1810,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1444,6 +1821,18 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E04477",
+ "PublicDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Any memory transaction that reached the SQ.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x21",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json b/tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json
index 23a80c526aa1..3e68c2468f11 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/floating-point.json
@@ -1,5 +1,15 @@
[
{
+ "BriefDescription": "Counts the number of cycles when any of the floating point dividers are active.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.FPDIV_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles when floating-point divide unit is busy executing divide or square root operations.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"CounterMask": "1",
@@ -21,6 +31,24 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of active floating point dividers per cycle in the loop stage.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.FPDIV_OCCUPANCY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of floating point divider uops executed per cycle.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.FPDIV_UOPS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts all microcode FP assists.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc1",
@@ -474,6 +502,51 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of uops executed on all floating point ports.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1f",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 0.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P0",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 1.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 2.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P2",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on floating point and vector integer port 3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb2",
+ "EventName": "FP_VINT_UOPS_EXECUTED.P3",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops executed on floating point and vector integer port 0, 1, 2, 3.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json b/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json
index 56cf1ec63200..a15de050a76c 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/frontend.json
@@ -30,6 +30,42 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of BACLEARS due to a conditional jump.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.COND",
+ "SampleAfterValue": "200003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to an indirect branch.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.INDIRECT",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to a return branch.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.RETURN",
+ "SampleAfterValue": "200003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of BACLEARS due to a direct, unconditional jump.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe6",
+ "EventName": "BACLEARS.UNCOND",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Stalls caused by changing prefix length of the instruction.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x87",
@@ -49,6 +85,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of times a decode restriction reduces the decode throughput due to wrong instruction length prediction.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe9",
+ "EventName": "DECODE_RESTRICTION.PREDECODE_WRONG",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "DSB-to-MITE switch true penalty cycles.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x61",
@@ -81,7 +126,7 @@
"EventName": "FRONTEND_RETIRED.ANY_ANT",
"MSRIndex": "0x3F7",
"MSRValue": "0x9",
- "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted) Available PDIST counters: 0",
+ "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted) Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -93,7 +138,7 @@
"EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x1",
- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -179,7 +224,7 @@
"EventName": "FRONTEND_RETIRED.DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x11",
- "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -218,7 +263,7 @@
"EventName": "FRONTEND_RETIRED.ITLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x14",
- "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -239,7 +284,7 @@
"EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x12",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -251,7 +296,7 @@
"EventName": "FRONTEND_RETIRED.L2_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x13",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -263,7 +308,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
"MSRIndex": "0x3F7",
"MSRValue": "0x608006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -275,7 +320,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
"MSRIndex": "0x3F7",
"MSRValue": "0x601006",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -287,7 +332,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
"MSRValue": "0x600206",
- "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -299,7 +344,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
"MSRIndex": "0x3F7",
"MSRValue": "0x610006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -311,7 +356,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
"MSRIndex": "0x3F7",
"MSRValue": "0x100206",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -323,7 +368,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
"MSRIndex": "0x3F7",
"MSRValue": "0x602006",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -335,7 +380,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
"MSRValue": "0x600406",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -347,7 +392,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
"MSRValue": "0x620006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -359,7 +404,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
"MSRIndex": "0x3F7",
"MSRValue": "0x604006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -371,7 +416,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
"MSRValue": "0x600806",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -383,7 +428,7 @@
"EventName": "FRONTEND_RETIRED.MISP_ANT",
"MSRIndex": "0x3F7",
"MSRValue": "0x9",
- "PublicDescription": "ANT retired branches that got just mispredicted Available PDIST counters: 0",
+ "PublicDescription": "ANT retired branches that got just mispredicted Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -395,7 +440,7 @@
"EventName": "FRONTEND_RETIRED.MS_FLOWS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "Counts flows delivered by the Microcode Sequencer Available PDIST counters: 0",
+ "PublicDescription": "Counts flows delivered by the Microcode Sequencer Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -443,7 +488,7 @@
"EventName": "FRONTEND_RETIRED.STLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x15",
- "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -455,7 +500,7 @@
"EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
"MSRIndex": "0x3F7",
"MSRValue": "0x17",
- "PublicDescription": "Number retired branch instructions that caused the front-end to be resteered when it finds the instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number retired branch instructions that caused the front-end to be resteered when it finds the instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -738,8 +783,35 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "MS_DECODED.MS_BUSY",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the micro-sequencer is busy.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe7",
+ "EventName": "MS_DECODED.MS_BUSY",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_lowpower"
+ },
+ {
+ "BriefDescription": "Counts the number of times entered into a ucode flow in the FEC. Includes inserted flows due to front-end detected faults or assists.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe7",
+ "EventName": "MS_DECODED.MS_ENTRY",
+ "SampleAfterValue": "200003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of times nanocode flow is executed.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe7",
+ "EventName": "MS_DECODED.NANO_CODE",
+ "SampleAfterValue": "200003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/memory.json b/tools/perf/pmu-events/arch/x86/arrowlake/memory.json
index fb8d4ac69bda..05cc46518232 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/memory.json
@@ -1,5 +1,14 @@
[
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ANY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -63,6 +72,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to other block cases.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.OTHER",
+ "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to other block cases such as pipeline conflicts, fences, etc.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -83,6 +102,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a pagewalk.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.PGWALK",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -101,6 +129,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a store address match.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ST_ADDR",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -119,6 +156,24 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to store data forward block.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.ST_DATA",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to request buffers full or lock in progress.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.WCB_FULL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to request buffers full or lock in progress.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -156,6 +211,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine without the use of microcode.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MEMORY_ORDERING_FAST",
+ "SampleAfterValue": "20003",
+ "UMask": "0x82",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles.",
"Counter": "2,3,4,5,6,7,8,9",
"Data_LA": "1",
@@ -163,7 +227,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
"MSRIndex": "0x3F6",
"MSRValue": "0x400",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "53",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -176,7 +240,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "1009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -189,7 +253,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -202,7 +266,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048",
"MSRIndex": "0x3F6",
"MSRValue": "0x800",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "23",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -215,7 +279,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "503",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -228,7 +292,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100007",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -241,7 +305,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -254,7 +318,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "101",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -267,7 +331,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "2003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -280,7 +344,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "50021",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -291,7 +355,7 @@
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE",
- "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0",
+ "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -334,7 +398,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by DRAM.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -346,7 +410,7 @@
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -358,7 +422,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/other.json b/tools/perf/pmu-events/arch/x86/arrowlake/other.json
index 51bc763a5887..c8feed3a99a6 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/other.json
@@ -19,6 +19,89 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of unhalted cycles a Core is blocked due to a lock In Progress issued by another core",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x63",
+ "EventName": "BUS_LOCK.BLOCKED_CYCLES",
+ "PublicDescription": "Counts the number of unhalted cycles a Core is blocked due to a lock In Progress issued by another core. Counts on a per core basis.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of unhalted cycles a Core is blocked due to an Accepted lock it issued, includes both split and non-split lock cycles.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x63",
+ "EventName": "BUS_LOCK.LOCK_CYCLES",
+ "PublicDescription": "Counts the number of unhalted cycles a Core is blocked due to an Accepted lock it issued, includes both split and non-split lock cycles. Counts on a per core basis.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of non-split locks such as UC locks issued by a Core (does not include cache locks)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x63",
+ "EventName": "BUS_LOCK.NON_SPLIT_LOCKS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of split locks issued by a Core",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x63",
+ "EventName": "BUS_LOCK.SPLIT_LOCKS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetchers are at throttle level 0",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL0_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetcher throttle level is at 1",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL1_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetcher throttle level is at 2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL2_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetcher throttle level is at 3",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL3_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles the L2 Prefetcher throttle level is at 4",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x32",
+ "EventName": "DYNAMIC_PREFETCH_THROTTLER.LEVEL4_SOC",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "This event is deprecated. [This event is alias to MISC_RETIRED.LBR_INSERTS]",
"Counter": "0,1,2,3,4,5,6,7",
"Deprecated": "1",
@@ -66,7 +149,7 @@
},
{
"BriefDescription": "Counts streaming stores that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -86,5 +169,41 @@
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts the number of prefetch requests that were promoted in the XQ to a demand request.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xf4",
+ "EventName": "XQ_PROMOTION.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x7",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of prefetch requests that were promoted in the XQ to a demand code read.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xf4",
+ "EventName": "XQ_PROMOTION.CRDS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of prefetch requests that were promoted in the XQ to a demand read.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xf4",
+ "EventName": "XQ_PROMOTION.DRDS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of prefetch requests that were promoted in the XQ to a demand RFO.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xf4",
+ "EventName": "XQ_PROMOTION.RFOS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json b/tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json
index 18a22368b99b..805616052925 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/pipeline.json
@@ -31,6 +31,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of cycles when any of the integer dividers are active.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "CounterMask": "1",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.IDIV_ACTIVE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles when integer divide unit is busy executing divide or square root operations.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"CounterMask": "1",
@@ -42,6 +52,24 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of active integer dividers per cycle.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.IDIV_OCCUPANCY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of integer divider uops executed per cycle.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcd",
+ "EventName": "ARITH.IDIV_UOPS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc1",
@@ -74,13 +102,14 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
{
"BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL010, ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
"PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires. All branch type instructions are accounted for.",
@@ -101,7 +130,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND",
- "PublicDescription": "Counts conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x111",
"Unit": "cpu_core"
@@ -109,6 +138,7 @@
{
"BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND",
"SampleAfterValue": "200003",
@@ -116,11 +146,20 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of not taken JCC branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.COND_NTAKEN",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Not taken branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_NTAKEN",
- "PublicDescription": "Counts not taken branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts not taken branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -139,7 +178,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN",
- "PublicDescription": "Counts taken conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -158,7 +197,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN_BWD",
- "PublicDescription": "Counts taken backward conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken backward conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -168,7 +207,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN_FWD",
- "PublicDescription": "Counts taken forward conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken forward conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x102",
"Unit": "cpu_core"
@@ -187,7 +226,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.FAR_BRANCH",
- "PublicDescription": "Counts far branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts far branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -195,6 +234,7 @@
{
"BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.FAR_BRANCH",
"SampleAfterValue": "200003",
@@ -215,7 +255,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT",
- "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0",
+ "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -223,6 +263,7 @@
{
"BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT",
"SampleAfterValue": "200003",
@@ -241,6 +282,7 @@
{
"BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT_CALL",
"SampleAfterValue": "200003",
@@ -248,6 +290,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of near indirect JMP branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.INDIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "UMask": "0xef",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of near indirect JMP branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc4",
@@ -257,6 +308,17 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
+ "Errata": "ARL011",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.IND_CALL",
+ "SampleAfterValue": "200003",
+ "UMask": "0xfb",
+ "Unit": "cpu_lowpower"
+ },
+ {
"BriefDescription": "Counts the number of near CALL branch instructions retired",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc4",
@@ -270,7 +332,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_CALL",
- "PublicDescription": "Counts both direct and indirect near call instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts both direct and indirect near call instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -278,6 +340,7 @@
{
"BriefDescription": "Counts the number of near CALL branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL010, ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_CALL",
"SampleAfterValue": "200003",
@@ -298,7 +361,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_RETURN",
- "PublicDescription": "Counts return instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts return instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -313,11 +376,20 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of taken branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc4",
+ "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "200003",
+ "UMask": "0xc0",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Taken branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Counts taken branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -325,6 +397,7 @@
{
"BriefDescription": "Counts the number of near taken branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Errata": "ARL011",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
"SampleAfterValue": "200003",
@@ -372,7 +445,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -390,7 +463,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES_COST",
- "PublicDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x44",
"Unit": "cpu_core"
@@ -409,7 +482,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND",
- "PublicDescription": "Counts mispredicted conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x111",
"Unit": "cpu_core"
@@ -428,17 +501,26 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_COST",
- "PublicDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x151",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of mispredicted not taken JCC branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
+ "SampleAfterValue": "200003",
+ "UMask": "0x7f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_NTAKEN",
- "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -448,7 +530,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_NTAKEN_COST",
- "PublicDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x50",
"Unit": "cpu_core"
@@ -467,7 +549,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN",
- "PublicDescription": "Counts taken conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -486,7 +568,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD",
- "PublicDescription": "Counts taken backward conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken backward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -496,7 +578,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD_COST",
- "PublicDescription": "number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x8001",
"Unit": "cpu_core"
@@ -506,7 +588,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_COST",
- "PublicDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x141",
"Unit": "cpu_core"
@@ -516,7 +598,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD",
- "PublicDescription": "Counts taken forward conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken forward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -525,7 +607,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD_COST",
- "PublicDescription": "number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x8002",
"Unit": "cpu_core"
@@ -544,7 +626,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT",
- "PublicDescription": "Counts miss-predicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0",
+ "PublicDescription": "Counts miss-predicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -572,7 +654,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
- "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -591,7 +673,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_CALL_COST",
- "PublicDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x42",
"Unit": "cpu_core"
@@ -601,12 +683,21 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_COST",
- "PublicDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0xc0",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of mispredicted near indirect JMP branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.INDIRECT_JMP",
+ "SampleAfterValue": "200003",
+ "UMask": "0xef",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of mispredicted near indirect JMP branch instructions retired.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc5",
@@ -616,11 +707,20 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of mispredicted near taken branch instructions retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc5",
+ "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
+ "SampleAfterValue": "200003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -639,7 +739,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.NEAR_TAKEN_COST",
- "PublicDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x60",
"Unit": "cpu_core"
@@ -649,7 +749,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RET",
- "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -677,12 +777,21 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RET_COST",
- "PublicDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x48",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the total number of BTCLEARS.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe8",
+ "EventName": "BTCLEAR.ANY",
+ "PublicDescription": "Counts the total number of BTCLEARS which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xec",
@@ -1046,7 +1155,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.ANY_P",
- "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
@@ -1063,7 +1172,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.BR_FUSED",
- "PublicDescription": "retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon) Available PDIST counters: 0",
+ "PublicDescription": "retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon) Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1073,7 +1182,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
+ "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x30",
"Unit": "cpu_core"
@@ -1083,7 +1192,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1102,7 +1211,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1181,6 +1290,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of uops executed on all Integer ports.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xff",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops executed on a load port.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
@@ -1191,6 +1309,42 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of uops executed on integer port 0.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P0",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 1.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 2.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P2",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of uops executed on integer port 3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.P3",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops executed on integer port 0,1, 2, 3.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
@@ -1321,6 +1475,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of occurrences a retired load was blocked for any of the following reasons: utlb_miss, 4k_alias, unknown_sta/bad_fwd, unready_fwd (includes md blocks and esp consuming load blocks)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.ALL",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1f",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of occurrences a retired load gets blocked because its address exactly matches an older store whose data is not ready (a.k.a. unknown). unready_fwd",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x03",
@@ -1349,6 +1512,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of times a load got early blocked due to preceding store operation with unknown address or unknown data. Excluding in-line (immediate) wakeups",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_EARLY",
+ "SampleAfterValue": "100003",
+ "UMask": "0xa1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of occurrences a retired load gets blocked because its address partially overlaps with an older store (size mismatch) - unknown_sta/bad_forward",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x03",
@@ -1377,6 +1549,25 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of demand loads that match on a wcb (request buffer) allocated by an L1 hardware prefetch [This event is alias to LOAD_HIT_PREFETCH.HW_PF]",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x4c",
+ "EventName": "LOAD_HIT_PREFETCH.HWPF",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "This event is deprecated. [This event is alias to LOAD_HIT_PREFETCH.HWPF]",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
+ "EventCode": "0x4c",
+ "EventName": "LOAD_HIT_PREFETCH.HW_PF",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"CounterMask": "1",
@@ -1417,6 +1608,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine without the use of microcode.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.ANY_FAST",
+ "SampleAfterValue": "20003",
+ "UMask": "0xff",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Number of machine clears (nukes) of any type.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"CounterMask": "1",
@@ -1447,6 +1647,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine without the use of microcode.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.DISAMBIGUATION_FAST",
+ "SampleAfterValue": "20003",
+ "UMask": "0x88",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of nukes due to memory renaming",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
@@ -1456,6 +1665,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine without the use of microcode.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc3",
+ "EventName": "MACHINE_CLEARS.MRN_NUKE_FAST",
+ "SampleAfterValue": "20003",
+ "UMask": "0x90",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of times that the machine clears due to a page fault. Covers both I-Side and D-Side (Loads/Stores) page faults. A page fault occurs when either the page is not present, or an access violation.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
@@ -1559,11 +1777,20 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of LBR entries recorded. Requires LBRs to be enabled in IA32_LBR_CTL.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe4",
+ "EventName": "MISC_RETIRED.LBR_INSERTS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "LBR record is inserted",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xe4",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "LBR record is inserted Available PDIST counters: 0",
+ "PublicDescription": "LBR record is inserted Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1578,6 +1805,86 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of CLFLUSH, CLWB, and CLDEMOTE instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe0",
+ "EventName": "MISC_RETIRED1.CL_INST",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xff",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of LFENCE instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe0",
+ "EventName": "MISC_RETIRED1.LFENCE",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of RDPMC, RDTSC, and RDTSCP instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe0",
+ "EventName": "MISC_RETIRED1.RDPMC_RDTSC_P",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Count the number of WRMSR instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe0",
+ "EventName": "MISC_RETIRED1.WRMSR",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of faults and software interrupts with vector < 32.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.FAULT_ALL",
+ "PublicDescription": "Counts the number of faults and software interrupts with vector < 32, including VOE cases.",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of PSB+ nuke events and ToPA trap events.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.INTEL_PT_CLEARS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of user interrupts delivered.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.ULI_DELIVERY",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of SENDUIPI instructions retired.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.ULI_SENDUIPI",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x9",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of VM exits.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xe1",
+ "EventName": "MISC_RETIRED2.VM_EXIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xa5",
@@ -1628,6 +1935,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of issue slots not consumed due to a color request for an FCW or MXCSR control register when all 4 colors (copies) are already in use",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.COLOR_STALLS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of issue slots where no uop could issue due to an IQ scoreboard that stalls allocation until a specified older uop retires or (in the case of jump scoreboard) executes. Commonly executed instructions with IQ scoreboards include LFENCE and MFENCE.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x75",
@@ -1705,6 +2021,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Fixed Counter: Counts the number of issue slots not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+ "Counter": "Fixed counter 4",
+ "EventName": "TOPDOWN_BAD_SPECULATION.ALL",
+ "PublicDescription": "Fixed Counter: Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the IQ. Also, includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x5",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x73",
@@ -1821,6 +2146,14 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x74",
+ "EventName": "TOPDOWN_BE_BOUND.ALL_NON_ARCH",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of retirement slots not consumed due to backend stalls [This event is alias to TOPDOWN_BE_BOUND.ALL]",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
@@ -1929,7 +2262,7 @@
},
{
"BriefDescription": "Fixed Counter: Counts the number of retirement slots not consumed due to front end stalls.",
- "Counter": "37",
+ "Counter": "Fixed counter 5",
"EventName": "TOPDOWN_FE_BOUND.ALL",
"SampleAfterValue": "1000003",
"UMask": "0x6",
@@ -1938,6 +2271,14 @@
{
"BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
"Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x71",
+ "EventName": "TOPDOWN_FE_BOUND.ALL_NON_ARCH",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of retirement slots not consumed due to front end stalls",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "TOPDOWN_FE_BOUND.ALL_P",
"SampleAfterValue": "1000003",
@@ -2126,7 +2467,7 @@
},
{
"BriefDescription": "Fixed Counter: Counts the number of consumed retirement slots.",
- "Counter": "38",
+ "Counter": "Fixed counter 6",
"EventName": "TOPDOWN_RETIRING.ALL",
"SampleAfterValue": "1000003",
"UMask": "0x7",
@@ -2135,6 +2476,14 @@
{
"BriefDescription": "Counts the number of consumed retirement slots.",
"Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x72",
+ "EventName": "TOPDOWN_RETIRING.ALL_NON_ARCH",
+ "SampleAfterValue": "1000003",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of consumed retirement slots.",
+ "Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "TOPDOWN_RETIRING.ALL_P",
"SampleAfterValue": "1000003",
@@ -2352,6 +2701,14 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of uops retired",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.ALL",
+ "SampleAfterValue": "2000003",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the total number of uops retired.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
@@ -2399,6 +2756,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of uops retired that were delivered by the loop stream detector (LSD).",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xc2",
+ "EventName": "UOPS_RETIRED.LSD",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
diff --git a/tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json
index a3e4a4f3ab45..602e2ad5de6e 100644
--- a/tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/arrowlake/virtual-memory.json
@@ -9,6 +9,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts walks that miss the PDE_CACHE",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "200003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of first level TLB misses but second level hits due to a demand load that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x08",
@@ -48,6 +57,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x08",
+ "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "200003",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x12",
@@ -176,6 +195,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts walks that miss the PDE_CACHE",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of first level TLB misses but second level hits due to stores that did not start a page walk. Accounts for all page sizes. Will result in a DTLB write from STLB.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x49",
@@ -216,6 +244,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+ "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0xe",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x13",
@@ -245,6 +283,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to a 2M or 4M page.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x49",
+ "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
+ "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to 2M or 4M pages. Includes page walks that page fault.",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x13",
@@ -325,6 +373,16 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of times there was an ITLB miss and a new translation was filled into the ITLB.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x81",
+ "EventName": "ITLB.FILLS",
+ "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) and a new translation was filled into the ITLB. The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.",
+ "SampleAfterValue": "200003",
+ "UMask": "0x4",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x85",
@@ -343,6 +401,15 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts walks that miss the PDE_CACHE",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x85",
+ "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
+ "SampleAfterValue": "2000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of first level TLB misses but second level hits due to an instruction fetch that did not start a page walk. Account for all pages sizes. Will result in an ITLB write from STLB.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x85",
@@ -502,6 +569,24 @@
"Unit": "cpu_lowpower"
},
{
+ "BriefDescription": "Counts the number of occurrences a load gets blocked because of a micro TLB miss",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.DTLB_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a DTLB miss",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x05",
+ "EventName": "LD_HEAD.DTLB_MISS",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x05",
@@ -518,5 +603,33 @@
"SampleAfterValue": "1000003",
"UMask": "0x90",
"Unit": "cpu_lowpower"
+ },
+ {
+ "BriefDescription": "Counts the number of PMH walks that hit in the L1 or WCBs",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xbc",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L1_HIT",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Counts the number of PMH walks that hit in the L2",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xbc",
+ "EventName": "PAGE_WALKER_LOADS.DTLB_L2_HIT",
+ "PublicDescription": "Counts the number of PMH walks that hit in the L2. Includes L2 Hit resulting from an L1D eviction of another core in the same module which is longer latency than a typical L2 hit.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2",
+ "Unit": "cpu_atom"
+ },
+ {
+ "BriefDescription": "Count number of any STLB flush attempts (Entire, PCID, InvPage, CR3 write, etc)",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xbd",
+ "EventName": "TLB_FLUSHES.STLB_ANY",
+ "SampleAfterValue": "20003",
+ "UMask": "0x20",
+ "Unit": "cpu_atom"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
index 89750117a7f6..1d8e910f5961 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -640,7 +637,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -653,7 +650,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -665,7 +662,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -854,7 +851,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1032,7 +1028,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1041,7 +1037,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1050,7 +1046,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
index 81175f0f2603..a5e408ca46a7 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -632,7 +629,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -645,7 +642,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -657,7 +654,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -846,7 +843,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1021,7 +1017,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1030,7 +1026,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1039,7 +1035,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
index 5d06a3f72be2..5b83b040060c 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -282,7 +282,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -300,7 +299,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -341,7 +339,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -842,7 +839,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -855,7 +852,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -867,7 +864,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -907,6 +904,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
@@ -1076,7 +1074,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1086,6 +1083,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_mem",
@@ -1263,7 +1261,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else CYCLE_ACTIVITY.STALLS_TOTAL - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1272,7 +1270,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1281,7 +1279,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1308,6 +1306,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_mem",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index 6485b565acbc..2e50a91b6728 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -319,6 +319,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -356,6 +357,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -370,31 +372,35 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
@@ -402,6 +408,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * (10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts)) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
@@ -410,7 +417,8 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -418,7 +426,8 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
"MetricThreshold": "tma_bottleneck_memory_synchronization > 10",
@@ -426,6 +435,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -434,7 +444,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -442,6 +453,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -469,6 +481,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -539,6 +552,15 @@
"ScaleUnit": "100%"
},
{
+ "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 1)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_cxl_mem_bound",
+ "MetricThreshold": "tma_cxl_mem_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g. 3D-Xpoint (Crystal Ridge, a.k.a. IXP) memory, PMM - Persistent Memory Module [from CLX to SPR] or any other CXL Type3 Memory [EMR onwards]).",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "44 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE / (OCR.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE + OCR.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
@@ -569,7 +591,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound - tma_cxl_mem_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -630,7 +652,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -693,7 +715,6 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@ / UOPS_RETIRED.RETIRE_SLOTS",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
@@ -768,6 +789,7 @@
},
{
"BriefDescription": "Branch Misprediction Cost: Cycles representing fraction of TMA slots wasted per non-speculative branch misprediction (retired JEClear)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "tma_bottleneck_mispredictions * tma_info_thread_slots / 4 / BR_MISP_RETIRED.ALL_BRANCHES / 100",
"MetricGroup": "Bad;BrMispredicts;tma_issueBM",
"MetricName": "tma_info_bad_spec_branch_misprediction_cost",
@@ -803,6 +825,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_mite)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -820,6 +843,7 @@
},
{
"BriefDescription": "Total pipeline cost of Instruction Cache misses - subset of the Big_Code Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_fetch_latency * tma_icache_misses / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Fed;FetchLat;IcMiss;tma_issueFL",
"MetricName": "tma_info_botlnk_l2_ic_misses",
@@ -961,7 +985,6 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "INST_RETIRED.ANY / (FP_ARITH_INST_RETIRED.SCALAR + cpu@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE\\,umask\\=0xfc@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
@@ -1249,7 +1272,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Average number of uops executed per cycle in which at least one uop was executed (instruction-level parallelism)",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1267,6 +1290,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / (FP_ASSIST.ANY + OTHER_ASSISTS.ANY)",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1282,7 +1311,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1294,16 +1323,28 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
{
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_read_bw"
+ },
+ {
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for writes [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_write_bw"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1362,6 +1403,13 @@
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
+ "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
+ "MetricExpr": "(1e9 * (UNC_M_PMM_RPQ_OCCUPANCY.ALL / UNC_M_PMM_RPQ_INSERTS) / imc_0@event\\=0x0@ if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
+ "MetricName": "tma_info_system_mem_pmm_read_latency",
+ "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
+ },
+ {
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / tma_info_system_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
@@ -1499,12 +1547,13 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1541,7 +1590,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1565,6 +1614,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -1591,6 +1641,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 1 GB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_1G / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_1g",
@@ -1599,6 +1650,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 2 or 4 MB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_2m",
@@ -1607,6 +1659,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 4 KB pages for data load accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_load_stlb_miss * DTLB_LOAD_MISSES.WALK_COMPLETED_4K / (DTLB_LOAD_MISSES.WALK_COMPLETED_4K + DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M + DTLB_LOAD_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_load_stlb_miss_group",
"MetricName": "tma_load_stlb_miss_4k",
@@ -1624,6 +1677,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(12 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (11 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1648,7 +1702,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1657,7 +1711,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1681,7 +1735,6 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was retiring uops fetched by the Microcode Sequencer (MS) unit",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_RETIRED.RETIRE_SLOTS / UOPS_ISSUED.ANY * IDQ.MS_UOPS / tma_info_thread_slots",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_heavy_operations_group;tma_issueMC;tma_issueMS",
"MetricName": "tma_microcode_sequencer",
@@ -1691,6 +1744,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to Branch Resteers as a result of Branch Misprediction at execution stage",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + MACHINE_CLEARS.COUNT) * INT_MISC.CLEAR_RESTEER_CYCLES / tma_info_thread_clks",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TopdownL4;tma_L4_group;tma_branch_resteers_group;tma_issueBM",
"MetricName": "tma_mispredicts_resteers",
@@ -1745,6 +1799,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -1754,6 +1809,7 @@
},
{
"BriefDescription": "This metric estimates fraction of slots the CPU was stalled due to other cases of misprediction (non-retired x86 branches or other types).",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(tma_branch_mispredicts * (1 - BR_MISP_RETIRED.ALL_BRANCHES / (INT_MISC.CLEARS_COUNT - MACHINE_CLEARS.COUNT)), 0.0001)",
"MetricGroup": "BrMispredicts;BvIO;TopdownL3;tma_L3_group;tma_branch_mispredicts_group",
"MetricName": "tma_other_mispredicts",
@@ -1762,6 +1818,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Nukes (Machine Clears) not related to memory ordering.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(tma_machine_clears * (1 - MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.COUNT), 0.0001)",
"MetricGroup": "BvIO;Machine_Clears;TopdownL3;tma_L3_group;tma_machine_clears_group",
"MetricName": "tma_other_nukes",
@@ -1842,6 +1899,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIVIDER_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - CYCLE_ACTIVITY.STALLS_MEM_ANY else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -1956,7 +2014,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -2013,6 +2071,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 1 GB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_1G / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_1g",
@@ -2021,6 +2080,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 2 or 4 MB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_2m",
@@ -2029,6 +2089,7 @@
},
{
"BriefDescription": "This metric estimates the fraction of cycles to walk the memory paging structures to cache translation of 4 KB pages for data store accesses.",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "tma_store_stlb_miss * DTLB_STORE_MISSES.WALK_COMPLETED_4K / (DTLB_STORE_MISSES.WALK_COMPLETED_4K + DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M + DTLB_STORE_MISSES.WALK_COMPLETED_1G)",
"MetricGroup": "MemoryTLB;TopdownL6;tma_L6_group;tma_store_stlb_miss_group",
"MetricName": "tma_store_stlb_miss_4k",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json
index c9596e18ec09..30390d734051 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-cache.json
@@ -22,7 +22,7 @@
"Unit": "CHA"
},
{
- "BriefDescription": "LLC misses - Uncacheable reads (from cpu) . Derived from unc_cha_tor_inserts.ia_miss",
+ "BriefDescription": "LLC misses - Uncacheable reads (from cpu). Derived from unc_cha_tor_inserts.ia_miss",
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "LLC_MISSES.UNCACHEABLE",
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
index 265cdf334f6a..aafa7af46e69 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/uncore-memory.json
@@ -316,32 +316,32 @@
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MB/sec). Derived from unc_m_pmm_rpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth read (MiB/sec). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.READ",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MiB/sec",
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MB/sec). Derived from unc_m_pmm_rpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth total (MiB/sec). Derived from unc_m_pmm_rpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE3",
"EventName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"MetricExpr": "UNC_M_PMM_RPQ_INSERTS + UNC_M_PMM_WPQ_INSERTS",
"MetricName": "UNC_M_PMM_BANDWIDTH.TOTAL",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MiB/sec",
"Unit": "iMC"
},
{
- "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MB/sec). Derived from unc_m_pmm_wpq_inserts",
+ "BriefDescription": "Intel Optane DC persistent memory bandwidth write (MiB/sec). Derived from unc_m_pmm_wpq_inserts",
"Counter": "0,1,2,3",
"EventCode": "0xE7",
"EventName": "UNC_M_PMM_BANDWIDTH.WRITE",
"PerPkg": "1",
- "ScaleUnit": "6.103515625E-5MB/sec",
+ "ScaleUnit": "6.103515625E-5MiB/sec",
"Unit": "iMC"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json
index 10bdb193c16f..26568e4b77f7 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/cache.json
@@ -1,10 +1,72 @@
[
{
+ "BriefDescription": "Hit snoop reply with data, line invalidated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.I_FWD_FE",
+ "PublicDescription": "Counts responses to snoops indicating the line will now be (I)nvalidated: removed from this core's cache, after the data is forwarded back to the requestor and indicating the data was found unmodified in the (FE) Forward or Exclusive State in this core's caches. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x20"
+ },
+ {
+ "BriefDescription": "HitM snoop reply with data, line invalidated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.I_FWD_M",
+ "PublicDescription": "Counts responses to snoops indicating the line will now be (I)nvalidated: removed from this core's caches, after the data is forwarded back to the requestor, and indicating the data was found modified (M) in this core's caches (aka HitM response). A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x10"
+ },
+ {
+ "BriefDescription": "Hit snoop reply without sending the data, line invalidated.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.I_HIT_FSE",
+ "PublicDescription": "Counts responses to snoops indicating the line will now be (I)nvalidated in this core's caches without being forwarded back to the requestor. The line was in Forward, Shared or Exclusive (FSE) state in this core's caches. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "Line not found snoop reply",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.MISS",
+ "PublicDescription": "Counts responses to snoops indicating that the data was not found (IHitI) in this core's caches. A single snoop response from the core counts on all hyperthreads of the Core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Hit snoop reply with data, line kept in Shared state.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.S_FWD_FE",
+ "PublicDescription": "Counts responses to snoops indicating the line may be kept on this core in the (S)hared state, after the data is forwarded back to the requestor, initially the data was found in the cache in the (FS) Forward or Shared state. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x40"
+ },
+ {
+ "BriefDescription": "HitM snoop reply with data, line kept in Shared state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.S_FWD_M",
+ "PublicDescription": "Counts responses to snoops indicating the line may be kept on this core in the (S)hared state, after the data is forwarded back to the requestor, initially the data was found in the cache in the (M)odified state. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x8"
+ },
+ {
+ "BriefDescription": "Hit snoop reply without sending the data, line kept in Shared state.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x27",
+ "EventName": "CORE_SNOOP_RESPONSE.S_HIT_FSE",
+ "PublicDescription": "Counts responses to snoops indicating the line was kept on this core in the (S)hared state, and that the data was found unmodified but not forwarded back to the requestor, initially the data was found in the cache in the (FSE) Forward, Shared state or Exclusive state. A single snoop response from the core counts on all hyperthreads of the core.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4"
+ },
+ {
"BriefDescription": "L1D.HWPF_MISS",
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PublicDescription": "L1D.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -13,7 +75,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace. Available PDIST counters: 0",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -22,7 +84,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -33,7 +95,7 @@
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -43,7 +105,6 @@
"Deprecated": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALL",
- "PublicDescription": "This event is deprecated. Refer to new event L1D_PEND_MISS.L2_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -52,7 +113,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -61,7 +122,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -71,7 +132,7 @@
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles. Available PDIST counters: 0",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -80,7 +141,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
"UMask": "0x1f"
},
@@ -89,7 +150,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3 Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
"SampleAfterValue": "200003",
"UMask": "0x2"
},
@@ -98,7 +159,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -107,7 +168,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -116,7 +177,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -125,7 +186,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -134,7 +195,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
"UMask": "0xe4"
},
@@ -143,7 +204,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0xe1"
},
@@ -152,7 +213,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PublicDescription": "Counts demand requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x27"
},
@@ -161,7 +222,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "PublicDescription": "Counts demand requests to L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests to L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xe7"
},
@@ -170,7 +231,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PublicDescription": "L2_RQSTS.ALL_HWPF Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0xf0"
},
@@ -179,7 +239,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
"UMask": "0xe2"
},
@@ -188,7 +248,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
"UMask": "0xc4"
},
@@ -197,7 +257,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
"UMask": "0x24"
},
@@ -206,7 +266,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc1"
},
@@ -215,7 +275,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0x21"
},
@@ -224,7 +284,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PublicDescription": "L2_RQSTS.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x30"
},
@@ -233,7 +292,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -242,7 +301,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -251,7 +310,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc2"
},
@@ -260,7 +319,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x22"
},
@@ -269,7 +328,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0xc8"
},
@@ -278,7 +337,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0x28"
},
@@ -287,7 +346,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40"
},
@@ -296,7 +355,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x41"
},
@@ -305,7 +364,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x4f"
},
@@ -394,7 +453,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss) Available PDIST counters: 0",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
"UMask": "0xfd"
},
@@ -563,7 +622,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PublicDescription": "MEM_STORE_RETIRED.L2_HIT Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -572,7 +630,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses Available PDIST counters: 0",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -999,7 +1057,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "OFFCORE_REQUESTS.ALL_REQUESTS Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -1008,7 +1065,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -1017,7 +1074,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and non-cacheable code read requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts both cacheable and non-cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -1026,7 +1083,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore. Available PDIST counters: 0",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -1035,7 +1092,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -1045,7 +1102,6 @@
"Deprecated": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
- "PublicDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1055,7 +1111,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1065,7 +1120,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1075,7 +1130,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "PublicDescription": "Cycles where at least 1 outstanding demand data read request is pending. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1085,7 +1139,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -1094,7 +1147,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1103,7 +1155,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1112,7 +1164,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -1121,7 +1173,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
- "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory. Available PDIST counters: 0",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -1130,7 +1182,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.ANY",
- "PublicDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0xf"
},
@@ -1139,7 +1190,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -1148,7 +1199,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -1157,7 +1208,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -1166,7 +1217,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json
index 34e1cbcd722c..433ae5f50704 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/emr-metrics.json
@@ -1,28 +1,28 @@
[
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
@@ -40,6 +40,18 @@
"ScaleUnit": "1per_instr"
},
{
+ "BriefDescription": "The average number of cores that are in cstate C0 as observed by the power control unit (PCU)",
+ "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C0 / UNC_P_CLOCKTICKS * #num_packages",
+ "MetricGroup": "cpu_cstate",
+ "MetricName": "cpu_cstate_c0"
+ },
+ {
+ "BriefDescription": "The average number of cores that are in cstate C6 as observed by the power control unit (PCU)",
+ "MetricExpr": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C6 / UNC_P_CLOCKTICKS * #num_packages",
+ "MetricGroup": "cpu_cstate",
+ "MetricName": "cpu_cstate_c6"
+ },
+ {
"BriefDescription": "CPU operating frequency (in GHz)",
"MetricExpr": "CPU_CLK_UNHALTED.THREAD / CPU_CLK_UNHALTED.REF_TSC * #SYSTEM_TSC_FREQ / 1e9",
"MetricName": "cpu_operating_frequency",
@@ -79,6 +91,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device controllers that are requesting memory from the CPU and miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_read_local",
@@ -97,6 +115,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket",
"MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_write_local",
@@ -111,19 +135,19 @@
{
"BriefDescription": "Percentage of inbound full cacheline writes initiated by end device controllers that miss the L3 cache",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM",
- "MetricName": "io_percent_of_inbound_full_writes_that_miss_l3",
+ "MetricName": "io_full_write_l3_miss",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Percentage of inbound partial cacheline writes initiated by end device controllers that miss the L3 cache",
"MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)",
- "MetricName": "io_percent_of_inbound_partial_writes_that_miss_l3",
+ "MetricName": "io_partial_write_l3_miss",
"ScaleUnit": "100%"
},
{
"BriefDescription": "Percentage of inbound reads initiated by end device controllers that miss the L3 cache",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
- "MetricName": "io_percent_of_inbound_reads_that_miss_l3",
+ "MetricName": "io_read_l3_miss",
"ScaleUnit": "100%"
},
{
@@ -335,7 +359,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -370,39 +394,39 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + 0 / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + 0 / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -434,7 +458,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -450,7 +474,7 @@
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;Default;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -551,7 +575,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(76.6 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) + 74.6 * tma_info_system_core_frequency * MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -658,7 +681,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -786,7 +809,7 @@
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Default;Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1297,19 +1320,19 @@
{
"BriefDescription": "Off-core accesses per kilo instruction for modified write requests",
"MetricExpr": "1e3 * OCR.MODIFIED_WRITE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_mwrite_any_pki"
},
{
"BriefDescription": "Off-core accesses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "CacheHits;Offcore",
+ "MetricGroup": "CacheHits;Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_any_pki"
},
{
"BriefDescription": "L3 cache misses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.L3_MISS / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_l3m_pki"
},
{
@@ -1335,21 +1358,21 @@
{
"BriefDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket",
"MetricExpr": "64 * OCR.READS_TO_CORE.DRAM / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_dram_bw",
"PublicDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average L3-cache miss BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.L3_MISS / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_l3m_bw",
"PublicDescription": "Average L3-cache miss BW for Reads-to-Core (R2C). This covering going to DRAM or other memory off-chip memory tears. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average Off-core access BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.ANY_RESPONSE / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_offcore_bw",
"PublicDescription": "Average Off-core access BW for Reads-to-Core (R2C). R2C account for demand or prefetch load/RFO/code access that fill data into the Core caches."
},
@@ -1379,7 +1402,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Mem;Backend;CacheHits",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1426,7 +1449,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1438,16 +1461,28 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
{
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_read_bw"
+ },
+ {
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_write_bw"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1513,7 +1548,6 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / tma_info_system_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
"MetricName": "tma_info_system_mem_read_latency",
@@ -1674,12 +1708,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1693,7 +1727,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "4.4 * tma_info_system_core_frequency * MEM_LOAD_RETIRED.L2_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -1712,12 +1745,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "32.6 * tma_info_system_core_frequency * (MEM_LOAD_RETIRED.L3_HIT * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1800,6 +1832,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(16 * max(0, MEM_INST_RETIRED.LOCK_LOADS - L2_RQSTS.ALL_RFO) + MEM_INST_RETIRED.LOCK_LOADS / MEM_INST_RETIRED.ALL_STORES * (10 * L2_RQSTS.RFO_HIT + min(CPU_CLK_UNHALTED.THREAD, OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO))) / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1832,7 +1865,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1841,13 +1874,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Backend;Default;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -1857,7 +1890,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -1910,7 +1942,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2",
+ "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2.4",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -1945,6 +1977,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2006,6 +2039,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2015,6 +2049,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "(EXE_ACTIVITY.EXE_BOUND_0_PORTS + max(RS.EMPTY_RESOURCE - RESOURCE_STALLS.SCOREBOARD, 0)) / tma_info_thread_clks * (CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2024,6 +2059,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2033,7 +2069,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2043,7 +2078,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2072,7 +2106,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2100,7 +2134,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2132,7 +2165,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
index 8c9207750c82..bc475e163227 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/floating-point.json
@@ -5,7 +5,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PublicDescription": "ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PublicDescription": "Counts all microcode Floating Point assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -23,7 +22,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PublicDescription": "ASSISTS.SSE_AVX_MIX Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -32,7 +30,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -41,7 +38,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -50,7 +46,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -59,7 +54,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V0",
- "PublicDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -68,7 +62,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V1",
- "PublicDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -77,7 +70,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V2",
- "PublicDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -86,7 +78,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -95,7 +87,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -104,7 +96,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -113,7 +105,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -122,7 +114,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x18"
},
@@ -131,7 +123,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -140,7 +132,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -149,7 +141,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.8_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x60"
},
@@ -158,7 +150,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -167,7 +159,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -176,7 +168,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -185,7 +177,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.VECTOR",
- "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0xfc"
},
@@ -194,7 +186,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -203,7 +194,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -212,7 +202,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -221,7 +210,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -230,7 +218,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR",
"SampleAfterValue": "100003",
"UMask": "0x3"
},
@@ -239,7 +227,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -248,7 +235,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.VECTOR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR",
"SampleAfterValue": "100003",
"UMask": "0x1c"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json
index 9fe9d62b867a..793c486ffabe 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/frontend.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "BACLEARS.ANY",
- "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -13,7 +13,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -22,7 +22,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PublicDescription": "Cycles the Microcode Sequencer is busy. Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x2"
},
@@ -31,7 +30,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE. Available PDIST counters: 0",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -249,7 +248,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -260,7 +259,6 @@
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
- "PublicDescription": "ICACHE_DATA.STALL_PERIODS Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -269,7 +267,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -279,7 +277,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -289,7 +287,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -298,7 +296,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -308,7 +306,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -318,7 +316,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -327,7 +325,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -337,7 +335,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -348,7 +346,7 @@
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer. Available PDIST counters: 0",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -357,7 +355,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS).",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -366,7 +364,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -376,7 +374,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -387,7 +385,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -396,7 +394,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -406,7 +404,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -417,7 +415,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json
index 7c3f9b76d367..5e6c1f05c981 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/memory.json
@@ -5,7 +5,6 @@
"CounterMask": "6",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x6"
},
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -24,7 +23,6 @@
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -34,7 +32,6 @@
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -44,7 +41,7 @@
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -54,7 +51,7 @@
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -478,7 +475,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Counts demand data read requests that miss the L3 cache. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -487,7 +483,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -505,7 +501,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_EVENTS",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt).",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -514,7 +510,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEM",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -523,7 +519,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -532,7 +528,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -541,7 +537,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.COMMIT",
- "PublicDescription": "Counts the number of times RTM commit succeeded. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times RTM commit succeeded.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -550,7 +546,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.START",
- "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -559,7 +555,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_READ",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -568,7 +564,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes. Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -577,7 +573,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CONFLICT",
- "PublicDescription": "Counts the number of times a TSX line had a cache conflict. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
"SampleAfterValue": "100003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/other.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/other.json
index a58d65556609..21f49f609ed4 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/other.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/other.json
@@ -4,11 +4,35 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
- "PublicDescription": "ASSISTS.PAGE_FAULT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
{
+ "BriefDescription": "HW_INTERRUPTS.MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of hardware interrupts received by the processor.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.RECEIVED",
+ "PublicDescription": "Counts the number of hardware interruptions received by the processor.",
+ "SampleAfterValue": "203",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts streaming stores that have any type of response.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -25,7 +49,7 @@
"CounterMask": "1",
"EventCode": "0x2d",
"EventName": "XQ.FULL_CYCLES",
- "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache). Available PDIST counters: 0",
+ "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
index 48bec483b49a..1fa7957956df 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/pipeline.json
@@ -6,7 +6,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -16,7 +15,7 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIV_ACTIVE",
- "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -27,7 +26,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FP_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -37,7 +35,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.IDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the integer divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -48,7 +45,6 @@
"Deprecated": "1",
"EventCode": "0xb0",
"EventName": "ARITH.INT_DIVIDER_ACTIVE",
- "PublicDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -57,7 +53,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.ANY",
- "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
"SampleAfterValue": "100003",
"UMask": "0x1b"
},
@@ -217,7 +213,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C01",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -226,7 +222,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C02",
- "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -235,7 +231,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C0_WAIT",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
"SampleAfterValue": "2000003",
"UMask": "0x70"
},
@@ -244,7 +240,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -253,7 +249,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted. Available PDIST counters: 0",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
"SampleAfterValue": "25003",
"UMask": "0x2"
},
@@ -262,7 +258,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -273,7 +268,6 @@
"EdgeDetect": "1",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -282,7 +276,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
- "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -299,7 +293,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -316,7 +310,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. Available PDIST counters: 0",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003"
},
{
@@ -325,7 +319,6 @@
"CounterMask": "8",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -335,7 +328,6 @@
"CounterMask": "1",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "PublicDescription": "Cycles while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -345,7 +337,6 @@
"CounterMask": "16",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "PublicDescription": "Cycles while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -355,7 +346,6 @@
"CounterMask": "12",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -365,7 +355,6 @@
"CounterMask": "5",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -375,7 +364,6 @@
"CounterMask": "4",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "PublicDescription": "Total execution stalls. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -384,7 +372,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb7",
"EventName": "EXE.AMX_BUSY",
- "PublicDescription": "Counts the cycles where the AMX (Advance Matrix Extension) unit is busy performing an operation. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -393,7 +380,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -402,7 +389,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 2 or 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0xc"
},
@@ -411,7 +397,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -420,7 +406,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 3 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -429,7 +415,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 4 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -439,7 +425,6 @@
"CounterMask": "5",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
- "PublicDescription": "Execution stalls while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x21"
},
@@ -449,7 +434,7 @@
"CounterMask": "2",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -458,7 +443,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load. Available PDIST counters: 0",
+ "PublicDescription": "Number of cycles in which a total of 0 uops were executed on all ports, the Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -467,7 +452,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "INST_DECODED.DECODERS",
- "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -492,7 +477,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -501,7 +485,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -518,7 +502,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -529,7 +513,7 @@
"EdgeDetect": "1",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEARS_COUNT",
- "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -538,7 +522,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path. Available PDIST counters: 0",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"SampleAfterValue": "500009",
"UMask": "0x80"
},
@@ -547,7 +531,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.MBA_STALLS",
- "PublicDescription": "INT_MISC.MBA_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -556,7 +539,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event. Available PDIST counters: 0",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -567,7 +550,6 @@
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
"MSRValue": "0x7",
- "PublicDescription": "Bubble cycles of BAClear (Unknown Branch). Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -576,7 +558,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UOP_DROPPING",
- "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons Available PDIST counters: 0",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -585,7 +567,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.128BIT",
- "PublicDescription": "INT_VEC_RETIRED.128BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x13"
},
@@ -594,7 +575,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.256BIT",
- "PublicDescription": "INT_VEC_RETIRED.256BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xac"
},
@@ -603,7 +583,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_128",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -612,7 +592,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_256",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -621,7 +601,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.MUL_256",
- "PublicDescription": "INT_VEC_RETIRED.MUL_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -630,7 +609,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.SHUFFLES",
- "PublicDescription": "INT_VEC_RETIRED.SHUFFLES Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -639,7 +617,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_128",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_128 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -648,7 +625,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_256",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -657,7 +633,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -666,7 +642,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
"UMask": "0x88"
},
@@ -675,7 +651,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
"UMask": "0x82"
},
@@ -684,7 +660,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -694,7 +670,7 @@
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -704,7 +680,7 @@
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -713,7 +689,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa8",
"EventName": "LSD.UOPS",
- "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD (Loop Stream Detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -724,7 +700,7 @@
"EdgeDetect": "1",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.COUNT",
- "PublicDescription": "Counts the number of machine clears (nukes) of any type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -733,7 +709,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear. Available PDIST counters: 0",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -742,7 +718,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
- "PublicDescription": "number of LFENCE retired instructions Available PDIST counters: 0",
+ "PublicDescription": "Number of retired LFENCE instructions.",
"SampleAfterValue": "400009",
"UMask": "0x20"
},
@@ -751,7 +727,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcc",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT. Available PDIST counters: 0",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -760,7 +736,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end. Available PDIST counters: 0",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -769,7 +745,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
- "PublicDescription": "Counts cycles where the pipeline is stalled due to serializing operations. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -778,7 +753,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses) Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses).",
"SampleAfterValue": "1000003",
"UMask": "0x7"
},
@@ -790,7 +765,7 @@
"EventCode": "0xa5",
"EventName": "RS.EMPTY_COUNT",
"Invert": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events) Available PDIST counters: 0",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events).",
"SampleAfterValue": "100003",
"UMask": "0x7"
},
@@ -799,7 +774,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY_RESOURCE",
- "PublicDescription": "Cycles when Reservation Station (RS) is empty due to a resource in the back-end Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -812,7 +786,6 @@
"EventCode": "0xa5",
"EventName": "RS_EMPTY.COUNT",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x7"
},
@@ -822,7 +795,6 @@
"Deprecated": "1",
"EventCode": "0xa5",
"EventName": "RS_EMPTY.CYCLES",
- "PublicDescription": "This event is deprecated. Refer to new event RS.EMPTY Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x7"
},
@@ -831,7 +803,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
"SampleAfterValue": "10000003",
"UMask": "0x2"
},
@@ -840,7 +812,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
- "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
"SampleAfterValue": "10000003",
"UMask": "0x4"
},
@@ -849,7 +821,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction. Available PDIST counters: 0",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8"
},
@@ -858,7 +830,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "PublicDescription": "TOPDOWN.MEMORY_BOUND_SLOTS Available PDIST counters: 0",
"SampleAfterValue": "10000003",
"UMask": "0x10"
},
@@ -875,7 +846,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
"UMask": "0x1"
},
@@ -884,7 +855,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
- "PublicDescription": "UOPS_DECODED.DEC0_UOPS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -893,7 +863,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_0",
- "PublicDescription": "Number of uops dispatch to execution port 0. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatched to execution port 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -902,7 +872,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_1",
- "PublicDescription": "Number of uops dispatch to execution port 1. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatched to execution port 1.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -911,7 +881,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
- "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatched to execution ports 2, 3 and 10.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -920,7 +890,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_4_9",
- "PublicDescription": "Number of uops dispatch to execution ports 4 and 9 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatched to execution ports 4 and 9.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -929,7 +899,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_5_11",
- "PublicDescription": "Number of uops dispatch to execution ports 5 and 11 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatched to execution ports 5 and 11.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -938,7 +908,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_6",
- "PublicDescription": "Number of uops dispatch to execution port 6. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatched to execution port 6.",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -947,7 +917,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_7_8",
- "PublicDescription": "Number of uops dispatch to execution ports 7 and 8. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatched to execution ports 7 and 8.",
"SampleAfterValue": "2000003",
"UMask": "0x80"
},
@@ -956,7 +926,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE",
- "PublicDescription": "Counts the number of uops executed from any thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops executed from any thread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -966,7 +936,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -976,7 +946,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -986,7 +956,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -996,7 +966,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1006,7 +976,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1016,7 +986,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1026,7 +996,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1036,7 +1006,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1047,7 +1017,7 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALLS",
"Invert": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1059,7 +1029,6 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_EXECUTED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1068,7 +1037,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Counts the number of uops to be executed per-thread each cycle. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1077,7 +1045,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -1086,7 +1054,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1096,7 +1064,6 @@
"CounterMask": "1",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.CYCLES",
- "PublicDescription": "UOPS_ISSUED.CYCLES Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1106,7 +1073,7 @@
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.CYCLES",
- "PublicDescription": "Counts cycles where at least one uop has retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1115,7 +1082,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
- "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1126,7 +1093,6 @@
"EventName": "UOPS_RETIRED.MS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "UOPS_RETIRED.MS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -1135,7 +1101,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "Counts the retirement slots used each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the retirement slots used each cycle.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1146,7 +1112,7 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALLS",
"Invert": "1",
- "PublicDescription": "This event counts cycles without actually retired uops. Available PDIST counters: 0",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1158,7 +1124,6 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALL_CYCLES",
"Invert": "1",
- "PublicDescription": "This event is deprecated. Refer to new event UOPS_RETIRED.STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
}
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
index f453202d80c2..92cf47967f0b 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-cache.json
@@ -312,6 +312,17 @@
"Unit": "CHA"
},
{
+ "BriefDescription": "Distress signal asserted : DPT Remote",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xaf",
+ "EventName": "UNC_CHA_DISTRESS_ASSERTED.DPT_NONLOCAL",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Distress signal asserted : DPT Remote : Counts the number of cycles either the local or incoming distress signals are asserted. : Dynamic Prefetch Throttle received by this tile",
+ "UMask": "0x8",
+ "Unit": "CHA"
+ },
+ {
"BriefDescription": "Egress Blocking due to Ordering requirements : Down",
"Counter": "0,1,2,3",
"EventCode": "0xba",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json
index 68be01dad7c9..30044177ccf8 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-memory.json
@@ -2770,6 +2770,88 @@
"Unit": "iMC"
},
{
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Number of DRAM Refreshes Issued : Counts the number of refreshes issued.",
+ "UMask": "0x24",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH_ALL",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x24",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH_PCH0",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.HIGH_PCH1",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x20",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Number of DRAM Refreshes Issued : Counts the number of refreshes issued.",
+ "UMask": "0x12",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC_ALL",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x12",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC_PCH0",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Number of DRAM Refreshes Issued",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x45",
+ "EventName": "UNC_M_DRAM_REFRESH.PANIC_PCH1",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x10",
+ "Unit": "iMC"
+ },
+ {
"BriefDescription": "ECC Correctable Errors",
"Counter": "0,1,2,3",
"EventCode": "0x09",
@@ -3048,6 +3130,28 @@
"Unit": "iMC"
},
{
+ "BriefDescription": "Throttle Cycles for Rank 0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x46",
+ "EventName": "UNC_M_POWER_THROTTLE_CYCLES.SLOT0",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Throttle Cycles for Rank 0 : Counts the number of cycles while the iMC is being throttled by either thermal constraints or by the PCU throttling. It is not possible to distinguish between the two. This can be filtered by rank. If multiple ranks are selected and are being throttled at the same time, the counter will only increment by 1. : Thermal throttling is performed per DIMM. We support 3 DIMMs per channel. This ID allows us to filter by ID.",
+ "UMask": "0x1",
+ "Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Throttle Cycles for Rank 0",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x46",
+ "EventName": "UNC_M_POWER_THROTTLE_CYCLES.SLOT1",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "PublicDescription": "Throttle Cycles for Rank 0 : Counts the number of cycles while the iMC is being throttled by either thermal constraints or by the PCU throttling. It is not possible to distinguish between the two. This can be filtered by rank. If multiple ranks are selected and are being throttled at the same time, the counter will only increment by 1.",
+ "UMask": "0x2",
+ "Unit": "iMC"
+ },
+ {
"BriefDescription": "Precharge due to read, write, underfill, or PGT.",
"Counter": "0,1,2,3",
"EventCode": "0x03",
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json
index 9482ddaea4d1..71c35b165a3e 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/uncore-power.json
@@ -178,7 +178,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x35",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C0",
- "Experimental": "1",
"PerPkg": "1",
"PublicDescription": "Number of cores in C0 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
"Unit": "PCU"
@@ -198,7 +197,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x37",
"EventName": "UNC_P_POWER_STATE_OCCUPANCY_CORES_C6",
- "Experimental": "1",
"PerPkg": "1",
"PublicDescription": "Number of cores in C6 : This is an occupancy event that tracks the number of cores that are in the chosen C-State. It can be used by itself to get the average number of cores in that C-state with thresholding to generate histograms, or with other PCU events and occupancy triggering to capture other details.",
"Unit": "PCU"
diff --git a/tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json b/tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json
index 3d3f88600e26..609a9549cbf3 100644
--- a/tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/emeraldrapids/virtual-memory.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -14,7 +14,7 @@
"CounterMask": "1",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -23,7 +23,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -32,7 +32,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -41,7 +41,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -50,7 +50,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -59,7 +59,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -68,7 +68,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -78,7 +78,7 @@
"CounterMask": "1",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -87,7 +87,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -96,7 +96,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -105,7 +105,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -114,7 +114,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -123,7 +123,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -132,7 +132,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.STLB_HIT",
- "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -142,7 +142,7 @@
"CounterMask": "1",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -151,7 +151,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -160,7 +160,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -169,7 +169,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -178,7 +178,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
}
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/cache.json b/tools/perf/pmu-events/arch/x86/grandridge/cache.json
index 877052db1490..9abddb06a837 100644
--- a/tools/perf/pmu-events/arch/x86/grandridge/cache.json
+++ b/tools/perf/pmu-events/arch/x86/grandridge/cache.json
@@ -286,7 +286,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_1024",
@@ -297,7 +297,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -308,7 +308,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -319,7 +319,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_2048",
@@ -330,7 +330,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -341,7 +341,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -352,7 +352,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -363,7 +363,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -374,7 +374,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -385,7 +385,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled.",
- "Counter": "0,1",
+ "Counter": "0,1,2,3,4,5,6,7",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
diff --git a/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json b/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json
index 878b1caf12de..a0d637a24c1b 100644
--- a/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/grandridge/grr-metrics.json
@@ -1,56 +1,56 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
@@ -633,7 +633,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricName": "tma_info_system_cpu_utilization"
},
{
@@ -645,7 +645,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE_P@k / CPU_CLK_UNHALTED.CORE",
+ "MetricExpr": "CPU_CLK_UNHALTED.CORE_P:k / CPU_CLK_UNHALTED.CORE",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_kernel_utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json
index dbdeade6fe6f..db28866444b6 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/cache.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/cache.json
@@ -4,7 +4,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PublicDescription": "L1D.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -13,7 +12,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace. Available PDIST counters: 0",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -22,7 +21,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -33,7 +32,7 @@
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -42,7 +41,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -51,7 +50,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -61,7 +60,7 @@
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles. Available PDIST counters: 0",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -70,7 +69,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
"UMask": "0x1f"
},
@@ -79,7 +78,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3 Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3",
"SampleAfterValue": "200003",
"UMask": "0x2"
},
@@ -88,7 +87,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -97,7 +96,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -106,7 +105,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -115,7 +114,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.HIT",
- "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT]",
"SampleAfterValue": "200003",
"UMask": "0xdf"
},
@@ -124,7 +123,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -133,7 +132,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
"UMask": "0xe4"
},
@@ -142,7 +141,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0xe1"
},
@@ -151,7 +150,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PublicDescription": "Counts demand requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x27"
},
@@ -160,7 +159,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "PublicDescription": "Counts demand requests to L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests to L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xe7"
},
@@ -169,7 +168,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PublicDescription": "L2_RQSTS.ALL_HWPF Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0xf0"
},
@@ -178,7 +176,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
"UMask": "0xe2"
},
@@ -187,7 +185,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
"UMask": "0xc4"
},
@@ -196,7 +194,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
"UMask": "0x24"
},
@@ -205,7 +203,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc1"
},
@@ -214,7 +212,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0x21"
},
@@ -223,7 +221,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HIT",
- "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT]",
"SampleAfterValue": "200003",
"UMask": "0xdf"
},
@@ -232,7 +230,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PublicDescription": "L2_RQSTS.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x30"
},
@@ -241,7 +238,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f"
},
@@ -250,7 +247,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
"UMask": "0xff"
},
@@ -259,7 +256,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc2"
},
@@ -268,7 +265,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x22"
},
@@ -277,7 +274,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0xc8"
},
@@ -286,7 +283,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0x28"
},
@@ -295,7 +292,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40"
},
@@ -304,7 +301,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x41"
},
@@ -313,7 +310,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x4f"
},
@@ -437,7 +434,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss) Available PDIST counters: 0",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
"UMask": "0xfd"
},
@@ -491,12 +488,12 @@
"UMask": "0x2"
},
{
- "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
+ "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from DRAM homed in the local socket",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd3",
"EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
- "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM. Available PDIST counters: 0",
+ "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from DRAM homed in the local socket. Available PDIST counters: 0",
"RetirementLatencyMax": 4146,
"RetirementLatencyMean": 115.83,
"RetirementLatencyMin": 0,
@@ -504,6 +501,15 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Retired load instructions with remote cxl mem as the data source where the data request missed all caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd3",
+ "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_CXL_MEM",
+ "PublicDescription": "Counts retired load instructions with remote cxl mem as the data source and the data request missed L3. Available PDIST counters: 0",
+ "SampleAfterValue": "100007",
+ "UMask": "0x10"
+ },
+ {
"BriefDescription": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM",
"Counter": "0,1,2,3",
"Data_LA": "1",
@@ -629,11 +635,20 @@
"UMask": "0x20"
},
{
+ "BriefDescription": "Retired load instructions with local cxl mem as the data source where the data request missed all caches.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd1",
+ "EventName": "MEM_LOAD_RETIRED.LOCAL_CXL_MEM",
+ "PublicDescription": "Counts retired load instructions with local cxl mem as the data source and the data request missed L3. Available PDIST counters: 0",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80"
+ },
+ {
"BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PublicDescription": "MEM_STORE_RETIRED.L2_HIT Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x1"
},
@@ -642,7 +657,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses Available PDIST counters: 0",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -691,6 +706,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 or Type 3).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703C00001",
+ "PublicDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 or Type 3). Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts demand data reads that hit in the L3 or were snooped from another core's caches on the same socket.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -735,6 +761,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.LOCAL_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x700C00001",
+ "PublicDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts demand data reads that were supplied by a cache on a remote socket where a snoop hit a modified line in another core's caches which forwarded the data.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -757,6 +794,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_DATA_RD.REMOTE_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703000001",
+ "PublicDescription": "Counts demand data reads that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts demand data reads that hit a modified line in a distant L3 Cache or were snooped from a distant core's L1/L2 caches on this socket when the system is in SNC (sub-NUMA cluster) mode.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -790,6 +838,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 or Type 3).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_RFO.CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703C00002",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 or Type 3). Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that hit in the L3 or were snooped from another core's caches on the same socket.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -812,6 +871,28 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_RFO.LOCAL_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x700C00002",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
+ "BriefDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.DEMAND_RFO.REMOTE_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703000002",
+ "PublicDescription": "Counts demand reads for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts writebacks of modified cachelines and streaming stores that have any type of response.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -834,6 +915,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 or Type 3).",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703C04477",
+ "PublicDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 or Type 3). Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches on the same socket.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -856,6 +948,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.LOCAL_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x700C04477",
+ "PublicDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 and Type 3) attached to local socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were not supplied by the local socket's L1, L2, or L3 caches and were supplied by a remote socket.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -900,6 +1003,17 @@
"UMask": "0x1"
},
{
+ "BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.REMOTE_CXL_MEM",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x703004477",
+ "PublicDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that were supplied by CXL MEM (Type 2 or Type 3) attached to another socket. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts all (cacheable) data read, code read and RFO requests including demands and prefetches to the core caches (L1 or L2) that hit a modified line in a distant L3 Cache or were snooped from a distant core's L1/L2 caches on this socket when the system is in SNC (sub-NUMA cluster) mode.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -937,7 +1051,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc.. Available PDIST counters: 0",
+ "PublicDescription": "Counts memory transactions that reached the super queue, including requests initiated by the core, all L3 prefetches, page walks, etc.",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -946,7 +1060,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulting from any request type.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -955,7 +1069,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -964,7 +1078,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore. Available PDIST counters: 0",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -973,7 +1087,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -982,7 +1096,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.MEM_UC",
- "PublicDescription": "This event counts noncacheable memory data read transactions. Available PDIST counters: 0",
+ "PublicDescription": "This event counts noncacheable memory data read transactions.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -992,7 +1106,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1002,7 +1116,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1012,7 +1126,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "PublicDescription": "Cycles where at least 1 outstanding demand data read request is pending. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1022,7 +1135,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -1031,7 +1144,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -1040,7 +1152,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1049,7 +1161,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -1058,7 +1170,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
- "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -1067,7 +1179,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
- "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory. Available PDIST counters: 0",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -1076,7 +1188,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.ANY",
- "PublicDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0xf"
},
@@ -1085,7 +1196,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -1094,7 +1205,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -1103,7 +1214,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -1112,7 +1223,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json b/tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json
index 1832dd952f66..59789eee060c 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/floating-point.json
@@ -5,7 +5,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the floating point divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -14,7 +13,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PublicDescription": "Counts all microcode Floating Point assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -23,7 +22,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PublicDescription": "ASSISTS.SSE_AVX_MIX Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -32,7 +30,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -41,7 +38,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -50,7 +46,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -59,7 +54,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V0",
- "PublicDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -68,7 +62,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V1",
- "PublicDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -77,7 +70,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V2",
- "PublicDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -86,7 +78,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -95,7 +87,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -104,7 +96,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -113,7 +105,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -122,7 +114,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x18"
},
@@ -131,7 +123,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -140,7 +132,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.512B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 512-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 16 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT14 RCP14 FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -149,7 +141,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.8_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision and 512-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision and double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RSQRT14 RCP RCP14 DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x60"
},
@@ -158,7 +150,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -167,7 +159,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -176,7 +168,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -185,7 +177,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.VECTOR",
- "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0xfc"
},
@@ -194,7 +186,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.128B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -203,7 +194,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.256B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -212,7 +202,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.512B_PACKED_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -221,7 +210,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.COMPLEX_SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -230,7 +218,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR",
"SampleAfterValue": "100003",
"UMask": "0x3"
},
@@ -239,7 +227,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.SCALAR_HALF",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.SCALAR_HALF Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -248,7 +235,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcf",
"EventName": "FP_ARITH_INST_RETIRED2.VECTOR",
- "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR Available PDIST counters: 0",
+ "PublicDescription": "FP_ARITH_INST_RETIRED2.VECTOR",
"SampleAfterValue": "100003",
"UMask": "0x1c"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/frontend.json b/tools/perf/pmu-events/arch/x86/graniterapids/frontend.json
index b7cd92fbecd5..d580d305c926 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/frontend.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "BACLEARS.ANY",
- "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -13,7 +13,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -22,7 +22,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PublicDescription": "Cycles the Microcode Sequencer is busy. Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x2"
},
@@ -31,7 +30,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE. Available PDIST counters: 0",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -303,7 +302,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -314,7 +313,6 @@
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
- "PublicDescription": "ICACHE_DATA.STALL_PERIODS Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x4"
},
@@ -323,7 +321,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
"UMask": "0x4"
},
@@ -333,7 +331,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -343,7 +341,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -352,7 +350,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -362,7 +360,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -372,7 +370,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -381,7 +379,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -391,7 +389,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -402,7 +400,7 @@
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer. Available PDIST counters: 0",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -411,7 +409,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -420,7 +418,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. The count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. The count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -430,7 +428,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -441,7 +439,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -450,7 +448,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. This event counts for one SMT thread in a given cycle.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -460,7 +458,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -471,7 +469,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there were no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json
index 9a620e1b8de8..cc3c834ca286 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/gnr-metrics.json
@@ -1,28 +1,28 @@
[
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
@@ -381,7 +381,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-be\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -416,39 +416,39 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + 0 / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * tma_amx_busy / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - INST_RETIRED.REP_ITERATION / cpu@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + RS.EMPTY_RESOURCE / tma_info_thread_clks * tma_ports_utilized_0) / (tma_amx_busy + tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -456,7 +456,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -464,7 +464,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
"MetricThreshold": "tma_bottleneck_memory_synchronization > 10",
@@ -480,7 +480,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -496,7 +496,7 @@
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-br\\-mispredict / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;Default;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -613,7 +613,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS * min(MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS:R, 74.6 * tma_info_system_core_frequency) + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * min(MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD:R, 76.6 * tma_info_system_core_frequency) * (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -633,6 +632,15 @@
"ScaleUnit": "100%"
},
{
+ "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_CXL_MEM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_CXL_MEM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 1)) * (MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_CXL_MEM + MEM_LOAD_RETIRED.LOCAL_CXL_MEM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_cxl_mem_bound",
+ "MetricThreshold": "tma_cxl_mem_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g. 3D-Xpoint (Crystal Ridge, a.k.a. IXP) memory, PMM - Persistent Memory Module [from CLX to SPR] or any other CXL Type3 Memory [EMR onwards]).",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD * min(MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD:R, 74.6 * tma_info_system_core_frequency) + MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD * min(MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD:R, 74.6 * tma_info_system_core_frequency) * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
@@ -662,7 +670,7 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
- "MetricExpr": "MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks",
+ "MetricExpr": "(MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks - tma_cxl_mem_bound if #has_pmem > 0 else MEMORY_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -720,7 +728,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -848,7 +856,7 @@
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-heavy\\-ops / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Default;Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1395,19 +1403,19 @@
{
"BriefDescription": "Off-core accesses per kilo instruction for modified write requests",
"MetricExpr": "1e3 * OCR.MODIFIED_WRITE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_mwrite_any_pki"
},
{
"BriefDescription": "Off-core accesses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.ANY_RESPONSE / tma_info_inst_mix_instructions",
- "MetricGroup": "CacheHits;Offcore",
+ "MetricGroup": "CacheHits;Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_any_pki"
},
{
"BriefDescription": "L3 cache misses per kilo instruction for reads-to-core requests (speculative; including in-core HW prefetches)",
"MetricExpr": "1e3 * OCR.READS_TO_CORE.L3_MISS / tma_info_inst_mix_instructions",
- "MetricGroup": "Offcore",
+ "MetricGroup": "Offcore;Server",
"MetricName": "tma_info_memory_mix_offcore_read_l3m_pki"
},
{
@@ -1433,21 +1441,21 @@
{
"BriefDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket",
"MetricExpr": "64 * OCR.READS_TO_CORE.DRAM / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_dram_bw",
"PublicDescription": "Average DRAM BW for Reads-to-Core (R2C) covering for memory attached to local- and remote-socket. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average L3-cache miss BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.L3_MISS / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_l3m_bw",
"PublicDescription": "Average L3-cache miss BW for Reads-to-Core (R2C). This covering going to DRAM or other memory off-chip memory tears. See R2C_Offcore_BW."
},
{
"BriefDescription": "Average Off-core access BW for Reads-to-Core (R2C)",
"MetricExpr": "64 * OCR.READS_TO_CORE.ANY_RESPONSE / 1e9 / tma_info_system_time",
- "MetricGroup": "HPC;Mem;MemoryBW;SoC",
+ "MetricGroup": "HPC;Mem;MemoryBW;Offcore;Server",
"MetricName": "tma_info_memory_soc_r2c_offcore_bw",
"PublicDescription": "Average Off-core access BW for Reads-to-Core (R2C). R2C account for demand or prefetch load/RFO/code access that fill data into the Core caches."
},
@@ -1491,7 +1499,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Average number of uops executed per cycle in which at least one uop was executed",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1538,7 +1546,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1550,16 +1558,28 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
{
+ "BriefDescription": "Average CXL Memory Bandwidth Use for reads [GB / sec]",
+ "MetricExpr": "(64 * UNC_CXLCM_RxC_PACK_BUF_INSERTS.MEM_DATA / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_read_bw"
+ },
+ {
+ "BriefDescription": "Average CXL Memory Bandwidth Use for writes [GB / sec]",
+ "MetricExpr": "(64 * UNC_CXLDP_TxC_AGF_INSERTS.M2S_DATA / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_write_bw"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "64 * (UNC_M_CAS_COUNT_SCH0.RD + UNC_M_CAS_COUNT_SCH1.RD + UNC_M_CAS_COUNT_SCH0.WR + UNC_M_CAS_COUNT_SCH1.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1771,12 +1791,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - MEMORY_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1790,7 +1810,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_LOAD_RETIRED.L2_HIT * min(MEM_LOAD_RETIRED.L2_HIT:R, 4.4 * tma_info_system_core_frequency) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -1809,12 +1828,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_LOAD_RETIRED.L3_HIT * min(MEM_LOAD_RETIRED.L3_HIT:R, 32.6 * tma_info_system_core_frequency) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1897,6 +1915,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "MEM_INST_RETIRED.LOCK_LOADS * MEM_INST_RETIRED.LOCK_LOADS:R / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -1929,7 +1948,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1938,13 +1957,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"DefaultMetricgroupName": "TopdownL2",
- "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-mem\\-bound / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "Backend;Default;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -1954,7 +1973,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * MISC2_RETIRED.LFENCE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2007,7 +2025,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2",
+ "MetricExpr": "max(IDQ.MS_CYCLES_ANY, cpu@UOPS_RETIRED.MS\\,cmask\\=1@ / (UOPS_RETIRED.SLOTS / UOPS_ISSUED.ANY)) / tma_info_core_core_clks / 2.4",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2042,6 +2060,7 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "max(0, tma_light_operations - (tma_fp_arith + tma_int_operations + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
@@ -2103,6 +2122,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((tma_ports_utilized_0 * tma_info_thread_clks + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL)) / tma_info_thread_clks if ARITH.DIV_ACTIVE < CYCLE_ACTIVITY.STALLS_TOTAL - EXE_ACTIVITY.BOUND_ON_LOADS else (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_3_PORTS_UTIL) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2112,6 +2132,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "max(EXE_ACTIVITY.EXE_BOUND_0_PORTS - RESOURCE_STALLS.SCOREBOARD, 0) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2121,6 +2142,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "EXE_ACTIVITY.1_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2130,7 +2152,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "EXE_ACTIVITY.2_PORTS_UTIL / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2140,7 +2161,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "UOPS_EXECUTED.CYCLES_GE_3 / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2150,7 +2170,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote cache in other sockets including synchronizations issues",
- "MetricExpr": "(MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM:R + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD:R) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+ "MetricExpr": "(MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * PEBS + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * PEBS) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Offcore;Server;Snoop;TopdownL5;tma_L5_group;tma_issueSyncxn;tma_mem_latency_group",
"MetricName": "tma_remote_cache",
"MetricThreshold": "tma_remote_cache > 0.05 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2159,7 +2179,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
- "MetricExpr": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM:R * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
+ "MetricExpr": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * PEBS * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_mem",
"MetricThreshold": "tma_remote_mem > 0.1 & (tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)))",
@@ -2177,7 +2197,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2205,7 +2225,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "CPU_CLK_UNHALTED.PAUSE / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2237,7 +2256,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/memory.json b/tools/perf/pmu-events/arch/x86/graniterapids/memory.json
index 4db39f304c2c..96f40390becf 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/memory.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/memory.json
@@ -5,7 +5,6 @@
"CounterMask": "2",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
- "PublicDescription": "Cycles while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -15,7 +14,6 @@
"CounterMask": "6",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x6"
},
@@ -24,7 +22,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of Machine Clears detected due to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -34,7 +32,6 @@
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -44,7 +41,6 @@
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -54,7 +50,7 @@
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -64,7 +60,7 @@
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -412,7 +408,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Counts demand data read requests that miss the L3 cache. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -422,7 +417,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ. Available PDIST counters: 0",
+ "PublicDescription": "Cycles with at least 1 Demand Data Read request that missed the L3 cache in the superQ.",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -431,7 +426,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -449,7 +444,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_EVENTS",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to none of the previous 3 categories (e.g. interrupt).",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -458,7 +453,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEM",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to various memory events (e.g. read/write capacity and conflicts).",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -467,7 +462,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_MEMTYPE",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to incompatible memory type.",
"SampleAfterValue": "100003",
"UMask": "0x40"
},
@@ -476,7 +471,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.ABORTED_UNFRIENDLY",
- "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times an RTM execution aborted due to HLE-unfriendly instructions.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -485,7 +480,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.COMMIT",
- "PublicDescription": "Counts the number of times RTM commit succeeded. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times RTM commit succeeded.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -494,7 +489,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc9",
"EventName": "RTM_RETIRED.START",
- "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times we entered an RTM region. Does not count nested transactions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -503,7 +498,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_READ",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional reads.",
"SampleAfterValue": "100003",
"UMask": "0x80"
},
@@ -512,7 +507,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CAPACITY_WRITE",
- "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes. Available PDIST counters: 0",
+ "PublicDescription": "Speculatively counts the number of Transactional Synchronization Extensions (TSX) aborts due to a data capacity limitation for transactional writes.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -521,7 +516,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x54",
"EventName": "TX_MEM.ABORT_CONFLICT",
- "PublicDescription": "Counts the number of times a TSX line had a cache conflict. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a TSX line had a cache conflict.",
"SampleAfterValue": "100003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/other.json b/tools/perf/pmu-events/arch/x86/graniterapids/other.json
index 8b7aa4caec46..c0747750b1a8 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/other.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/other.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.HARDWARE",
- "PublicDescription": "Count all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists who are counted by dedicated sub-events. This includes, but not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist ) , assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. the event also counts for Machine Ordering count. Available PDIST counters: 0",
+ "PublicDescription": "Counts all other hardware assists or traps that are not necessarily architecturally exposed (through a software handler) beyond FP; SSE-AVX mix and A/D assists, which are counted by dedicated sub-events. This includes, but is not limited to, assists at EXE or MEM uop writeback like AVX* load/store/gather/scatter (non-FP GSSE-assist), assists generated by ROB like PEBS and RTIT, Uncore trap, RAR (Remote Action Request) and CET (Control flow Enforcement Technology) assists. The event also counts for Machine Ordering count.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -13,11 +13,35 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.PAGE_FAULT",
- "PublicDescription": "ASSISTS.PAGE_FAULT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
{
+ "BriefDescription": "HW_INTERRUPTS.MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x2"
+ },
+ {
+ "BriefDescription": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED",
+ "SampleAfterValue": "100003",
+ "UMask": "0x4"
+ },
+ {
+ "BriefDescription": "Number of hardware interrupts received by the processor.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xcb",
+ "EventName": "HW_INTERRUPTS.RECEIVED",
+ "PublicDescription": "Counts the number of hardware interrupts received by the processor.",
+ "SampleAfterValue": "203",
+ "UMask": "0x1"
+ },
+ {
"BriefDescription": "Counts streaming stores that have any type of response.",
"Counter": "0,1,2,3",
"EventCode": "0x2A,0x2B",
@@ -34,7 +58,7 @@
"CounterMask": "1",
"EventCode": "0x2d",
"EventName": "XQ.FULL_CYCLES",
- "PublicDescription": "number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache). Available PDIST counters: 0",
+ "PublicDescription": "Number of cycles when the thread is active and the uncore cannot take any further requests (for example prefetches, loads or stores initiated by the Core that miss the L2 cache).",
"SampleAfterValue": "1000003",
"UMask": "0x1"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json
index 27af3bd6bacf..0fef8fd61974 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/pipeline.json
@@ -5,7 +5,7 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.DIV_ACTIVE",
- "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
"SampleAfterValue": "1000003",
"UMask": "0x9"
},
@@ -15,7 +15,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.IDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the integer divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -24,7 +23,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.ANY",
- "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
"SampleAfterValue": "100003",
"UMask": "0x1b"
},
@@ -271,7 +270,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C01",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -280,7 +279,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C02",
- "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state. This state can be entered via the TPAUSE or UMWAIT instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -289,7 +288,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.C0_WAIT",
- "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction. Available PDIST counters: 0",
+ "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
"SampleAfterValue": "2000003",
"UMask": "0x70"
},
@@ -298,7 +297,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
- "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -307,7 +306,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
- "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted. Available PDIST counters: 0",
+ "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
"SampleAfterValue": "25003",
"UMask": "0x2"
},
@@ -316,7 +315,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -327,7 +325,6 @@
"EdgeDetect": "1",
"EventCode": "0xec",
"EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
- "PublicDescription": "CPU_CLK_UNHALTED.PAUSE_INST Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -336,7 +333,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
- "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread. Available PDIST counters: 0",
+ "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -344,7 +341,7 @@
"BriefDescription": "Reference cycles when the core is not in halt state.",
"Counter": "Fixed counter 2",
"EventName": "CPU_CLK_UNHALTED.REF_TSC",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x3"
},
@@ -353,7 +350,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
- "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -361,7 +358,7 @@
"BriefDescription": "Core cycles when the thread is not in halt state",
"Counter": "Fixed counter 1",
"EventName": "CPU_CLK_UNHALTED.THREAD",
- "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -370,7 +367,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x3c",
"EventName": "CPU_CLK_UNHALTED.THREAD_P",
- "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time. Available PDIST counters: 0",
+ "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
"SampleAfterValue": "2000003"
},
{
@@ -379,7 +376,6 @@
"CounterMask": "8",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8"
},
@@ -389,7 +385,6 @@
"CounterMask": "1",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
- "PublicDescription": "Cycles while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -399,7 +394,6 @@
"CounterMask": "16",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
- "PublicDescription": "Cycles while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -409,7 +403,6 @@
"CounterMask": "12",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -419,7 +412,6 @@
"CounterMask": "5",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x5"
},
@@ -429,7 +421,6 @@
"CounterMask": "4",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
- "PublicDescription": "Total execution stalls. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x4"
},
@@ -438,7 +429,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb7",
"EventName": "EXE.AMX_BUSY",
- "PublicDescription": "Counts the cycles where the AMX (Advance Matrix Extension) unit is busy performing an operation. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -447,7 +437,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -456,7 +446,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 2 or 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0xc"
},
@@ -465,7 +454,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
- "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -474,7 +463,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
- "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -483,7 +472,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
- "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty. Available PDIST counters: 0",
+ "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -493,7 +482,6 @@
"CounterMask": "5",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
- "PublicDescription": "Execution stalls while memory subsystem has an outstanding load. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x21"
},
@@ -503,7 +491,7 @@
"CounterMask": "2",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
- "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -512,7 +500,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa6",
"EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
- "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load. Available PDIST counters: 0",
+ "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -521,7 +509,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x75",
"EventName": "INST_DECODED.DECODERS",
- "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -546,7 +534,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -555,7 +542,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -572,7 +559,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent.",
"SampleAfterValue": "2000003",
"UMask": "0x8"
},
@@ -583,7 +570,7 @@
"EdgeDetect": "1",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEARS_COUNT",
- "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of speculative clears due to any type of branch misprediction or machine clears",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -592,7 +579,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
- "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path. Available PDIST counters: 0",
+ "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
"SampleAfterValue": "500009",
"UMask": "0x80"
},
@@ -601,7 +588,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.MBA_STALLS",
- "PublicDescription": "INT_MISC.MBA_STALLS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -610,7 +596,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.RECOVERY_CYCLES",
- "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event. Available PDIST counters: 0",
+ "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
"SampleAfterValue": "500009",
"UMask": "0x1"
},
@@ -621,7 +607,6 @@
"EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
"MSRIndex": "0x3F7",
"MSRValue": "0x7",
- "PublicDescription": "Bubble cycles of BAClear (Unknown Branch). Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -630,7 +615,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xad",
"EventName": "INT_MISC.UOP_DROPPING",
- "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons Available PDIST counters: 0",
+ "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -639,7 +624,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.128BIT",
- "PublicDescription": "INT_VEC_RETIRED.128BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x13"
},
@@ -648,7 +632,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.256BIT",
- "PublicDescription": "INT_VEC_RETIRED.256BIT Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0xac"
},
@@ -657,7 +640,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_128",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0x3"
},
@@ -666,7 +649,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.ADD_256",
- "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
"SampleAfterValue": "1000003",
"UMask": "0xc"
},
@@ -675,7 +658,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.MUL_256",
- "PublicDescription": "INT_VEC_RETIRED.MUL_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x80"
},
@@ -684,7 +666,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.SHUFFLES",
- "PublicDescription": "INT_VEC_RETIRED.SHUFFLES Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x40"
},
@@ -693,7 +674,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_128",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_128 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10"
},
@@ -702,7 +682,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe7",
"EventName": "INT_VEC_RETIRED.VNNI_256",
- "PublicDescription": "INT_VEC_RETIRED.VNNI_256 Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20"
},
@@ -711,7 +690,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ADDRESS_ALIAS",
- "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -720,7 +699,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.NO_SR",
- "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
"SampleAfterValue": "100003",
"UMask": "0x88"
},
@@ -729,7 +708,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x03",
"EventName": "LD_BLOCKS.STORE_FORWARD",
- "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
"SampleAfterValue": "100003",
"UMask": "0x82"
},
@@ -738,7 +717,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x4c",
"EventName": "LOAD_HIT_PREFETCH.SWPF",
- "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -748,7 +727,7 @@
"CounterMask": "1",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_ACTIVE",
- "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -758,7 +737,7 @@
"CounterMask": "6",
"EventCode": "0xa8",
"EventName": "LSD.CYCLES_OK",
- "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -767,7 +746,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa8",
"EventName": "LSD.UOPS",
- "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -778,7 +757,7 @@
"EdgeDetect": "1",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.COUNT",
- "PublicDescription": "Counts the number of machine clears (nukes) of any type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
"SampleAfterValue": "100003",
"UMask": "0x1"
},
@@ -787,7 +766,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.SMC",
- "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear. Available PDIST counters: 0",
+ "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -796,7 +775,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe0",
"EventName": "MISC2_RETIRED.LFENCE",
- "PublicDescription": "number of LFENCE retired instructions Available PDIST counters: 0",
+ "PublicDescription": "number of LFENCE retired instructions",
"SampleAfterValue": "400009",
"UMask": "0x20"
},
@@ -805,7 +784,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xcc",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT. Available PDIST counters: 0",
+ "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -814,7 +793,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SB",
- "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end. Available PDIST counters: 0",
+ "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -823,7 +802,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa2",
"EventName": "RESOURCE_STALLS.SCOREBOARD",
- "PublicDescription": "Counts cycles where the pipeline is stalled due to serializing operations. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -832,7 +810,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY",
- "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses) Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
"SampleAfterValue": "1000003",
"UMask": "0x7"
},
@@ -844,7 +822,7 @@
"EventCode": "0xa5",
"EventName": "RS.EMPTY_COUNT",
"Invert": "1",
- "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events) Available PDIST counters: 0",
+ "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
"SampleAfterValue": "100003",
"UMask": "0x7"
},
@@ -853,7 +831,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa5",
"EventName": "RS.EMPTY_RESOURCE",
- "PublicDescription": "Cycles when RS was empty and a resource allocation stall is asserted Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -862,7 +839,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that were not consumed by the back-end pipeline due to lack of back-end resources, as a result of memory subsystem delays, execution units limitations, or other conditions. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Backend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "10000003",
"UMask": "0x2"
},
@@ -871,7 +848,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BAD_SPEC_SLOTS",
- "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations. Available PDIST counters: 0",
+ "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
"SampleAfterValue": "10000003",
"UMask": "0x4"
},
@@ -880,7 +857,7 @@
"Counter": "0",
"EventCode": "0xa4",
"EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
- "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction. Available PDIST counters: 0",
+ "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of speculative operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
"SampleAfterValue": "10000003",
"UMask": "0x8"
},
@@ -889,7 +866,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
- "PublicDescription": "TOPDOWN.MEMORY_BOUND_SLOTS Available PDIST counters: 0",
"SampleAfterValue": "10000003",
"UMask": "0x10"
},
@@ -897,7 +873,7 @@
"BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
"Counter": "Fixed counter 3",
"EventName": "TOPDOWN.SLOTS",
- "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3). Available PDIST counters: 0",
+ "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
"SampleAfterValue": "10000003",
"UMask": "0x4"
},
@@ -906,7 +882,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xa4",
"EventName": "TOPDOWN.SLOTS_P",
- "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
"SampleAfterValue": "10000003",
"UMask": "0x1"
},
@@ -915,7 +891,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x76",
"EventName": "UOPS_DECODED.DEC0_UOPS",
- "PublicDescription": "This event counts the number of not dec-by-all uops decoded by decoder 0. Available PDIST counters: 0",
+ "PublicDescription": "This event counts the number of not dec-by-all uops decoded by decoder 0.",
"SampleAfterValue": "1000003",
"UMask": "0x1"
},
@@ -924,7 +900,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_0",
- "PublicDescription": "Number of uops dispatch to execution port 0. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 0.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -933,7 +909,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_1",
- "PublicDescription": "Number of uops dispatch to execution port 1. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 1.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -942,7 +918,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_2_3_10",
- "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -951,7 +927,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_4_9",
- "PublicDescription": "Number of uops dispatch to execution ports 4 and 9 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -960,7 +936,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_5_11",
- "PublicDescription": "Number of uops dispatch to execution ports 5 and 11 Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
"SampleAfterValue": "2000003",
"UMask": "0x20"
},
@@ -969,7 +945,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_6",
- "PublicDescription": "Number of uops dispatch to execution port 6. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution port 6.",
"SampleAfterValue": "2000003",
"UMask": "0x40"
},
@@ -978,7 +954,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb2",
"EventName": "UOPS_DISPATCHED.PORT_7_8",
- "PublicDescription": "Number of uops dispatch to execution ports 7 and 8. Available PDIST counters: 0",
+ "PublicDescription": "Number of uops dispatch to execution ports 7 and 8.",
"SampleAfterValue": "2000003",
"UMask": "0x80"
},
@@ -987,7 +963,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE",
- "PublicDescription": "Counts the number of uops executed from any thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops executed from any thread.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -997,7 +973,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
- "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1007,7 +983,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
- "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1017,7 +993,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
- "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1027,7 +1003,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
- "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1037,7 +1013,7 @@
"CounterMask": "1",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_1",
- "PublicDescription": "Cycles where at least 1 uop was executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1047,7 +1023,7 @@
"CounterMask": "2",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_2",
- "PublicDescription": "Cycles where at least 2 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1057,7 +1033,7 @@
"CounterMask": "3",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_3",
- "PublicDescription": "Cycles where at least 3 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1067,7 +1043,7 @@
"CounterMask": "4",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.CYCLES_GE_4",
- "PublicDescription": "Cycles where at least 4 uops were executed per-thread. Available PDIST counters: 0",
+ "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1078,7 +1054,7 @@
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.STALLS",
"Invert": "1",
- "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1087,7 +1063,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.THREAD",
- "PublicDescription": "Counts the number of uops to be executed per-thread each cycle. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1096,7 +1071,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb1",
"EventName": "UOPS_EXECUTED.X87",
- "PublicDescription": "Counts the number of x87 uops executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of x87 uops executed.",
"SampleAfterValue": "2000003",
"UMask": "0x10"
},
@@ -1105,7 +1080,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.ANY",
- "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1115,7 +1090,6 @@
"CounterMask": "1",
"EventCode": "0xae",
"EventName": "UOPS_ISSUED.CYCLES",
- "PublicDescription": "UOPS_ISSUED.CYCLES Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1125,7 +1099,7 @@
"CounterMask": "1",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.CYCLES",
- "PublicDescription": "Counts cycles where at least one uop has retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where at least one uop has retired.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
},
@@ -1134,7 +1108,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.HEAVY",
- "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
"SampleAfterValue": "2000003",
"UMask": "0x1"
},
@@ -1145,7 +1119,6 @@
"EventName": "UOPS_RETIRED.MS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "UOPS_RETIRED.MS Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4"
},
@@ -1154,7 +1127,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.SLOTS",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event that are utilized by operations that eventually get retired (committed) by the processor pipeline. Usually, this event positively correlates with higher performance for example, as measured by the instructions-per-cycle metric. Software can use this event as the numerator for the Retiring metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "2000003",
"UMask": "0x2"
},
@@ -1165,7 +1138,7 @@
"EventCode": "0xc2",
"EventName": "UOPS_RETIRED.STALLS",
"Invert": "1",
- "PublicDescription": "This event counts cycles without actually retired uops. Available PDIST counters: 0",
+ "PublicDescription": "This event counts cycles without actually retired uops.",
"SampleAfterValue": "1000003",
"UMask": "0x2"
}
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json
index b782f6d54fc2..721fc42797b1 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-cache.json
@@ -10,6 +10,15 @@
"Unit": "CHACMS"
},
{
+ "BriefDescription": "UNC_CHACMS_DISTRESS_ASSERTED",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x35",
+ "EventName": "UNC_CHACMS_DISTRESS_ASSERTED",
+ "PerPkg": "1",
+ "PortMask": "0x000",
+ "Unit": "CHACMS"
+ },
+ {
"BriefDescription": "Counts the number of cycles FAST trigger is received from the global FAST distress wire.",
"Counter": "0,1,2,3",
"EventCode": "0x34",
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json
index 6667fbc50452..5eb1145f204f 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-interconnect.json
@@ -833,12 +833,20 @@
"Counter": "0,1,2,3",
"EventCode": "0x1F",
"EventName": "UNC_I_MISC1.LOST_FWD",
- "Experimental": "1",
"PerPkg": "1",
"UMask": "0x10",
"Unit": "IRP"
},
{
+ "BriefDescription": "Misc Events - Set 1 : Received Invalid : Secondary received a transfer that did not have sufficient MESI state",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x1F",
+ "EventName": "UNC_I_MISC1.SEC_RCVD_INVLD",
+ "PerPkg": "1",
+ "UMask": "0x20",
+ "Unit": "IRP"
+ },
+ {
"BriefDescription": "Snoop Hit E/S responses",
"Counter": "0,1,2,3",
"EventCode": "0x12",
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json
index f4f956966e16..2ea2637df3fb 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-io.json
@@ -1321,7 +1321,6 @@
"FCMask": "0x01",
"PerPkg": "1",
"PortMask": "0x0FF",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IIO"
},
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json
index b991f6e1afbe..f559e27e2815 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/uncore-memory.json
@@ -60,6 +60,33 @@
"BriefDescription": "CAS count for SubChannel 0 regular reads",
"Counter": "0,1,2,3",
"EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_NON_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc3",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 0 auto-precharge reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_PRE_REG",
+ "PerPkg": "1",
+ "UMask": "0xc2",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 0 auto-precharge underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_PRE_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc8",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 0 regular reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
"EventName": "UNC_M_CAS_COUNT_SCH0.RD_REG",
"PerPkg": "1",
"UMask": "0xc1",
@@ -75,6 +102,15 @@
"Unit": "IMC"
},
{
+ "BriefDescription": "CAS count for SubChannel 0 underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x05",
+ "EventName": "UNC_M_CAS_COUNT_SCH0.RD_UNDERFILL_ALL",
+ "PerPkg": "1",
+ "UMask": "0xcc",
+ "Unit": "IMC"
+ },
+ {
"BriefDescription": "CAS count for SubChannel 0, all writes",
"Counter": "0,1,2,3",
"EventCode": "0x05",
@@ -125,6 +161,33 @@
"BriefDescription": "CAS count for SubChannel 1 regular reads",
"Counter": "0,1,2,3",
"EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_NON_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc3",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 1 auto-precharge reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_PRE_REG",
+ "PerPkg": "1",
+ "UMask": "0xc2",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 1 auto-precharge underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_PRE_UNDERFILL",
+ "PerPkg": "1",
+ "UMask": "0xc8",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "CAS count for SubChannel 1 regular reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
"EventName": "UNC_M_CAS_COUNT_SCH1.RD_REG",
"PerPkg": "1",
"UMask": "0xc1",
@@ -140,6 +203,15 @@
"Unit": "IMC"
},
{
+ "BriefDescription": "CAS count for SubChannel 1 underfill reads",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x06",
+ "EventName": "UNC_M_CAS_COUNT_SCH1.RD_UNDERFILL_ALL",
+ "PerPkg": "1",
+ "UMask": "0xcc",
+ "Unit": "IMC"
+ },
+ {
"BriefDescription": "CAS count for SubChannel 1, all writes",
"Counter": "0,1,2,3",
"EventCode": "0x06",
@@ -189,13 +261,52 @@
"Unit": "IMC"
},
{
+ "BriefDescription": "PMMNT is sending REF* commands while being in specified Refresh rate",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x72",
+ "EventName": "UNC_M_MNTCMD_REFRATE.REFAB1X",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x1",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "PMMNT is sending REF* commands while being in specified Refresh rate",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x72",
+ "EventName": "UNC_M_MNTCMD_REFRATE.REFAB2X",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x4",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "PMMNT is sending REF* commands while being in specified Refresh rate",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x72",
+ "EventName": "UNC_M_MNTCMD_REFRATE.REFSB1X",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x2",
+ "Unit": "IMC"
+ },
+ {
+ "BriefDescription": "PMMNT is sending REF* commands while being in specified Refresh rate",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x72",
+ "EventName": "UNC_M_MNTCMD_REFRATE.REFSB2X",
+ "Experimental": "1",
+ "PerPkg": "1",
+ "UMask": "0x8",
+ "Unit": "IMC"
+ },
+ {
"BriefDescription": "# of cycles MR4 temp readings forced 2x refresh",
"Counter": "0,1,2,3",
"EventCode": "0xA7",
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH0_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -206,7 +317,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH0_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -217,7 +327,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH1_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -228,7 +337,6 @@
"EventName": "UNC_M_MR4_2XREF_CYCLES.SCH1_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -239,7 +347,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH0_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -250,7 +357,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH0_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -261,7 +367,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH1_DIMM0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -272,7 +377,6 @@
"EventName": "UNC_M_PDC_MR4ACTIVE_CYCLES.SCH1_DIMM1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -283,7 +387,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -294,7 +397,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -305,7 +407,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK2",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -316,7 +417,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH0_RANK3",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -327,7 +427,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x10",
"Unit": "IMC"
},
@@ -338,7 +437,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x20",
"Unit": "IMC"
},
@@ -349,7 +447,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK2",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x40",
"Unit": "IMC"
},
@@ -360,7 +457,6 @@
"EventName": "UNC_M_POWERDOWN_CYCLES.SCH1_RANK3",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x80",
"Unit": "IMC"
},
@@ -371,7 +467,6 @@
"EventName": "UNC_M_POWER_CHANNEL_PPD_CYCLES",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"Unit": "IMC"
},
{
@@ -381,7 +476,6 @@
"EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -392,7 +486,6 @@
"EventName": "UNC_M_POWER_CRITICAL_THROTTLE_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -423,7 +516,6 @@
"EventName": "UNC_M_POWER_THROTTLE_CYCLES.MR4BLKEN",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x8",
"Unit": "IMC"
},
@@ -434,7 +526,6 @@
"EventName": "UNC_M_POWER_THROTTLE_CYCLES.RAPLBLK",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x4",
"Unit": "IMC"
},
@@ -615,7 +706,6 @@
"EventName": "UNC_M_SELF_REFRESH.ENTER_SUCCESS",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "UNC_M_SELF_REFRESH.ENTER_SUCCESS",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -626,7 +716,6 @@
"EventName": "UNC_M_SELF_REFRESH.ENTER_SUCCESS_CYCLES",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -637,7 +726,6 @@
"EventName": "UNC_M_THROTTLE_CRIT_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -648,7 +736,6 @@
"EventName": "UNC_M_THROTTLE_CRIT_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -659,7 +746,6 @@
"EventName": "UNC_M_THROTTLE_HIGH_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -670,7 +756,6 @@
"EventName": "UNC_M_THROTTLE_HIGH_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -681,7 +766,6 @@
"EventName": "UNC_M_THROTTLE_LOW_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -692,7 +776,6 @@
"EventName": "UNC_M_THROTTLE_LOW_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
@@ -703,7 +786,6 @@
"EventName": "UNC_M_THROTTLE_MID_CYCLES.SLOT0",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x1",
"Unit": "IMC"
},
@@ -714,7 +796,6 @@
"EventName": "UNC_M_THROTTLE_MID_CYCLES.SLOT1",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "-",
"UMask": "0x2",
"Unit": "IMC"
},
diff --git a/tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json b/tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json
index 3d3f88600e26..609a9549cbf3 100644
--- a/tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/graniterapids/virtual-memory.json
@@ -4,7 +4,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.STLB_HIT",
- "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -14,7 +14,7 @@
"CounterMask": "1",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -23,7 +23,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -32,7 +32,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -41,7 +41,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -50,7 +50,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -59,7 +59,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x12",
"EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -68,7 +68,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.STLB_HIT",
- "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -78,7 +78,7 @@
"CounterMask": "1",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -87,7 +87,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -96,7 +96,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
- "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x8"
},
@@ -105,7 +105,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -114,7 +114,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -123,7 +123,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x13",
"EventName": "DTLB_STORE_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -132,7 +132,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.STLB_HIT",
- "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB). Available PDIST counters: 0",
+ "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
"SampleAfterValue": "100003",
"UMask": "0x20"
},
@@ -142,7 +142,7 @@
"CounterMask": "1",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_ACTIVE",
- "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
"SampleAfterValue": "100003",
"UMask": "0x10"
},
@@ -151,7 +151,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED",
- "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0xe"
},
@@ -160,7 +160,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
- "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x4"
},
@@ -169,7 +169,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
- "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault. Available PDIST counters: 0",
+ "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
"SampleAfterValue": "100003",
"UMask": "0x2"
},
@@ -178,7 +178,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x11",
"EventName": "ITLB_MISSES.WALK_PENDING",
- "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
"SampleAfterValue": "100003",
"UMask": "0x10"
}
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
index b26ea70a3628..aebd82ced1cf 100644
--- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -509,7 +506,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -521,7 +518,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -696,7 +693,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -746,7 +742,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -856,7 +852,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
+ "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -865,7 +861,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -874,7 +870,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -883,7 +879,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
index 8245a98ad4b9..b8845f8a28b9 100644
--- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -282,7 +282,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5 + UOPS_DISPATCHED_PORT.PORT_6) / tma_info_thread_slots",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -300,7 +299,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -341,7 +339,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -711,7 +708,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -723,7 +720,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -756,6 +753,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
@@ -918,7 +916,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 + UOPS_DISPATCHED_PORT.PORT_7 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -928,6 +925,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "200 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_mem",
@@ -977,7 +975,7 @@
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
+ "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING) + RESOURCE_STALLS.SB) / (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) * tma_backend_bound",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -1087,7 +1085,7 @@
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "(min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
+ "MetricExpr": "((min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@)) / 2 - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) + cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - (cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@ if tma_info_thread_ipc > 1.8 else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0) + RESOURCE_STALLS.SB) - RESOURCE_STALLS.SB - min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.STALLS_LDM_PENDING)) / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
"MetricThreshold": "tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2)",
@@ -1096,7 +1094,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1105,7 +1103,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1114,7 +1112,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1141,6 +1139,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "310 * (MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.L3_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_mem",
diff --git a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
index c5bfdb2f288b..cf9ed3edb694 100644
--- a/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelake/icl-metrics.json
@@ -1,63 +1,63 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c8\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C8_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C9 residency percent per package",
- "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c9\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C9_Pkg_Residency",
"ScaleUnit": "100%"
@@ -85,7 +85,6 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -134,6 +133,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -148,39 +148,44 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy."
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -188,6 +193,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
@@ -196,6 +202,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
@@ -204,6 +211,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -212,7 +220,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -220,6 +229,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -427,7 +437,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -619,6 +629,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_lsd + tma_mite + tma_ms)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -1068,7 +1079,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
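+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed per cycle, counting only cycles in which at least one uop was executed)",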
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1092,6 +1103,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1107,7 +1124,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1119,7 +1136,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -1128,7 +1145,7 @@
"MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1e6 / tma_info_system_time / 1e3",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1296,12 +1313,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1325,7 +1342,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1339,7 +1355,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1445,7 +1461,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1454,7 +1470,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1522,7 +1538,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 2",
+ "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 3.3",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -1656,7 +1672,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -1693,7 +1709,6 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -1707,7 +1722,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -1721,7 +1736,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
index a886a0cfee07..f58eec2a1788 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/icx-metrics.json
@@ -1,28 +1,28 @@
[
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
@@ -79,6 +79,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO reads that are initiated by end device controllers that are requesting memory from the CPU and miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_read_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO reads that are initiated by end device controllers that are requesting memory from the local CPU socket",
"MetricExpr": "UNC_CHA_TOR_INSERTS.IO_PCIRDCUR_LOCAL * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_read_local",
@@ -97,6 +103,12 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "Bandwidth of inbound IO writes that are initiated by end device controllers that are writing memory to the CPU and miss the L3 cache",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOM + UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR) * 64 / 1e6 / duration_time",
+ "MetricName": "io_bandwidth_write_l3_miss",
+ "ScaleUnit": "1MB/s"
+ },
+ {
"BriefDescription": "Bandwidth of IO writes that are initiated by end device controllers that are writing memory to the local CPU socket",
"MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_ITOM_LOCAL + UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR_LOCAL) * 64 / 1e6 / duration_time",
"MetricName": "io_bandwidth_write_local",
@@ -109,6 +121,24 @@
"ScaleUnit": "1MB/s"
},
{
+ "BriefDescription": "The percent of inbound full cache line writes initiated by IO that miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_ITOM / UNC_CHA_TOR_INSERTS.IO_ITOM",
+ "MetricName": "io_full_write_l3_miss",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "The percent of inbound partial writes initiated by IO that miss the L3 cache",
+ "MetricExpr": "(UNC_CHA_TOR_INSERTS.IO_MISS_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_MISS_RFO) / (UNC_CHA_TOR_INSERTS.IO_ITOMCACHENEAR + UNC_CHA_TOR_INSERTS.IO_RFO)",
+ "MetricName": "io_partial_write_l3_miss",
+ "ScaleUnit": "100%"
+ },
+ {
+ "BriefDescription": "The percent of inbound reads initiated by IO that miss the L3 cache",
+ "MetricExpr": "UNC_CHA_TOR_INSERTS.IO_MISS_PCIRDCUR / UNC_CHA_TOR_INSERTS.IO_PCIRDCUR",
+ "MetricName": "io_read_l3_miss",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "Ratio of number of completed page walks (for 2 megabyte and 4 megabyte page sizes) caused by a code fetch to the total number of completed instructions",
"MetricExpr": "ITLB_MISSES.WALK_COMPLETED_2M_4M / INST_RETIRED.ANY",
"MetricName": "itlb_2nd_level_large_page_mpi",
@@ -331,7 +361,6 @@
},
{
"BriefDescription": "This metric estimates how often memory load accesses were aliased by preceding stores (in program order) with a 4K address offset",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_4k_aliasing",
@@ -380,6 +409,7 @@
},
{
"BriefDescription": "Total pipeline cost of instruction fetch related bottlenecks by large code footprint programs (i-side cache; TLB and BTB misses)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * tma_fetch_latency * (tma_itlb_misses + tma_icache_misses + tma_unknown_branches) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches)",
"MetricGroup": "BigFootprint;BvBC;Fed;Frontend;IcMiss;MemoryTLB",
"MetricName": "tma_bottleneck_big_code",
@@ -394,39 +424,44 @@
"PublicDescription": "Total pipeline cost of instructions used for program control-flow - a subset of the Retiring category in TMA. Examples include function calls; loops and alignments. (A lower bound)"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy."
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: "
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20"
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_mispredicts_resteers) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + tma_core_bound * RS_EVENTS.EMPTY_CYCLES / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -434,7 +469,8 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / max(tma_memory_bound, tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / max(tma_l1_bound, tma_4k_aliasing + tma_dtlb_load + tma_fb_full + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -442,7 +478,8 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Synchronization related bottlenecks (data transfers and coherency updates across processors)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) * tma_remote_cache / (tma_local_mem + tma_remote_cache + tma_remote_mem) + tma_l3_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * (tma_contested_accesses + tma_data_sharing) / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full) + tma_store_bound / (tma_cxl_mem_bound + tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) * tma_false_sharing / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores - tma_store_latency)) + tma_machine_clears * (1 - tma_other_nukes / tma_other_nukes))",
"MetricGroup": "BvMS;LockCont;Mem;Offcore;tma_issueSyncxn",
"MetricName": "tma_bottleneck_memory_synchronization",
"MetricThreshold": "tma_bottleneck_memory_synchronization > 10",
@@ -450,6 +487,7 @@
},
{
"BriefDescription": "Total pipeline cost of Branch Misprediction related bottlenecks",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * (tma_branch_mispredicts + tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches))",
"MetricGroup": "Bad;BadSpec;BrMispredicts;BvMP;tma_issueBM",
"MetricName": "tma_bottleneck_mispredictions",
@@ -458,7 +496,8 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -466,6 +505,7 @@
},
{
"BriefDescription": "Total pipeline cost of \"useful operations\" - the portion of Retiring category not covered by Branching_Overhead nor Irregular_Overhead.",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_retiring - (BR_INST_RETIRED.ALL_BRANCHES + 2 * BR_INST_RETIRED.NEAR_CALL + INST_RETIRED.NOP) / tma_info_thread_slots - tma_microcode_sequencer / (tma_few_uops_instructions + tma_microcode_sequencer) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "BvUW;Ret",
"MetricName": "tma_bottleneck_useful_work",
@@ -585,6 +625,15 @@
"ScaleUnit": "100%"
},
{
+ "BriefDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads",
+ "MetricExpr": "(((1 - ((19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS))) / (19 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + 10 * (MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) + MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS)) + (25 * (MEM_LOAD_RETIRED.LOCAL_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0) + 33 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS) if #has_pmem > 0 else 0))) if #has_pmem > 0 else 1)) * (CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound) if 1e6 * (MEM_LOAD_L3_MISS_RETIRED.REMOTE_PMM + MEM_LOAD_RETIRED.LOCAL_PMM) > MEM_LOAD_RETIRED.L1_MISS else 0) if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemoryBound;Server;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
+ "MetricName": "tma_cxl_mem_bound",
+ "MetricThreshold": "tma_cxl_mem_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
+ "PublicDescription": "This metric roughly estimates (based on idle latencies) how often the CPU was stalled on accesses to external CXL Memory by loads (e.g. 3D-Xpoint (Crystal Ridge, a.k.a. IXP) memory, PMM - Persistent Memory Module [from CLX to SPR] or any other CXL Type3 Memory [EMR onwards]).",
+ "ScaleUnit": "100%"
+ },
+ {
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to data-sharing accesses",
"MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "43.5 * tma_info_system_core_frequency * (MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT + MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM * (1 - OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM / (OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM + OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD))) * (1 + MEM_LOAD_RETIRED.FB_HIT / MEM_LOAD_RETIRED.L1_MISS / 2) / tma_info_thread_clks",
@@ -615,7 +664,7 @@
{
"BriefDescription": "This metric estimates how often the CPU was stalled on accesses to external memory (DRAM) by loads",
"MetricConstraint": "NO_GROUP_EVENTS",
- "MetricExpr": "CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound",
+ "MetricExpr": "(CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound - tma_cxl_mem_bound if #has_pmem > 0 else CYCLE_ACTIVITY.STALLS_L3_MISS / tma_info_thread_clks + (CYCLE_ACTIVITY.STALLS_L1D_MISS - CYCLE_ACTIVITY.STALLS_L2_MISS) / tma_info_thread_clks - tma_l2_bound)",
"MetricGroup": "MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_dram_bound",
"MetricThreshold": "tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2)",
@@ -673,7 +722,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%"
},
{
@@ -865,6 +914,7 @@
},
{
"BriefDescription": "Total pipeline cost of DSB (uop cache) hits - subset of the Instruction_Fetch_BW Bottleneck",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "100 * (tma_frontend_bound * (tma_fetch_bandwidth / (tma_fetch_bandwidth + tma_fetch_latency)) * (tma_dsb / (tma_dsb + tma_mite + tma_ms)))",
"MetricGroup": "DSB;Fed;FetchBW;tma_issueFB",
"MetricName": "tma_info_botlnk_l2_dsb_bandwidth",
@@ -1320,7 +1370,7 @@
"MetricName": "tma_info_memory_tlb_store_stlb_mpki"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
"MetricExpr": "UOPS_EXECUTED.THREAD / (UOPS_EXECUTED.CORE_CYCLES_GE_1 / 2 if #SMT_on else cpu@UOPS_EXECUTED.THREAD\\,cmask\\=1@)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -1338,6 +1388,12 @@
"MetricName": "tma_info_pipeline_fetch_mite"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "IDQ.MS_UOPS / cpu@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "INST_RETIRED.ANY / ASSISTS.ANY",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1353,7 +1409,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -1365,16 +1421,28 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
{
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_RPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_read_bw"
+ },
+ {
+ "BriefDescription": "Average 3DXP Memory Bandwidth Use for writes [GB / sec]",
+ "MetricExpr": "(64 * UNC_M_PMM_WPQ_INSERTS / 1e9 / tma_info_system_time if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryBW;Server;SoC",
+ "MetricName": "tma_info_system_cxl_mem_write_bw"
+ },
+ {
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
"MetricExpr": "64 * (UNC_M_CAS_COUNT.RD + UNC_M_CAS_COUNT.WR) / 1e9 / tma_info_system_time",
"MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
"MetricName": "tma_info_system_dram_bw_use",
- "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full"
},
{
"BriefDescription": "Giga Floating Point Operations Per Second",
@@ -1433,12 +1501,20 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD@thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
"PublicDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches"
},
{
+ "BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]",
+ "MetricExpr": "(1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD_PMM / UNC_CHA_TOR_INSERTS.IA_MISS_DRD_PMM) / cha_0@event\\=0x0@ if #has_pmem > 0 else 0)",
+ "MetricGroup": "MemOffcore;MemoryLat;Server;SoC",
+ "MetricName": "tma_info_system_mem_pmm_read_latency",
+ "PublicDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches"
+ },
+ {
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds)",
"MetricExpr": "1e9 * (UNC_CHA_TOR_OCCUPANCY.IA_MISS_DRD / UNC_CHA_TOR_INSERTS.IA_MISS_DRD) / (tma_info_system_socket_clks / tma_info_system_time)",
"MetricGroup": "Mem;MemoryLat;SoC",
@@ -1590,12 +1666,12 @@
"ScaleUnit": "100%"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
"MetricExpr": "min(2 * (MEM_INST_RETIRED.ALL_LOADS - MEM_LOAD_RETIRED.FB_HIT - MEM_LOAD_RETIRED.L1_MISS) * 20 / 100, max(CYCLE_ACTIVITY.CYCLES_MEM_ANY - CYCLE_ACTIVITY.CYCLES_L1D_MISS, 0)) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_RETIRED.L1_HIT",
"ScaleUnit": "100%"
},
{
@@ -1619,7 +1695,6 @@
},
{
"BriefDescription": "This metric estimates how often the CPU was stalled due to loads accesses to L3 cache or contended with a sibling Core",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(CYCLE_ACTIVITY.STALLS_L2_MISS - CYCLE_ACTIVITY.STALLS_L3_MISS) / tma_info_thread_clks",
"MetricGroup": "CacheHits;MemoryBound;TmaL3mem;TopdownL3;tma_L3_group;tma_memory_bound_group",
"MetricName": "tma_l3_bound",
@@ -1633,7 +1708,7 @@
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%"
},
{
@@ -1739,7 +1814,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%"
},
{
@@ -1748,7 +1823,7 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%"
},
{
@@ -1816,7 +1891,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 2",
+ "MetricExpr": "cpu@IDQ.MS_UOPS\\,cmask\\=1@ / tma_info_core_core_clks / 3.3",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -1968,7 +2043,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound) + 0 * tma_info_thread_slots",
+ "MetricExpr": "topdown\\-retiring / (topdown\\-fe\\-bound + topdown\\-bad\\-spec + topdown\\-retiring + topdown\\-be\\-bound)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2005,7 +2080,6 @@
},
{
"BriefDescription": "This metric represents rate of split store accesses",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "MEM_INST_RETIRED.SPLIT_STORES / tma_info_core_core_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_issueSpSt;tma_store_bound_group",
"MetricName": "tma_split_stores",
@@ -2019,7 +2093,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%"
},
{
@@ -2033,7 +2107,6 @@
},
{
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * LD_BLOCKS.STORE_FORWARD / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_store_fwd_blk",
diff --git a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json
index 6f84ad47276d..1c225192ba34 100644
--- a/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json
+++ b/tools/perf/pmu-events/arch/x86/icelakex/uncore-cache.json
@@ -6050,7 +6050,7 @@
"EventName": "UNC_CHA_SNOOP_RESP.RSPIFWD",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "Counts when a a transaction with the opcode type RspIFwd Snoop Response was received which indicates a remote caching agent forwarded the data and the requesting agent is able to acquire the data in E (Exclusive) or M (modified) states. This is commonly returned with RFO (the Read for Ownership issued before a write) transactions. The snoop could have either been to a cacheline in the M,E,F (Modified, Exclusive or Forward) states.",
+ "PublicDescription": "Counts when a transaction with the opcode type RspIFwd Snoop Response was received which indicates a remote caching agent forwarded the data and the requesting agent is able to acquire the data in E (Exclusive) or M (modified) states. This is commonly returned with RFO (the Read for Ownership issued before a write) transactions. The snoop could have either been to a cacheline in the M,E,F (Modified, Exclusive or Forward) states.",
"UMask": "0x4",
"Unit": "CHA"
},
@@ -6072,7 +6072,7 @@
"EventName": "UNC_CHA_SNOOP_RESP.RSPSFWD",
"Experimental": "1",
"PerPkg": "1",
- "PublicDescription": "Counts when a a transaction with the opcode type RspSFwd Snoop Response was received which indicates a remote caching agent forwarded the data but held on to its current copy. This is common for data and code reads that hit in a remote socket in E (Exclusive) or F (Forward) state.",
+ "PublicDescription": "Counts when a transaction with the opcode type RspSFwd Snoop Response was received which indicates a remote caching agent forwarded the data but held on to its current copy. This is common for data and code reads that hit in a remote socket in E (Exclusive) or F (Forward) state.",
"UMask": "0x8",
"Unit": "CHA"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
index de651ff9f846..969cb519eec1 100644
--- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -561,7 +558,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -574,7 +571,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -586,7 +583,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -775,7 +772,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -926,7 +922,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -935,7 +931,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -944,7 +940,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
index 714d5e6d21e7..1cdd197ac883 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -80,7 +80,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_0 + UOPS_DISPATCHED_PORT.PORT_1 + UOPS_DISPATCHED_PORT.PORT_5) / (3 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_alu_op_utilization",
@@ -98,7 +97,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -139,7 +137,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU retired uops originated from CISC (complex instruction set computer) instruction",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "max(0, tma_microcode_sequencer - tma_assists)",
"MetricGroup": "TopdownL4;tma_L4_group;tma_microcode_sequencer_group",
"MetricName": "tma_cisc",
@@ -561,7 +558,7 @@
"MetricThreshold": "tma_info_memory_tlb_page_walks_utilization > 0.5"
},
{
- "BriefDescription": "",
+ "BriefDescription": "Instruction-Level-Parallelism (average number of uops executed when there is execution) per thread (logical-processor)",
"MetricExpr": "UOPS_EXECUTED.THREAD / (cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC)",
"MetricGroup": "Cor;Pipeline;PortsUtil;SMT",
"MetricName": "tma_info_pipeline_execute"
@@ -574,7 +571,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -586,7 +583,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -626,6 +623,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
@@ -795,7 +793,6 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution port for Load operations",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "(UOPS_DISPATCHED_PORT.PORT_2 + UOPS_DISPATCHED_PORT.PORT_3 - UOPS_DISPATCHED_PORT.PORT_4) / (2 * tma_info_core_core_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
"MetricName": "tma_load_op_utilization",
@@ -805,6 +802,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from local memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "200 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_local_mem",
@@ -955,7 +953,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else (min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks)",
+ "MetricExpr": "(cpu@UOPS_EXECUTED.CORE\\,inv\\,cmask\\=1@ / 2 if #SMT_on else min(CPU_CLK_UNHALTED.THREAD, CYCLE_ACTIVITY.CYCLES_NO_EXECUTE) - (RS_EVENTS.EMPTY_CYCLES if tma_fetch_latency > 0.1 else 0)) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
"MetricThreshold": "tma_ports_utilized_0 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -964,7 +962,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=1@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC - UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
"MetricThreshold": "tma_ports_utilized_1 > 0.2 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -973,7 +971,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else (UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks)",
+ "MetricExpr": "((cpu@UOPS_EXECUTED.CORE\\,cmask\\=2@ - cpu@UOPS_EXECUTED.CORE\\,cmask\\=3@) / 2 if #SMT_on else UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC - UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC) / tma_info_core_core_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
"MetricThreshold": "tma_ports_utilized_2 > 0.15 & (tma_ports_utilization > 0.15 & (tma_core_bound > 0.1 & tma_backend_bound > 0.2))",
@@ -1000,6 +998,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling loads from remote memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "310 * (MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM * (1 + MEM_LOAD_UOPS_RETIRED.HIT_LFB / (MEM_LOAD_UOPS_RETIRED.L2_HIT + MEM_LOAD_UOPS_RETIRED.LLC_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS + MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_DRAM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_HITM + MEM_LOAD_UOPS_LLC_MISS_RETIRED.REMOTE_FWD))) / tma_info_thread_clks",
"MetricGroup": "Server;Snoop;TopdownL5;tma_L5_group;tma_mem_latency_group",
"MetricName": "tma_remote_mem",
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index 6f636ea0f216..250c73b21385 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -1,49 +1,49 @@
[
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per core",
- "MetricExpr": "cstate_core@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c3\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C3_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Pkg_Residency",
"ScaleUnit": "100%"
@@ -71,7 +71,6 @@
},
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "1 - (tma_frontend_bound + tma_bad_speculation + tma_retiring)",
"MetricGroup": "BvOB;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
@@ -296,7 +295,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency"
},
@@ -308,7 +307,7 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized"
},
@@ -348,6 +347,7 @@
},
{
"BriefDescription": "Average number of parallel data read requests to external memory",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182@ / UNC_C_TOR_OCCUPANCY.MISS_OPCODE@filter_opc\\=0x182\\,thresh\\=1@",
"MetricGroup": "Mem;MemoryBW;SoC",
"MetricName": "tma_info_system_mem_parallel_reads",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json
index ff37d49611c3..3d2616be8ec1 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/cache.json
@@ -29,6 +29,16 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Cachelines replaced into the L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x51",
+ "EventName": "L1D.L1_REPLACEMENT",
+ "PublicDescription": "Counts cachelines replaced into the L1 d-cache.",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x4",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Cachelines replaced into the L0 and L1 d-cache. Successful replacements only (not blocked) and exclude WB-miss case",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x51",
@@ -233,7 +243,7 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of L2 prefetches initiated by either the L2 Stream or AMP that were throttled due to exceeding the XQ threshold set by either XQ_THRESOLD_DTP or XQ_THRESHOLD. Counts on a per core basis.",
+ "BriefDescription": "Counts the number of L2 prefetches initiated by either the L2 Stream or AMP that were throttled due to exceeding the XQ threshold set by either XQ_THRESHOLD_DTP or XQ_THRESHOLD. Counts on a per core basis.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x28",
"EventName": "L2_PREFETCHES_THROTTLED.XQ_THRESH",
@@ -454,7 +464,7 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of LLC prefetches throttled due to exceeding the XQ threshold set by either XQ_THRESOLD_DTP or LLC_XQ_THRESHOLD. Counts on a per core basis.",
+ "BriefDescription": "Counts the number of LLC prefetches throttled due to exceeding the XQ threshold set by either XQ_THRESHOLD_DTP or LLC_XQ_THRESHOLD. Counts on a per core basis.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x29",
"EventName": "LLC_PREFETCHES_THROTTLED.XQ_THRESH",
@@ -592,7 +602,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_LOADS",
- "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts Instructions with at least one architecturally visible load retired. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x81",
"Unit": "cpu_core"
@@ -603,7 +613,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_STORES",
- "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired store instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x82",
"Unit": "cpu_core"
@@ -613,7 +623,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ALL_SWPF",
- "PublicDescription": "Counts all retired software prefetch instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired software prefetch instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x84",
"Unit": "cpu_core"
@@ -624,7 +634,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.ANY",
- "PublicDescription": "Counts all retired memory instructions - loads and stores. Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired memory instructions - loads and stores. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x87",
"Unit": "cpu_core"
@@ -635,7 +645,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.LOCK_LOADS",
- "PublicDescription": "Counts retired load instructions with locked access. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with locked access. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -646,7 +656,7 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
- "PublicDescription": "Counts retired load instructions that split across a cacheline boundary. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x41",
"Unit": "cpu_core"
@@ -657,18 +667,29 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.SPLIT_STORES",
- "PublicDescription": "Counts retired store instructions that split across a cacheline boundary. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired store instructions that split across a cacheline boundary. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x42",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_ANY",
+ "PublicDescription": "Number of retired instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0xf",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions that hit the STLB.",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_HIT_LOADS",
- "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired load instructions with a clean hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -679,18 +700,39 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_HIT_STORES",
- "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired store instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0xa",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired SWPF instructions that hit the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_HIT_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that hit in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0xc",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Retired instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "Data_LA": "1",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_ANY",
+ "PublicDescription": "Retired instructions that miss the STLB. Available PDIST counters: 0,1",
+ "SampleAfterValue": "100003",
+ "UMask": "0x17",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions that miss the STLB.",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
- "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x11",
"Unit": "cpu_core"
@@ -701,18 +743,28 @@
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
- "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0",
+ "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x12",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Retired SWPF instructions that miss the STLB.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0xd0",
+ "EventName": "MEM_INST_RETIRED.STLB_MISS_SWPF",
+ "PublicDescription": "Number of retired SWPF instructions that (start a) miss in the 2nd-level TLB (STLB). Available PDIST counters: 0,1",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x14",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$)",
"Counter": "0,1,2,3",
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
- "PublicDescription": "Counts retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$) Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were a cross-core Snoop hits and forwards data from an in on-package core cache (induced by NI$) Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -723,7 +775,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
- "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3, Hit-with-FWD is normally excluded. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -734,7 +786,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
- "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -745,7 +797,7 @@
"Data_LA": "1",
"EventCode": "0xd2",
"EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
- "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache. Available PDIST counters: 0,1",
"SampleAfterValue": "20011",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -756,7 +808,7 @@
"Data_LA": "1",
"EventCode": "0xd3",
"EventName": "MEM_LOAD_L3_MISS_RETIRED.MEMSIDE_CACHE",
- "PublicDescription": "Retired load instructions which data source is memory side cache. Available PDIST counters: 0",
+ "PublicDescription": "Retired load instructions which data source is memory side cache. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"Unit": "cpu_core"
},
@@ -766,7 +818,7 @@
"Data_LA": "1",
"EventCode": "0xd4",
"EventName": "MEM_LOAD_MISC_RETIRED.UC",
- "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock). Available PDIST counters: 0",
+ "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock). Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -777,7 +829,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.FB_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -788,7 +840,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -799,7 +851,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT_L0",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 0 of the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -809,7 +861,7 @@
"Counter": "0,1,2,3",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_HIT_L1",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the Level 1 of the L1 data cache. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"Unit": "cpu_core"
},
@@ -819,7 +871,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L1_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "200003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -830,7 +882,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_HIT",
- "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources. Available PDIST counters: 0,1",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -841,7 +893,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L2_MISS",
- "PublicDescription": "Counts retired load instructions missed L2 cache as data sources. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions missed L2 cache as data sources. Available PDIST counters: 0,1",
"SampleAfterValue": "100021",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -852,7 +904,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_HIT",
- "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "100021",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -863,7 +915,7 @@
"Data_LA": "1",
"EventCode": "0xd1",
"EventName": "MEM_LOAD_RETIRED.L3_MISS",
- "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. Available PDIST counters: 0,1",
"SampleAfterValue": "50021",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -1037,7 +1089,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
@@ -1049,7 +1101,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
@@ -1061,7 +1113,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
@@ -1073,7 +1125,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
@@ -1085,7 +1137,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
@@ -1097,7 +1149,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
@@ -1109,7 +1161,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
@@ -1121,7 +1173,7 @@
},
{
"BriefDescription": "Counts the number of tagged load uops retired that exceed the latency threshold defined in MEC_CR_PEBS_LD_LAT_THRESHOLD - Only counts with PEBS enabled",
- "Counter": "0,1,2,3,4,5,6,7",
+ "Counter": "0,1",
"Data_LA": "1",
"EventCode": "0xd0",
"EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
@@ -1254,6 +1306,18 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts writebacks of modified cachelines that were supplied by the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_M.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E00008",
+ "PublicDescription": "Counts writebacks of modified cachelines that were supplied by the L3 cache. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts writebacks of non-modified cachelines that have any type of response.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xB7",
@@ -1266,6 +1330,18 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts writebacks of non-modified cachelines that were supplied by the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_NONM.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E01000",
+ "PublicDescription": "Counts writebacks of non-modified cachelines that were supplied by the L3 cache. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any type of response.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xB7",
@@ -1303,7 +1379,7 @@
},
{
"BriefDescription": "Counts demand data reads that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -1315,7 +1391,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1327,7 +1403,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop hit in another cores caches which forwarded the unmodified data to the requesting core.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
"MSRIndex": "0x1a6,0x1a7",
@@ -1363,7 +1439,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -1375,7 +1451,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop hit in another cores caches, data forwarding is required as the data is modified.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
"MSRIndex": "0x1a6,0x1a7",
@@ -1386,6 +1462,18 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that were supplied by the L3 cache.",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x7E001E04477",
+ "PublicDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that were supplied by the L3 cache. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Any memory transaction that reached the SQ.",
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x21",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json b/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json
index e2facc4086e9..b21d602e9f1a 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/frontend.json
@@ -108,7 +108,7 @@
"EventName": "FRONTEND_RETIRED.ANY_ANT",
"MSRIndex": "0x3F7",
"MSRValue": "0x9",
- "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted) Available PDIST counters: 0",
+ "PublicDescription": "Always Not Taken (ANT) conditional retired branches (no BTB entry and not mispredicted) Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -120,7 +120,7 @@
"EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x1",
- "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -169,7 +169,7 @@
"EventName": "FRONTEND_RETIRED.DSB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x11",
- "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss. Available PDIST counters: 0",
+ "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -199,7 +199,7 @@
"EventName": "FRONTEND_RETIRED.ITLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x14",
- "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -211,7 +211,7 @@
"EventName": "FRONTEND_RETIRED.L1I_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x12",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -223,7 +223,7 @@
"EventName": "FRONTEND_RETIRED.L2_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x13",
- "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -235,7 +235,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
"MSRIndex": "0x3F7",
"MSRValue": "0x608006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -247,7 +247,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
"MSRIndex": "0x3F7",
"MSRValue": "0x601006",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -259,7 +259,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
"MSRIndex": "0x3F7",
"MSRValue": "0x600206",
- "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -271,7 +271,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
"MSRIndex": "0x3F7",
"MSRValue": "0x610006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -283,7 +283,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
"MSRIndex": "0x3F7",
"MSRValue": "0x100206",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -295,7 +295,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
"MSRIndex": "0x3F7",
"MSRValue": "0x602006",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -307,7 +307,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
"MSRIndex": "0x3F7",
"MSRValue": "0x600406",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -319,7 +319,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
"MSRIndex": "0x3F7",
"MSRValue": "0x620006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -331,7 +331,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
"MSRIndex": "0x3F7",
"MSRValue": "0x604006",
- "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -343,7 +343,7 @@
"EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
"MSRIndex": "0x3F7",
"MSRValue": "0x600806",
- "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -355,7 +355,7 @@
"EventName": "FRONTEND_RETIRED.MISP_ANT",
"MSRIndex": "0x3F7",
"MSRValue": "0x9",
- "PublicDescription": "ANT retired branches that got just mispredicted Available PDIST counters: 0",
+ "PublicDescription": "ANT retired branches that got just mispredicted Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -367,7 +367,7 @@
"EventName": "FRONTEND_RETIRED.MS_FLOWS",
"MSRIndex": "0x3F7",
"MSRValue": "0x8",
- "PublicDescription": "Counts flows delivered by the Microcode Sequencer Available PDIST counters: 0",
+ "PublicDescription": "Counts flows delivered by the Microcode Sequencer Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -397,7 +397,7 @@
"EventName": "FRONTEND_RETIRED.STLB_MISS",
"MSRIndex": "0x3F7",
"MSRValue": "0x15",
- "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -409,7 +409,7 @@
"EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
"MSRIndex": "0x3F7",
"MSRValue": "0x17",
- "PublicDescription": "Number retired branch instructions that caused the front-end to be resteered when it finds the instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number retired branch instructions that caused the front-end to be resteered when it finds the instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x3",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json b/tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json
index 3c740962e63e..06390a72110d 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/lnl-metrics.json
@@ -1,75 +1,47 @@
[
{
"BriefDescription": "C10 residency percent per package",
- "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c10\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C10_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C1 residency percent per core",
- "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c1\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C1_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C2 residency percent per package",
- "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c2\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C2_Pkg_Residency",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "C3 residency percent per package",
- "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
- "MetricGroup": "Power",
- "MetricName": "C3_Pkg_Residency",
- "ScaleUnit": "100%"
- },
- {
"BriefDescription": "C6 residency percent per core",
- "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Core_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C6 residency percent per package",
- "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+ "MetricExpr": "cstate_pkg@c6\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C6_Pkg_Residency",
"ScaleUnit": "100%"
},
{
"BriefDescription": "C7 residency percent per core",
- "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+ "MetricExpr": "cstate_core@c7\\-residency@ / msr@tsc@",
"MetricGroup": "Power",
"MetricName": "C7_Core_Residency",
"ScaleUnit": "100%"
},
{
- "BriefDescription": "C7 residency percent per package",
- "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
- "MetricGroup": "Power",
- "MetricName": "C7_Pkg_Residency",
- "ScaleUnit": "100%"
- },
- {
- "BriefDescription": "C8 residency percent per package",
- "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
- "MetricGroup": "Power",
- "MetricName": "C8_Pkg_Residency",
- "ScaleUnit": "100%"
- },
- {
- "BriefDescription": "C9 residency percent per package",
- "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
- "MetricGroup": "Power",
- "MetricName": "C9_Pkg_Residency",
- "ScaleUnit": "100%"
- },
- {
"BriefDescription": "Percentage of cycles spent in System Management Interrupts.",
"MetricExpr": "((msr@aperf@ - cycles) / msr@aperf@ if msr@smi@ > 0 else 0)",
"MetricGroup": "smi",
@@ -555,7 +527,7 @@
},
{
"BriefDescription": "Average CPU Utilization",
- "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_atom@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_atom@",
"MetricName": "tma_info_system_cpu_utilization",
"Unit": "cpu_atom"
},
@@ -725,6 +697,13 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Uncore frequency per die [GHZ]",
+ "MetricExpr": "tma_info_system_socket_clks / #num_dies / duration_time / 1e9",
+ "MetricGroup": "SoC",
+ "MetricName": "UNCORE_FREQ",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This metric represents Core fraction of cycles CPU dispatched uops on execution ports for ALU operations.",
"MetricExpr": "cpu_core@UOPS_DISPATCHED.ALU@ / (6 * tma_info_thread_clks)",
"MetricGroup": "TopdownL5;tma_L5_group;tma_ports_utilized_3m_group",
@@ -755,7 +734,7 @@
{
"BriefDescription": "This category represents fraction of slots where no uops are being delivered due to a lack of required resources for accepting new uops in the Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-be\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvOB;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_backend_bound",
"MetricThreshold": "tma_backend_bound > 0.2",
@@ -767,7 +746,7 @@
{
"BriefDescription": "This category represents fraction of slots wasted due to incorrect speculations",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-bad\\-spec@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-bad\\-spec@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_bad_speculation",
"MetricThreshold": "tma_bad_speculation > 0.15",
@@ -794,35 +773,35 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
+ "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
+ "MetricGroup": "BvCB;Cor;tma_issueComp",
+ "MetricName": "tma_bottleneck_compute_bound_est",
+ "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
+ "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_bandwidth / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_sq_full / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_fb_full / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)))",
"MetricGroup": "BvMB;Mem;MemoryBW;Offcore;tma_issueBW",
- "MetricName": "tma_bottleneck_cache_memory_bandwidth",
- "MetricThreshold": "tma_bottleneck_cache_memory_bandwidth > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_bandwidth",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_bandwidth > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Bandwidth related bottlenecks. Related metrics: tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full",
"Unit": "cpu_core"
},
{
"BriefDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks",
- "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_capacity / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_dram_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_mem_latency / (tma_mem_bandwidth + tma_mem_latency)) + tma_memory_bound * (tma_l3_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l3_hit_latency / (tma_contested_accesses + tma_data_sharing + tma_l3_hit_latency + tma_sq_full)) + tma_memory_bound * tma_l2_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_dependency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_l1_latency_capacity / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_lock_latency / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_loads / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_split_stores / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_store_latency / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvML;Mem;MemoryLat;Offcore;tma_issueLat",
- "MetricName": "tma_bottleneck_cache_memory_latency",
- "MetricThreshold": "tma_bottleneck_cache_memory_latency > 20",
+ "MetricName": "tma_bottleneck_data_cache_memory_latency",
+ "MetricThreshold": "tma_bottleneck_data_cache_memory_latency > 20",
"PublicDescription": "Total pipeline cost of external Memory- or Cache-Latency related bottlenecks. Related metrics: tma_l3_hit_latency, tma_mem_latency",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Total pipeline cost when the execution is compute-bound - an estimation",
- "MetricExpr": "100 * (tma_core_bound * tma_divider / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_core_bound * (tma_ports_utilization / (tma_divider + tma_ports_utilization + tma_serializing_operation)) * (tma_ports_utilized_3m / (tma_ports_utilized_0 + tma_ports_utilized_1 + tma_ports_utilized_2 + tma_ports_utilized_3m)))",
- "MetricGroup": "BvCB;Cor;tma_issueComp",
- "MetricName": "tma_bottleneck_compute_bound_est",
- "MetricThreshold": "tma_bottleneck_compute_bound_est > 20",
- "PublicDescription": "Total pipeline cost when the execution is compute-bound - an estimation. Covers Core Bound when High ILP as well as when long-latency execution units are busy. Related metrics: ",
- "Unit": "cpu_core"
- },
- {
"BriefDescription": "Total pipeline cost of instruction fetch bandwidth related bottlenecks (when the front-end could not sustain operations delivery to the back-end)",
- "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms))) - tma_bottleneck_big_code",
+ "MetricExpr": "100 * (tma_frontend_bound - (1 - 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts) * tma_fetch_latency * tma_mispredicts_resteers / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) - (1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms)) - tma_bottleneck_big_code",
"MetricGroup": "BvFB;Fed;FetchBW;Frontend",
"MetricName": "tma_bottleneck_instruction_fetch_bw",
"MetricThreshold": "tma_bottleneck_instruction_fetch_bw > 20",
@@ -830,7 +809,7 @@
},
{
"BriefDescription": "Total pipeline cost of irregular execution (e.g",
- "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_fetch_bandwidth * tma_ms / (tma_dsb + tma_lsd + tma_mite + tma_ms)) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_microcode_sequencer + tma_few_uops_instructions) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
+ "MetricExpr": "100 * ((1 - cpu_core@INST_RETIRED.REP_ITERATION@ / cpu_core@UOPS_RETIRED.MS\\,cmask\\=1@) * (tma_fetch_latency * (tma_ms_switches + tma_branch_resteers * (tma_clears_resteers + tma_mispredicts_resteers * tma_other_mispredicts / tma_branch_mispredicts) / (tma_clears_resteers + tma_mispredicts_resteers + tma_unknown_branches)) / (tma_branch_resteers + tma_dsb_switches + tma_icache_misses + tma_itlb_misses + tma_lcp + tma_ms_switches) + tma_ms) + 10 * tma_microcode_sequencer * tma_other_mispredicts / tma_branch_mispredicts * tma_branch_mispredicts + tma_machine_clears * tma_other_nukes / tma_other_nukes + tma_core_bound * (tma_serializing_operation + cpu_core@RS.EMPTY_RESOURCE@ / tma_info_thread_clks * tma_ports_utilized_0) / (tma_divider + tma_ports_utilization + tma_serializing_operation) + tma_microcode_sequencer / (tma_microcode_sequencer + tma_few_uops_instructions) * (tma_assists / tma_microcode_sequencer) * tma_heavy_operations)",
"MetricGroup": "Bad;BvIO;Cor;Ret;tma_issueMS",
"MetricName": "tma_bottleneck_irregular_overhead",
"MetricThreshold": "tma_bottleneck_irregular_overhead > 10",
@@ -839,7 +818,7 @@
},
{
"BriefDescription": "Total pipeline cost of Memory Address Translation related bottlenecks (data-side TLBs)",
- "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
+ "MetricExpr": "100 * (tma_memory_bound * (tma_l1_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_load / (tma_dtlb_load + tma_fb_full + tma_l1_latency_capacity + tma_l1_latency_dependency + tma_lock_latency + tma_split_loads + tma_store_early_blk + tma_store_fwd_blk)) + tma_memory_bound * (tma_store_bound / (tma_dram_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_store_bound)) * (tma_dtlb_store / (tma_dtlb_store + tma_false_sharing + tma_split_stores + tma_store_latency + tma_streaming_stores)))",
"MetricGroup": "BvMT;Mem;MemoryTLB;Offcore;tma_issueTLB",
"MetricName": "tma_bottleneck_memory_data_tlbs",
"MetricThreshold": "tma_bottleneck_memory_data_tlbs > 20",
@@ -866,7 +845,7 @@
},
{
"BriefDescription": "Total pipeline cost of remaining bottlenecks in the back-end",
- "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_cache_memory_bandwidth + tma_bottleneck_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
+ "MetricExpr": "100 - (tma_bottleneck_big_code + tma_bottleneck_instruction_fetch_bw + tma_bottleneck_mispredictions + tma_bottleneck_data_cache_memory_bandwidth + tma_bottleneck_data_cache_memory_latency + tma_bottleneck_memory_data_tlbs + tma_bottleneck_memory_synchronization + tma_bottleneck_compute_bound_est + tma_bottleneck_irregular_overhead + tma_bottleneck_branching_overhead + tma_bottleneck_useful_work)",
"MetricGroup": "BvOB;Cor;Offcore",
"MetricName": "tma_bottleneck_other_bottlenecks",
"MetricThreshold": "tma_bottleneck_other_bottlenecks > 20",
@@ -883,7 +862,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU has wasted due to Branch Misprediction",
- "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-br\\-mispredict@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BadSpec;BrMispredicts;BvMP;TmaL2;TopdownL2;tma_L2_group;tma_bad_speculation_group;tma_issueBM",
"MetricName": "tma_branch_mispredicts",
"MetricThreshold": "tma_branch_mispredicts > 0.1 & tma_bad_speculation > 0.15",
@@ -1023,7 +1002,6 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles while the memory subsystem was handling synchronizations due to contested accesses",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS@R, 24 * tma_info_system_core_frequency) + cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM@ * min(cpu_core@MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM@R, 25 * tma_info_system_core_frequency)) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvMS;DataSharing;LockCont;Offcore;Snoop;TopdownL4;tma_L4_group;tma_issueSyncxn;tma_l3_bound_group",
"MetricName": "tma_contested_accesses",
@@ -1076,7 +1054,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to DSB (decoded uop cache) fetch pipeline",
- "MetricExpr": "(cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ + cpu_core@IDQ.DSB_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@IDQ.DSB_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.DSB_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.STARVATION_CYCLES@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
"MetricGroup": "DSB;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_dsb",
"MetricThreshold": "tma_dsb > 0.15 & tma_fetch_bandwidth > 0.2",
@@ -1130,7 +1108,7 @@
"MetricGroup": "BvMB;MemoryBW;TopdownL4;tma_L4_group;tma_issueBW;tma_issueSL;tma_issueSmSt;tma_l1_bound_group",
"MetricName": "tma_fb_full",
"MetricThreshold": "tma_fb_full > 0.3",
- "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
+ "PublicDescription": "This metric does a *rough estimation* of how often L1D Fill Buffer unavailability limited additional L1D miss memory access requests to proceed. The higher the metric value; the deeper the memory hierarchy level the misses are satisfied from (metric values >1 are valid). Often it hints on approaching bandwidth limits (to L2 cache; L3 cache or external memory). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_info_system_dram_bw_use, tma_mem_bandwidth, tma_sq_full, tma_store_latency, tma_streaming_stores",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -1147,7 +1125,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots the CPU was stalled due to Frontend latency issues",
- "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-fetch\\-lat@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Frontend;TmaL2;TopdownL2;tma_L2_group;tma_frontend_bound_group",
"MetricName": "tma_fetch_latency",
"MetricThreshold": "tma_fetch_latency > 0.1 & tma_frontend_bound > 0.15",
@@ -1197,7 +1175,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) scalar uops fraction the CPU has retired",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_scalar",
"MetricThreshold": "tma_fp_scalar > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1207,7 +1185,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic floating-point (FP) vector uops fraction the CPU has retired aggregated across all vector widths",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL4;tma_L4_group;tma_fp_arith_group;tma_issue2P",
"MetricName": "tma_fp_vector",
"MetricThreshold": "tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6)",
@@ -1217,7 +1195,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 128-bit wide vectors",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_SINGLE@) / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_128b",
"MetricThreshold": "tma_fp_vector_128b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1227,7 +1205,7 @@
},
{
"BriefDescription": "This metric approximates arithmetic FP vector uops fraction the CPU has retired for 256-bit wide vectors",
- "MetricExpr": "cpu_core@FP_ARITH_INST_RETIRED.VECTOR\\,umask\\=0x30@ / (tma_retiring * tma_info_thread_slots)",
+ "MetricExpr": "cpu_core@FP_ARITH_OPS_RETIRED.VECTOR\\,umask\\=0x30@ / (tma_retiring * tma_info_thread_slots)",
"MetricGroup": "Compute;Flops;TopdownL5;tma_L5_group;tma_fp_vector_group;tma_issue2P",
"MetricName": "tma_fp_vector_256b",
"MetricThreshold": "tma_fp_vector_256b > 0.1 & (tma_fp_vector > 0.1 & (tma_fp_arith > 0.2 & tma_light_operations > 0.6))",
@@ -1238,7 +1216,7 @@
{
"BriefDescription": "This category represents fraction of slots where the processor's Frontend undersupplies its Backend",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-fe\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvFB;BvIO;Default;PGO;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_frontend_bound",
"MetricThreshold": "tma_frontend_bound > 0.15",
@@ -1259,7 +1237,7 @@
},
{
"BriefDescription": "This metric represents fraction of slots where the CPU was retiring heavy-weight operations -- instructions that require two or more uops or micro-coded sequences",
- "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-heavy\\-ops@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Retire;TmaL2;TopdownL2;tma_L2_group;tma_retiring_group",
"MetricName": "tma_heavy_operations",
"MetricThreshold": "tma_heavy_operations > 0.1",
@@ -1437,7 +1415,7 @@
},
{
"BriefDescription": "Floating Point Operations Per Cycle",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@) / tma_info_thread_clks",
"MetricGroup": "Flops;Ret",
"MetricName": "tma_info_core_flopc",
"Unit": "cpu_core"
@@ -1578,7 +1556,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + cpu_core@FP_ARITH_INST_RETIRED.VECTOR@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_iparith",
"MetricThreshold": "tma_info_inst_mix_iparith < 10",
@@ -1587,7 +1565,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX/SSE 128-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx128",
"MetricThreshold": "tma_info_inst_mix_iparith_avx128 < 10",
@@ -1596,7 +1574,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic AVX* 256-bit instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_DOUBLE@ + cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;FpVector;InsType",
"MetricName": "tma_info_inst_mix_iparith_avx256",
"MetricThreshold": "tma_info_inst_mix_iparith_avx256 < 10",
@@ -1605,7 +1583,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Double-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_DOUBLE@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_OPS_RETIRED.SCALAR_DOUBLE@",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_dp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_dp < 10",
@@ -1614,7 +1592,7 @@
},
{
"BriefDescription": "Instructions per FP Arithmetic Scalar Single-Precision instruction (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_INST_RETIRED.SCALAR_SINGLE@",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@FP_ARITH_OPS_RETIRED.SCALAR_SINGLE@",
"MetricGroup": "Flops;FpScalar;InsType",
"MetricName": "tma_info_inst_mix_iparith_scalar_sp",
"MetricThreshold": "tma_info_inst_mix_iparith_scalar_sp < 10",
@@ -1639,7 +1617,7 @@
},
{
"BriefDescription": "Instructions per Floating Point (FP) Operation (lower number means higher occurrence rate)",
- "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@)",
+ "MetricExpr": "cpu_core@INST_RETIRED.ANY@ / (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@)",
"MetricGroup": "Flops;InsType",
"MetricName": "tma_info_inst_mix_ipflop",
"MetricThreshold": "tma_info_inst_mix_ipflop < 10",
@@ -1694,7 +1672,7 @@
},
{
"BriefDescription": "Average per-thread data fill bandwidth to the L1 data cache [GB / sec]",
- "MetricExpr": "64 * cpu_core@L1D.REPLACEMENT@ / 1e9 / tma_info_system_time",
+ "MetricExpr": "64 * cpu_core@L1D.L1_REPLACEMENT@ / 1e9 / tma_info_system_time",
"MetricGroup": "Mem;MemoryBW",
"MetricName": "tma_info_memory_l1d_cache_fill_bw",
"Unit": "cpu_core"
@@ -1707,6 +1685,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "L0 cache true misses per kilo instruction for retired demand loads",
+ "MetricExpr": "1e3 * (cpu_core@MEM_LOAD_RETIRED.L1_MISS@ + cpu_core@MEM_LOAD_RETIRED.L1_HIT_L1@) / cpu_core@INST_RETIRED.ANY@",
+ "MetricGroup": "CacheHits;Mem",
+ "MetricName": "tma_info_memory_l1dl0_mpki",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "L1 cache true misses per kilo instruction for retired demand loads",
"MetricExpr": "1e3 * cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / cpu_core@INST_RETIRED.ANY@",
"MetricGroup": "CacheHits;Mem",
@@ -1922,6 +1907,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Average number of uops fetched from MS per cycle",
+ "MetricExpr": "cpu_core@IDQ.MS_UOPS@ / cpu_core@IDQ.MS_UOPS\\,cmask\\=1@",
+ "MetricGroup": "Fed;FetchLat;MicroSeq",
+ "MetricName": "tma_info_pipeline_fetch_ms",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Instructions per a microcode Assist invocation",
"MetricExpr": "cpu_core@INST_RETIRED.ANY@ / cpu_core@ASSISTS.ANY@",
"MetricGroup": "MicroSeq;Pipeline;Ret;Retire",
@@ -1955,7 +1947,7 @@
},
{
"BriefDescription": "Measured Average Core Frequency for unhalted processors [GHz]",
- "MetricExpr": "tma_info_system_turbo_utilization * TSC / 1e9 / tma_info_system_time",
+ "MetricExpr": "tma_info_system_turbo_utilization * msr@tsc\\,cpu=cpu_core@ / 1e9 / tma_info_system_time",
"MetricGroup": "Power;Summary",
"MetricName": "tma_info_system_core_frequency",
"Unit": "cpu_core"
@@ -1969,14 +1961,22 @@
},
{
"BriefDescription": "Average number of utilized CPUs",
- "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / TSC",
+ "MetricExpr": "cpu_core@CPU_CLK_UNHALTED.REF_TSC@ / msr@tsc\\,cpu=cpu_core@",
"MetricGroup": "Summary",
"MetricName": "tma_info_system_cpus_utilized",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
+ "MetricExpr": "32 * UNC_M_TOTAL_DATA / 1e9 / tma_info_system_time",
+ "MetricGroup": "HPC;MemOffcore;MemoryBW;SoC;tma_issueBW",
+ "MetricName": "tma_info_system_dram_bw_use",
+ "PublicDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]. Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_mem_bandwidth, tma_sq_full",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Giga Floating Point Operations Per Second",
- "MetricExpr": "(cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_INST_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE@) / 1e9 / tma_info_system_time",
+ "MetricExpr": "(cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + 2 * cpu_core@FP_ARITH_OPS_RETIRED.128B_PACKED_DOUBLE@ + 4 * cpu_core@FP_ARITH_OPS_RETIRED.4_FLOPS@ + 8 * cpu_core@FP_ARITH_OPS_RETIRED.256B_PACKED_SINGLE@) / 1e9 / tma_info_system_time",
"MetricGroup": "Cor;Flops;HPC",
"MetricName": "tma_info_system_gflops",
"PublicDescription": "Giga Floating Point Operations Per Second. Aggregate across all supported options of: FP precisions, scalar and vector instructions, vector-width",
@@ -2021,6 +2021,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Socket actual clocks when any core is active on that socket",
+ "MetricExpr": "UNC_CLOCK.SOCKET",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_socket_clks",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Run duration time in seconds",
"MetricExpr": "duration_time",
"MetricGroup": "Summary",
@@ -2036,6 +2043,13 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Measured Average Uncore Frequency for the SoC [GHz]",
+ "MetricExpr": "tma_info_system_socket_clks / 1e9 / tma_info_system_time",
+ "MetricGroup": "SoC",
+ "MetricName": "tma_info_system_uncore_frequency",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Per-Logical Processor actual clocks when the Logical Processor is active.",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.THREAD@",
"MetricGroup": "Pipeline",
@@ -2156,12 +2170,12 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
- "MetricExpr": "4 * cpu_core@DEPENDENT_LOADS.ANY@ / tma_info_thread_clks",
+ "BriefDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache",
+ "MetricExpr": "4 * cpu_core@DEPENDENT_LOADS.ANY\\,cmask\\=1@ / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_l1_bound_group",
"MetricName": "tma_l1_latency_dependency",
"MetricThreshold": "tma_l1_latency_dependency > 0.1 & (tma_l1_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS",
+ "PublicDescription": "This metric ([SKL+] roughly; [LNL]) estimates fraction of cycles with demand load accesses that hit the L1D cache. The short latency of the L1D cache may be exposed in pointer-chasing memory access patterns as an example. Sample with: MEM_LOAD_UOPS_RETIRED.L1_HIT_PS",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2177,7 +2191,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles with demand load accesses that hit the L2 cache under unloaded scenarios (possibly L2 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L2_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L2_HIT@R, 3 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "MemoryLat;TopdownL4;tma_L4_group;tma_l2_bound_group",
"MetricName": "tma_l2_hit_latency",
@@ -2198,12 +2211,11 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited)",
- "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_LOAD_RETIRED.L3_HIT@ * min(cpu_core@MEM_LOAD_RETIRED.L3_HIT@R, 9 * tma_info_system_core_frequency) * (1 + cpu_core@MEM_LOAD_RETIRED.FB_HIT@ / cpu_core@MEM_LOAD_RETIRED.L1_MISS@ / 2) / tma_info_thread_clks",
"MetricGroup": "BvML;MemoryLat;TopdownL4;tma_L4_group;tma_issueLat;tma_l3_bound_group",
"MetricName": "tma_l3_hit_latency",
"MetricThreshold": "tma_l3_hit_latency > 0.1 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_cache_memory_latency, tma_mem_latency",
+ "PublicDescription": "This metric estimates fraction of cycles with demand load accesses that hit the L3 cache under unloaded scenarios (possibly L3 latency limited). Avoiding private cache misses (i.e. L2 misses/L3 hits) will improve the latency; reduce contention with sibling physical cores and increase performance. Note the value of this node may overlap with its siblings. Sample with: MEM_LOAD_RETIRED.L3_HIT_PS. Related metrics: tma_bottleneck_data_cache_memory_latency, tma_mem_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2285,6 +2297,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU spent handling cache misses due to lock operations",
+ "MetricConstraint": "NO_GROUP_EVENTS",
"MetricExpr": "cpu_core@MEM_INST_RETIRED.LOCK_LOADS@ * cpu_core@MEM_INST_RETIRED.LOCK_LOADS@R / tma_info_thread_clks",
"MetricGroup": "LockCont;Offcore;TopdownL4;tma_L4_group;tma_issueRFO;tma_l1_bound_group",
"MetricName": "tma_lock_latency",
@@ -2295,7 +2308,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to LSD (Loop Stream Detector) unit",
- "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@LSD.UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / tma_info_thread_clks / 2",
"MetricGroup": "FetchBW;LSD;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_lsd",
"MetricThreshold": "tma_lsd > 0.15 & tma_fetch_bandwidth > 0.2",
@@ -2320,7 +2333,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueBW",
"MetricName": "tma_mem_bandwidth",
"MetricThreshold": "tma_mem_bandwidth > 0.2 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
+ "PublicDescription": "This metric estimates fraction of cycles where the core's performance was likely hurt due to approaching bandwidth limits of external memory - DRAM ([SPR-HBM] and/or HBM). The underlying heuristic assumes that a similar off-core traffic is generated by all IA cores. This metric does not aggregate non-data-read requests by this logical processor; requests from other IA Logical Processors/Physical Cores/sockets; or other non-IA devices like GPU; hence the maximum external memory bandwidth limits may or may not be approached when this metric is flagged (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_sq_full",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2330,13 +2343,13 @@
"MetricGroup": "BvML;MemoryLat;Offcore;TopdownL4;tma_L4_group;tma_dram_bound_group;tma_issueLat",
"MetricName": "tma_mem_latency",
"MetricThreshold": "tma_mem_latency > 0.1 & (tma_dram_bound > 0.1 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_cache_memory_latency, tma_l3_hit_latency",
+ "PublicDescription": "This metric estimates fraction of cycles where the performance was likely hurt due to latency from external memory - DRAM ([SPR-HBM] and/or HBM). This metric does not aggregate requests from other Logical Processors/Physical Cores/sockets (see Uncore counters for that). Related metrics: tma_bottleneck_data_cache_memory_latency, tma_l3_hit_latency",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
{
"BriefDescription": "This metric represents fraction of slots the Memory subsystem within the Backend was a bottleneck",
- "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-mem\\-bound@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "Backend;TmaL2;TopdownL2;tma_L2_group;tma_backend_bound_group",
"MetricName": "tma_memory_bound",
"MetricThreshold": "tma_memory_bound > 0.2 & tma_backend_bound > 0.2",
@@ -2347,7 +2360,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to LFENCE Instructions.",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "13 * cpu_core@MISC2_RETIRED.LFENCE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_memory_fence",
@@ -2386,7 +2398,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the MITE pipeline (the legacy decode pipeline)",
- "MetricExpr": "(cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.MITE_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
+ "MetricExpr": "(cpu_core@IDQ.MITE_UOPS\\,cmask\\=0x8\\,inv\\=0x1@ / 2 + cpu_core@IDQ.MITE_UOPS@ / (cpu_core@IDQ.DSB_UOPS@ + cpu_core@IDQ.MITE_UOPS@) * (cpu_core@IDQ_BUBBLES.STARVATION_CYCLES@ - cpu_core@IDQ_BUBBLES.FETCH_LATENCY@)) / tma_info_thread_clks",
"MetricGroup": "DSBmiss;FetchBW;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_mite",
"MetricThreshold": "tma_mite > 0.1 & tma_fetch_bandwidth > 0.2",
@@ -2406,7 +2418,7 @@
},
{
"BriefDescription": "This metric represents Core fraction of cycles in which CPU was likely limited due to the Microcode Sequencer (MS) unit - see Microcode_Sequencer node for details.",
- "MetricExpr": "cpu_core@IDQ.MS_CYCLES_ANY@ / tma_info_thread_clks",
+ "MetricExpr": "cpu_core@IDQ.MS_CYCLES_ANY@ / tma_info_thread_clks / 1.8",
"MetricGroup": "MicroSeq;TopdownL3;tma_L3_group;tma_fetch_bandwidth_group",
"MetricName": "tma_ms",
"MetricThreshold": "tma_ms > 0.05 & tma_fetch_bandwidth > 0.2",
@@ -2445,7 +2457,8 @@
},
{
"BriefDescription": "This metric represents the remaining light uops fraction the CPU has executed - remaining means not covered by other sibling nodes",
- "MetricExpr": "max(0, tma_light_operations - (tma_x87_use + (cpu_core@FP_ARITH_INST_RETIRED.SCALAR@ + cpu_core@FP_ARITH_INST_RETIRED.VECTOR@) / (tma_retiring * tma_info_thread_slots) + (cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@ + cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots) + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
+ "MetricConstraint": "NO_GROUP_EVENTS",
+ "MetricExpr": "max(0, tma_light_operations - (tma_x87_use + (cpu_core@FP_ARITH_OPS_RETIRED.SCALAR@ + cpu_core@FP_ARITH_OPS_RETIRED.VECTOR@) / (tma_retiring * tma_info_thread_slots) + (cpu_core@INT_VEC_RETIRED.ADD_128@ + cpu_core@INT_VEC_RETIRED.VNNI_128@ + cpu_core@INT_VEC_RETIRED.ADD_256@ + cpu_core@INT_VEC_RETIRED.MUL_256@ + cpu_core@INT_VEC_RETIRED.VNNI_256@) / (tma_retiring * tma_info_thread_slots) + tma_memory_operations + tma_fused_instructions + tma_non_fused_branches))",
"MetricGroup": "Pipeline;TopdownL3;tma_L3_group;tma_light_operations_group",
"MetricName": "tma_other_light_ops",
"MetricThreshold": "tma_other_light_ops > 0.3 & tma_light_operations > 0.6",
@@ -2483,6 +2496,7 @@
},
{
"BriefDescription": "This metric estimates fraction of cycles the CPU performance was potentially limited due to Core computation issues (non divider-related)",
+ "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "((cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ + (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@)) / tma_info_thread_clks if cpu_core@ARITH.DIV_ACTIVE@ < cpu_core@CYCLE_ACTIVITY.STALLS_TOTAL@ - cpu_core@EXE_ACTIVITY.BOUND_ON_LOADS@ else (cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ + tma_retiring * cpu_core@EXE_ACTIVITY.2_3_PORTS_UTIL@) / tma_info_thread_clks)",
"MetricGroup": "PortsUtil;TopdownL3;tma_L3_group;tma_core_bound_group",
"MetricName": "tma_ports_utilization",
@@ -2493,6 +2507,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed no uops on any execution port (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.EXE_BOUND_0_PORTS@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_0",
@@ -2503,6 +2518,7 @@
},
{
"BriefDescription": "This metric represents fraction of cycles where the CPU executed total of 1 uop per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
+ "MetricConstraint": "NO_THRESHOLD_AND_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.1_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issueL1;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_1",
@@ -2513,7 +2529,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 2 uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@EXE_ACTIVITY.2_PORTS_UTIL@ / tma_info_thread_clks",
"MetricGroup": "PortsUtil;TopdownL4;tma_L4_group;tma_issue2P;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_2",
@@ -2524,7 +2539,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles CPU executed total of 3 or more uops per cycle on all execution ports (Logical Processor cycles since ICL, Physical Core cycles otherwise)",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@UOPS_EXECUTED.CYCLES_GE_3@ / tma_info_thread_clks",
"MetricGroup": "BvCB;PortsUtil;TopdownL4;tma_L4_group;tma_ports_utilization_group",
"MetricName": "tma_ports_utilized_3m",
@@ -2545,7 +2559,7 @@
{
"BriefDescription": "This category represents fraction of slots utilized by useful work i.e. issued uops that eventually get retired",
"DefaultMetricgroupName": "TopdownL1",
- "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@) + 0 * tma_info_thread_slots",
+ "MetricExpr": "cpu_core@topdown\\-retiring@ / (cpu_core@topdown\\-fe\\-bound@ + cpu_core@topdown\\-bad\\-spec@ + cpu_core@topdown\\-retiring@ + cpu_core@topdown\\-be\\-bound@)",
"MetricGroup": "BvUW;Default;TmaL1;TopdownL1;tma_L1_group",
"MetricName": "tma_retiring",
"MetricThreshold": "tma_retiring > 0.7 | tma_heavy_operations > 0.1",
@@ -2576,7 +2590,6 @@
},
{
"BriefDescription": "This metric represents fraction of cycles the CPU was stalled due to PAUSE Instructions",
- "MetricConstraint": "NO_GROUP_EVENTS_NMI",
"MetricExpr": "cpu_core@CPU_CLK_UNHALTED.PAUSE@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_serializing_operation_group",
"MetricName": "tma_slow_pause",
@@ -2611,7 +2624,7 @@
"MetricGroup": "BvMB;MemoryBW;Offcore;TopdownL4;tma_L4_group;tma_issueBW;tma_l3_bound_group",
"MetricName": "tma_sq_full",
"MetricThreshold": "tma_sq_full > 0.3 & (tma_l3_bound > 0.05 & (tma_memory_bound > 0.2 & tma_backend_bound > 0.2))",
- "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
+ "PublicDescription": "This metric measures fraction of cycles where the Super Queue (SQ) was full taking into account all request-types and both hardware SMT threads (Logical Processors). Related metrics: tma_bottleneck_data_cache_memory_bandwidth, tma_fb_full, tma_info_system_dram_bw_use, tma_mem_bandwidth",
"ScaleUnit": "100%",
"Unit": "cpu_core"
},
@@ -2626,6 +2639,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "This metric estimates clocks wasted due to loads blocked due to unknown store address (did not do memory disambiguation) or due to unknown store data",
+ "MetricExpr": "7 * cpu_core@LD_BLOCKS.STORE_EARLY\\,cmask\\=1@ / tma_info_thread_clks",
+ "MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
+ "MetricName": "tma_store_early_blk",
+ "MetricThreshold": "tma_store_early_blk > 0.2",
+ "ScaleUnit": "100%",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "This metric roughly estimates fraction of cycles when the memory subsystem had loads blocked since they could not forward data from earlier (in program order) overlapping stores",
"MetricExpr": "13 * cpu_core@LD_BLOCKS.STORE_FORWARD@ / tma_info_thread_clks",
"MetricGroup": "TopdownL4;tma_L4_group;tma_l1_bound_group",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/memory.json b/tools/perf/pmu-events/arch/x86/lunarlake/memory.json
index 8021a1c7dd3b..caa387e10259 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/memory.json
@@ -163,7 +163,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_1024",
"MSRIndex": "0x3F6",
"MSRValue": "0x400",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 1024 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "53",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -176,7 +176,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
"MSRIndex": "0x3F6",
"MSRValue": "0x80",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "1009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -189,7 +189,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
"MSRIndex": "0x3F6",
"MSRValue": "0x10",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "20011",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -202,7 +202,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_2048",
"MSRIndex": "0x3F6",
"MSRValue": "0x800",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 2048 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "23",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -215,7 +215,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
"MSRIndex": "0x3F6",
"MSRValue": "0x100",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "503",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -228,7 +228,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
"MSRIndex": "0x3F6",
"MSRValue": "0x20",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100007",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -241,7 +241,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
"MSRIndex": "0x3F6",
"MSRValue": "0x4",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -254,7 +254,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
"MSRIndex": "0x3F6",
"MSRValue": "0x200",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "101",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -267,7 +267,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
"MSRIndex": "0x3F6",
"MSRValue": "0x40",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "2003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -280,7 +280,7 @@
"EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
"MSRIndex": "0x3F6",
"MSRValue": "0x8",
- "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency. Available PDIST counters: 0",
+ "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles. Reported latency may be longer than just the memory latency.",
"SampleAfterValue": "50021",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -291,7 +291,7 @@
"Data_LA": "1",
"EventCode": "0xcd",
"EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE",
- "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8 Available PDIST counters: 0",
+ "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -352,7 +352,7 @@
},
{
"BriefDescription": "Counts demand data reads that were supplied by DRAM.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.DRAM",
"MSRIndex": "0x1a6,0x1a7",
@@ -376,7 +376,7 @@
},
{
"BriefDescription": "Counts demand data reads that were not supplied by the L3 cache and were supplied by the system memory (DRAM, MSC, or MMIO).",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
@@ -412,7 +412,7 @@
},
{
"BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache and were supplied by the system memory (DRAM, MSC, or MMIO).",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.DEMAND_RFO.L3_MISS",
"MSRIndex": "0x1a6,0x1a7",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/other.json b/tools/perf/pmu-events/arch/x86/lunarlake/other.json
index 59949f9541d8..164374edf293 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/other.json
@@ -151,7 +151,7 @@
},
{
"BriefDescription": "Counts streaming stores that have any type of response.",
- "Counter": "0,1,2,3",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0x2A,0x2B",
"EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
"MSRIndex": "0x1a6,0x1a7",
@@ -178,6 +178,7 @@
"EventCode": "0xf4",
"EventName": "XQ_PROMOTION.ALL",
"SampleAfterValue": "1000003",
+ "UMask": "0x7",
"Unit": "cpu_atom"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json
index 6ac410510628..97797f7b072e 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/pipeline.json
@@ -21,8 +21,9 @@
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts the number of active floating point and integer dividers per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_OCCUPANCY",
"SampleAfterValue": "1000003",
@@ -30,8 +31,9 @@
"Unit": "cpu_atom"
},
{
- "BriefDescription": "Counts the number of floating point and integer divider uops executed per cycle.",
+ "BriefDescription": "This event is deprecated.",
"Counter": "0,1,2,3,4,5,6,7",
+ "Deprecated": "1",
"EventCode": "0xcd",
"EventName": "ARITH.DIV_UOPS",
"SampleAfterValue": "1000003",
@@ -110,7 +112,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts all branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -128,7 +130,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND",
- "PublicDescription": "Counts conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x111",
"Unit": "cpu_core"
@@ -147,7 +149,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_NTAKEN",
- "PublicDescription": "Counts not taken branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts not taken branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -166,7 +168,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN",
- "PublicDescription": "Counts taken conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -176,7 +178,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN_BWD",
- "PublicDescription": "Counts taken backward conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken backward conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -186,7 +188,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.COND_TAKEN_FWD",
- "PublicDescription": "Counts taken forward conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken forward conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x102",
"Unit": "cpu_core"
@@ -205,7 +207,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.FAR_BRANCH",
- "PublicDescription": "Counts far branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts far branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -224,7 +226,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.INDIRECT",
- "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0",
+ "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -261,7 +263,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_CALL",
- "PublicDescription": "Counts both direct and indirect near call instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts both direct and indirect near call instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -280,13 +282,13 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_RETURN",
- "PublicDescription": "Counts return instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts return instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x8",
"Unit": "cpu_core"
},
{
- "BriefDescription": "Counts the number of taken branch instructions retired",
+ "BriefDescription": "Counts the number of near taken branch instructions retired",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
@@ -299,7 +301,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc4",
"EventName": "BR_INST_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Counts taken branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -327,7 +329,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
- "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0",
+ "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -336,7 +338,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.ALL_BRANCHES_COST",
- "PublicDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "All mispredicted branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x44",
"Unit": "cpu_core"
@@ -355,7 +357,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND",
- "PublicDescription": "Counts mispredicted conditional branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts mispredicted conditional branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x111",
"Unit": "cpu_core"
@@ -365,7 +367,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_COST",
- "PublicDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x151",
"Unit": "cpu_core"
@@ -384,7 +386,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_NTAKEN",
- "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -394,7 +396,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_NTAKEN_COST",
- "PublicDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted non-taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x50",
"Unit": "cpu_core"
@@ -413,7 +415,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN",
- "PublicDescription": "Counts taken conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x101",
"Unit": "cpu_core"
@@ -423,7 +425,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD",
- "PublicDescription": "Counts taken backward conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken backward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -433,7 +435,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_BWD_COST",
- "PublicDescription": "number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Number of branch instructions retired that were mispredicted and taken backward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x8001",
"Unit": "cpu_core"
@@ -443,7 +445,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_COST",
- "PublicDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted taken conditional branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x141",
"Unit": "cpu_core"
@@ -453,7 +455,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD",
- "PublicDescription": "Counts taken forward conditional mispredicted branch instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "Counts taken forward conditional mispredicted branch instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"Unit": "cpu_core"
},
@@ -462,7 +464,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.COND_TAKEN_FWD_COST",
- "PublicDescription": "number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Number of branch instructions retired that were mispredicted and taken forward. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x8002",
"Unit": "cpu_core"
@@ -481,7 +483,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT",
- "PublicDescription": "Counts miss-predicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0",
+ "PublicDescription": "Counts mispredicted near indirect branch instructions retired excluding returns. TSX abort is an indirect branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -500,7 +502,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
- "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect. Available PDIST counters: 0",
+ "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -510,7 +512,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_CALL_COST",
- "PublicDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted indirect CALL retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x42",
"Unit": "cpu_core"
@@ -520,7 +522,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.INDIRECT_COST",
- "PublicDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted near indirect branch instructions retired (excluding returns). This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100003",
"UMask": "0xc0",
"Unit": "cpu_core"
@@ -548,7 +550,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
- "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -558,7 +560,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.NEAR_TAKEN_COST",
- "PublicDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted taken near branch instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "400009",
"UMask": "0x60",
"Unit": "cpu_core"
@@ -568,7 +570,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RET",
- "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired. Available PDIST counters: 0",
+ "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -587,7 +589,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc5",
"EventName": "BR_MISP_RETIRED.RET_COST",
- "PublicDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0",
+ "PublicDescription": "Mispredicted ret instructions retired. This precise event may be used to get the misprediction cost via the Retire_Latency field of PEBS. It fires on the instruction that immediately follows the mispredicted branch. Available PDIST counters: 0,1",
"SampleAfterValue": "100007",
"UMask": "0x48",
"Unit": "cpu_core"
@@ -906,7 +908,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.ANY_P",
- "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"Unit": "cpu_core"
},
@@ -915,7 +917,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.BR_FUSED",
- "PublicDescription": "retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon) Available PDIST counters: 0",
+ "PublicDescription": "Retired macro-fused uops when there is a branch in the macro-fused pair (the two instructions that got macro-fused count once in this pmon). Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -925,7 +927,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.MACRO_FUSED",
- "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0",
+ "PublicDescription": "INST_RETIRED.MACRO_FUSED Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x30",
"Unit": "cpu_core"
@@ -935,7 +937,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.NOP",
- "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions Available PDIST counters: 0",
+ "PublicDescription": "Counts all retired NOP or ENDBR32/64 or PREFETCHIT0/1 instructions. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -954,7 +956,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xc0",
"EventName": "INST_RETIRED.REP_ITERATION",
- "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0",
+ "PublicDescription": "Number of iterations of Repeat (REP) string retired instructions such as MOVS, CMPS, and SCAS. Each has a byte, word, and doubleword version and string instructions can be repeated using a repetition prefix, REP, that allows their architectural execution to be repeated a number of times as specified by the RCX register. Note the number of iterations is implementation-dependent. Available PDIST counters: 0,1",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1024,6 +1026,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of uops executed on secondary integer ports 0,1,2,3.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0xb3",
+ "EventName": "INT_UOPS_EXECUTED.2ND",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x80",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of uops executed on all Integer ports.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
@@ -1205,7 +1216,7 @@
"EventCode": "0x03",
"EventName": "LD_BLOCKS.ALL",
"SampleAfterValue": "1000003",
- "UMask": "0x10",
+ "UMask": "0x1f",
"Unit": "cpu_atom"
},
{
@@ -1228,6 +1239,15 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts the number of times a load got early blocked due to preceding store operation with unknown address or unknown data. Excluding in-line (immediate) wakeups",
+ "Counter": "0,1,2,3,4,5,6,7,8,9",
+ "EventCode": "0x03",
+ "EventName": "LD_BLOCKS.STORE_EARLY",
+ "SampleAfterValue": "100003",
+ "UMask": "0xa1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts the number of occurrences a retired load gets blocked because its address partially overlaps with an older store (size mismatch) - unknown_sta/bad_forward",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x03",
@@ -1451,7 +1471,7 @@
"Counter": "0,1,2,3,4,5,6,7,8,9",
"EventCode": "0xe4",
"EventName": "MISC_RETIRED.LBR_INSERTS",
- "PublicDescription": "LBR record is inserted Available PDIST counters: 0",
+ "PublicDescription": "LBR record is inserted. Available PDIST counters: 0,1",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1605,6 +1625,15 @@
"Unit": "cpu_atom"
},
{
+ "BriefDescription": "Counts the number of issue slots where no uop could issue due to an IQ scoreboard that stalls allocation until a specified older uop retires or (in the case of jump scoreboard) executes. Commonly executed instructions with IQ scoreboards include LFENCE and MFENCE.",
+ "Counter": "0,1,2,3,4,5,6,7",
+ "EventCode": "0x75",
+ "EventName": "SERIALIZATION.IQ_JEU_SCB",
+ "SampleAfterValue": "1000003",
+ "UMask": "0x1",
+ "Unit": "cpu_atom"
+ },
+ {
"BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x75",
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/uncore-interconnect.json b/tools/perf/pmu-events/arch/x86/lunarlake/uncore-interconnect.json
new file mode 100644
index 000000000000..69ef928d57f6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/uncore-interconnect.json
@@ -0,0 +1,10 @@
+[
+ {
+ "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+ "Counter": "FIXED",
+ "EventCode": "0xff",
+ "EventName": "UNC_CLOCK.SOCKET",
+ "PerPkg": "1",
+ "Unit": "SANTA"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json b/tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json
index 7d63580302de..63c4aa2791e4 100644
--- a/tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/lunarlake/uncore-memory.json
@@ -32,5 +32,13 @@
"Experimental": "1",
"PerPkg": "1",
"Unit": "iMC"
+ },
+ {
+ "BriefDescription": "Total number of read and write byte transfers to/from DRAM, in 32B chunk, per DDR channel. Counter increments by 1 after sending or receiving 32B chunk data.",
+ "Counter": "0,1,2,3,4",
+ "EventCode": "0x3C",
+ "EventName": "UNC_M_TOTAL_DATA",
+ "PerPkg": "1",
+ "Unit": "iMC"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 354ce241500b..3d0c57198056 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -1,7 +1,7 @@
Family-model,Version,Filename,EventType
-GenuineIntel-6-(97|9A|B7|BA|BF),v1.31,alderlake,core
-GenuineIntel-6-BE,v1.31,alderlaken,core
-GenuineIntel-6-C[56],v1.09,arrowlake,core
+GenuineIntel-6-(97|9A|B7|BA|BF),v1.35,alderlake,core
+GenuineIntel-6-BE,v1.35,alderlaken,core
+GenuineIntel-6-C[56],v1.14,arrowlake,core
GenuineIntel-6-(1C|26|27|35|36),v5,bonnell,core
GenuineIntel-6-(3D|47),v30,broadwell,core
GenuineIntel-6-56,v12,broadwellde,core
@@ -9,28 +9,28 @@ GenuineIntel-6-4F,v23,broadwellx,core
GenuineIntel-6-55-[56789ABCDEF],v1.25,cascadelakex,core
GenuineIntel-6-DD,v1.00,clearwaterforest,core
GenuineIntel-6-9[6C],v1.05,elkhartlake,core
-GenuineIntel-6-CF,v1.14,emeraldrapids,core
+GenuineIntel-6-CF,v1.20,emeraldrapids,core
GenuineIntel-6-5[CF],v13,goldmont,core
GenuineIntel-6-7A,v1.01,goldmontplus,core
-GenuineIntel-6-B6,v1.09,grandridge,core
-GenuineIntel-6-A[DE],v1.10,graniterapids,core
+GenuineIntel-6-B6,v1.10,grandridge,core
+GenuineIntel-6-A[DE],v1.16,graniterapids,core
GenuineIntel-6-(3C|45|46),v36,haswell,core
GenuineIntel-6-3F,v29,haswellx,core
GenuineIntel-6-7[DE],v1.24,icelake,core
-GenuineIntel-6-6[AC],v1.28,icelakex,core
+GenuineIntel-6-6[AC],v1.30,icelakex,core
GenuineIntel-6-3A,v24,ivybridge,core
GenuineIntel-6-3E,v24,ivytown,core
GenuineIntel-6-2D,v24,jaketown,core
GenuineIntel-6-(57|85),v16,knightslanding,core
-GenuineIntel-6-BD,v1.14,lunarlake,core
-GenuineIntel-6-(AA|AC|B5),v1.14,meteorlake,core
+GenuineIntel-6-BD,v1.19,lunarlake,core
+GenuineIntel-6-(AA|AC|B5),v1.18,meteorlake,core
GenuineIntel-6-1[AEF],v4,nehalemep,core
GenuineIntel-6-2E,v4,nehalemex,core
-GenuineIntel-6-CC,v1.00,pantherlake,core
+GenuineIntel-6-CC,v1.02,pantherlake,core
GenuineIntel-6-A7,v1.04,rocketlake,core
GenuineIntel-6-2A,v19,sandybridge,core
-GenuineIntel-6-8F,v1.28,sapphirerapids,core
-GenuineIntel-6-AF,v1.11,sierraforest,core
+GenuineIntel-6-8F,v1.35,sapphirerapids,core
+GenuineIntel-6-AF,v1.13,sierraforest,core
GenuineIntel-6-(37|4A|4C|4D|5A),v15,silvermont,core
GenuineIntel-6-(4E|5E|8E|9E|A5|A6),v59,skylake,core
GenuineIntel-6-55-[01234],v1.37,skylakex,core
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
index 82b115183924..d3fc04b2ffbd 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/cache.json
@@ -14,7 +14,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.HWPF_MISS",
- "PublicDescription": "L1D.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -24,7 +23,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x51",
"EventName": "L1D.REPLACEMENT",
- "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace. Available PDIST counters: 0",
+ "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -34,7 +33,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -46,7 +45,7 @@
"EdgeDetect": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
- "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -56,7 +55,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.L2_STALLS",
- "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -66,7 +65,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING",
- "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -77,7 +76,7 @@
"CounterMask": "1",
"EventCode": "0x48",
"EventName": "L1D_PEND_MISS.PENDING_CYCLES",
- "PublicDescription": "Counts duration of L1D miss outstanding in cycles. Available PDIST counters: 0",
+ "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -87,7 +86,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x25",
"EventName": "L2_LINES_IN.ALL",
- "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
"SampleAfterValue": "100003",
"UMask": "0x1f",
"Unit": "cpu_core"
@@ -147,7 +146,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.NON_SILENT",
- "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3 Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3.",
"SampleAfterValue": "200003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -167,7 +166,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.SILENT",
- "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of lines that are silently dropped by L2 cache. These lines are typically in Shared or Exclusive state. A non-threaded event.",
"SampleAfterValue": "200003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -177,7 +176,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x26",
"EventName": "L2_LINES_OUT.USELESS_HWPF",
- "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -187,7 +186,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.ALL",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.REFERENCES]",
"SampleAfterValue": "200003",
"UMask": "0xff",
"Unit": "cpu_core"
@@ -206,7 +205,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.HIT",
- "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_RQSTS.HIT]",
"SampleAfterValue": "200003",
"UMask": "0xdf",
"Unit": "cpu_core"
@@ -225,7 +224,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_REQUEST.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_RQSTS.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f",
"Unit": "cpu_core"
@@ -244,7 +243,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_CODE_RD",
- "PublicDescription": "Counts the total number of L2 code requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of L2 code requests.",
"SampleAfterValue": "200003",
"UMask": "0xe4",
"Unit": "cpu_core"
@@ -254,7 +253,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
- "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0xe1",
"Unit": "cpu_core"
@@ -264,7 +263,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_MISS",
- "PublicDescription": "Counts demand requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x27",
"Unit": "cpu_core"
@@ -274,7 +273,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES",
- "PublicDescription": "Counts demand requests to L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand requests to L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xe7",
"Unit": "cpu_core"
@@ -284,7 +283,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_HWPF",
- "PublicDescription": "L2_RQSTS.ALL_HWPF Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0xf0",
"Unit": "cpu_core"
@@ -294,7 +292,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.ALL_RFO",
- "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches. Available PDIST counters: 0",
+ "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
"SampleAfterValue": "200003",
"UMask": "0xe2",
"Unit": "cpu_core"
@@ -304,7 +302,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_HIT",
- "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
"SampleAfterValue": "200003",
"UMask": "0xc4",
"Unit": "cpu_core"
@@ -314,7 +312,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.CODE_RD_MISS",
- "PublicDescription": "Counts L2 cache misses when fetching instructions. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 cache misses when fetching instructions.",
"SampleAfterValue": "200003",
"UMask": "0x24",
"Unit": "cpu_core"
@@ -324,7 +322,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
- "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc1",
"Unit": "cpu_core"
@@ -334,7 +332,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
- "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once. Available PDIST counters: 0",
+ "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
"SampleAfterValue": "200003",
"UMask": "0x21",
"Unit": "cpu_core"
@@ -344,7 +342,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HIT",
- "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that hit L2 cache. [This event is alias to L2_REQUEST.HIT]",
"SampleAfterValue": "200003",
"UMask": "0xdf",
"Unit": "cpu_core"
@@ -354,7 +352,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.HWPF_MISS",
- "PublicDescription": "L2_RQSTS.HWPF_MISS Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x30",
"Unit": "cpu_core"
@@ -364,7 +361,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.MISS",
- "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS] Available PDIST counters: 0",
+ "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.MISS]",
"SampleAfterValue": "200003",
"UMask": "0x3f",
"Unit": "cpu_core"
@@ -374,7 +371,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.REFERENCES",
- "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL] Available PDIST counters: 0",
+ "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. [This event is alias to L2_REQUEST.ALL]",
"SampleAfterValue": "200003",
"UMask": "0xff",
"Unit": "cpu_core"
@@ -384,7 +381,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_HIT",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0xc2",
"Unit": "cpu_core"
@@ -394,7 +391,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.RFO_MISS",
- "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x22",
"Unit": "cpu_core"
@@ -404,7 +401,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_HIT",
- "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0xc8",
"Unit": "cpu_core"
@@ -414,7 +411,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x24",
"EventName": "L2_RQSTS.SWPF_MISS",
- "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full. Available PDIST counters: 0",
+ "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
"SampleAfterValue": "200003",
"UMask": "0x28",
"Unit": "cpu_core"
@@ -424,7 +421,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x23",
"EventName": "L2_TRANS.L2_WB",
- "PublicDescription": "Counts L2 writebacks that access L2 cache. Available PDIST counters: 0",
+ "PublicDescription": "Counts L2 writebacks that access L2 cache.",
"SampleAfterValue": "200003",
"UMask": "0x40",
"Unit": "cpu_core"
@@ -434,7 +431,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x42",
"EventName": "LOCK_CYCLES.CACHE_LOCK_DURATION",
- "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION). Available PDIST counters: 0",
+ "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -454,7 +451,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.MISS",
- "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x41",
"Unit": "cpu_core"
@@ -474,7 +471,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x2e",
"EventName": "LONGEST_LAT_CACHE.REFERENCE",
- "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3. Available PDIST counters: 0",
+ "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2. It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
"SampleAfterValue": "100003",
"UMask": "0x4f",
"Unit": "cpu_core"
@@ -695,7 +692,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x43",
"EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
- "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss) Available PDIST counters: 0",
+ "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
"SampleAfterValue": "1000003",
"UMask": "0xfd",
"Unit": "cpu_core"
@@ -947,7 +944,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x44",
"EventName": "MEM_STORE_RETIRED.L2_HIT",
- "PublicDescription": "MEM_STORE_RETIRED.L2_HIT Available PDIST counters: 0",
"SampleAfterValue": "200003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1177,12 +1173,36 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xe5",
"EventName": "MEM_UOP_RETIRED.ANY",
- "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses Available PDIST counters: 0",
+ "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_M.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C0008",
+ "PublicDescription": "Counts writebacks of modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
+ "BriefDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.COREWB_NONM.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C1000",
+ "PublicDescription": "Counts writebacks of non-modified cachelines that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Counts demand instruction fetches and L1 instruction cache prefetches that have any type of response.",
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xB7",
@@ -1399,11 +1419,23 @@
"Unit": "cpu_core"
},
{
+ "BriefDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches.",
+ "Counter": "0,1,2,3",
+ "EventCode": "0x2A,0x2B",
+ "EventName": "OCR.READS_TO_CORE.L3_HIT",
+ "MSRIndex": "0x1a6,0x1a7",
+ "MSRValue": "0x3F803C4477",
+ "PublicDescription": "Counts all data read, code read, RFO and ITOM requests including demands and prefetches to the core caches (L1 or L2) that hit in the L3 or were snooped from another core's caches. Available PDIST counters: 0",
+ "SampleAfterValue": "100003",
+ "UMask": "0x1",
+ "Unit": "cpu_core"
+ },
+ {
"BriefDescription": "Any memory transaction that reached the SQ.",
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
- "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc.. Available PDIST counters: 0",
+ "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..",
"SampleAfterValue": "100003",
"UMask": "0x80",
"Unit": "cpu_core"
@@ -1413,7 +1445,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DATA_RD",
- "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1423,7 +1455,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD",
- "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests. Available PDIST counters: 0",
+ "PublicDescription": "Counts both cacheable and Non-Cacheable code read requests.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1433,7 +1465,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
- "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore. Available PDIST counters: 0",
+ "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1443,7 +1475,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.DEMAND_RFO",
- "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM. Available PDIST counters: 0",
+ "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1454,7 +1486,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
- "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1465,7 +1497,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1476,7 +1508,6 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD",
- "PublicDescription": "Cycles where at least 1 outstanding demand data read request is pending. Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1487,7 +1518,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
- "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1497,7 +1528,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
- "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1507,7 +1537,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD",
- "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1517,7 +1547,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of outstanding demand data read requests pending. Requests are considered outstanding from the time they miss the core's L2 cache until the transaction completion message is sent to the requestor.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1527,7 +1557,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO",
- "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of off-core outstanding read-for-ownership (RFO) store transactions every cycle. An RFO transaction is considered to be in the Off-core outstanding state between L2 cache miss and transaction completion.",
"SampleAfterValue": "1000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -1537,7 +1567,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x2c",
"EventName": "SQ_MISC.BUS_LOCK",
- "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory. Available PDIST counters: 0",
+ "PublicDescription": "Counts the more expensive bus lock needed to enforce cache coherency for certain memory accesses that need to be done atomically. Can be created by issuing an atomic instruction (via the LOCK prefix) which causes a cache line split or accesses uncacheable memory.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -1547,7 +1577,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.ANY",
- "PublicDescription": "Counts the number of PREFETCHNTA, PREFETCHW, PREFETCHT0, PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0xf",
"Unit": "cpu_core"
@@ -1557,7 +1586,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.NTA",
- "PublicDescription": "Counts the number of PREFETCHNTA instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -1567,7 +1596,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
- "PublicDescription": "Counts the number of PREFETCHW instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -1577,7 +1606,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T0",
- "PublicDescription": "Counts the number of PREFETCHT0 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -1587,7 +1616,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x40",
"EventName": "SW_PREFETCH_ACCESS.T1_T2",
- "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
index ae9778aa755b..28dc5e06ee31 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/floating-point.json
@@ -15,7 +15,6 @@
"CounterMask": "1",
"EventCode": "0xb0",
"EventName": "ARITH.FPDIV_ACTIVE",
- "PublicDescription": "This event counts the cycles the floating point divider is busy. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -25,7 +24,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.FP",
- "PublicDescription": "Counts all microcode Floating Point assists. Available PDIST counters: 0",
+ "PublicDescription": "Counts all microcode Floating Point assists.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -35,7 +34,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc1",
"EventName": "ASSISTS.SSE_AVX_MIX",
- "PublicDescription": "ASSISTS.SSE_AVX_MIX Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -45,7 +43,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_0",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_0 [This event is alias to FP_ARITH_DISPATCHED.V0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -55,7 +52,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_1",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_1 [This event is alias to FP_ARITH_DISPATCHED.V1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -65,7 +61,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.PORT_5",
- "PublicDescription": "FP_ARITH_DISPATCHED.PORT_5 [This event is alias to FP_ARITH_DISPATCHED.V2] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -75,7 +70,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V0",
- "PublicDescription": "FP_ARITH_DISPATCHED.V0 [This event is alias to FP_ARITH_DISPATCHED.PORT_0] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -85,7 +79,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V1",
- "PublicDescription": "FP_ARITH_DISPATCHED.V1 [This event is alias to FP_ARITH_DISPATCHED.PORT_1] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -95,7 +88,6 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xb3",
"EventName": "FP_ARITH_DISPATCHED.V2",
- "PublicDescription": "FP_ARITH_DISPATCHED.V2 [This event is alias to FP_ARITH_DISPATCHED.PORT_5] Available PDIST counters: 0",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -105,7 +97,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -115,7 +107,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -125,7 +117,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 4 computation operations, one for each element. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -135,7 +127,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 8 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -145,7 +137,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.4_FLOPS",
- "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision and 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 2 or/and 4 computation operations, one for each element. Applies to SSE* and AVX* packed single precision floating-point and packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x18",
"Unit": "cpu_core"
@@ -155,7 +147,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision and double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -165,7 +157,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
- "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -175,7 +167,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
- "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below. Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -185,7 +177,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc7",
"EventName": "FP_ARITH_INST_RETIRED.VECTOR",
- "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events. Available PDIST counters: 0",
+ "PublicDescription": "Number of any Vector retired FP arithmetic instructions. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
"SampleAfterValue": "1000003",
"UMask": "0xfc",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
index 82727022efb6..6484834b1127 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/frontend.json
@@ -14,7 +14,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x60",
"EventName": "BACLEARS.ANY",
- "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore. Available PDIST counters: 0",
+ "PublicDescription": "Number of times the front-end is resteered when it finds a branch instruction in a fetch line. This is called Unknown Branch which occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.",
"SampleAfterValue": "100003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -24,7 +24,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.LCP",
- "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
"SampleAfterValue": "500009",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -34,7 +34,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x87",
"EventName": "DECODE.MS_BUSY",
- "PublicDescription": "Cycles the Microcode Sequencer is busy. Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -44,7 +43,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x61",
"EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
- "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE. Available PDIST counters: 0",
+ "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -405,7 +404,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALLS",
- "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
"SampleAfterValue": "500009",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -417,7 +416,6 @@
"EdgeDetect": "1",
"EventCode": "0x80",
"EventName": "ICACHE_DATA.STALL_PERIODS",
- "PublicDescription": "ICACHE_DATA.STALL_PERIODS Available PDIST counters: 0",
"SampleAfterValue": "500009",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -427,7 +425,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x83",
"EventName": "ICACHE_TAG.STALLS",
- "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
"SampleAfterValue": "200003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -438,7 +436,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -449,7 +447,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.DSB_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the DSB (Decode Stream Buffer) path. Count includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -459,7 +457,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.DSB_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
"SampleAfterValue": "2000003",
"UMask": "0x8",
"Unit": "cpu_core"
@@ -470,7 +468,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_ANY",
- "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -481,7 +479,7 @@
"CounterMask": "6",
"EventCode": "0x79",
"EventName": "IDQ.MITE_CYCLES_OK",
- "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -491,7 +489,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MITE_UOPS",
- "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB). Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
"SampleAfterValue": "2000003",
"UMask": "0x4",
"Unit": "cpu_core"
@@ -502,7 +500,7 @@
"CounterMask": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_CYCLES_ANY",
- "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE. Available PDIST counters: 0",
+ "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops may be initiated by Decode Stream Buffer (DSB) or MITE.",
"SampleAfterValue": "2000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -514,7 +512,7 @@
"EdgeDetect": "1",
"EventCode": "0x79",
"EventName": "IDQ.MS_SWITCHES",
- "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer. Available PDIST counters: 0",
+ "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
"SampleAfterValue": "100003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -524,7 +522,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x79",
"EventName": "IDQ.MS_UOPS",
- "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops initiated by MITE or Decode Stream Buffer (DSB) and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.",
"SampleAfterValue": "1000003",
"UMask": "0x20",
"Unit": "cpu_core"
@@ -534,7 +532,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CORE",
- "PublicDescription": "This event counts a subset of the Topdown Slots event that when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. The count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method. Available PDIST counters: 0",
+ "PublicDescription": "This event counts a subset of the Topdown Slots event when no operation was delivered to the back-end pipeline due to instruction fetch limitations when the back-end could have accepted more operations. Common examples include instruction cache misses or x86 instruction decode limitations. The count may be distributed among unhalted logical processors (hyper-threads) who share the same physical core, in processors that support Intel Hyper-Threading Technology. Software can use this event as the numerator for the Frontend Bound metric (or top-level category) of the Top-down Microarchitecture Analysis method.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -545,7 +543,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -557,7 +555,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_BUBBLES.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -567,7 +565,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
- "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of uops not delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -578,7 +576,7 @@
"CounterMask": "6",
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
- "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_0_UOPS_DELIV.CORE]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
@@ -590,7 +588,7 @@
"EventCode": "0x9c",
"EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
"Invert": "1",
- "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK] Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle. [This event is alias to IDQ_BUBBLES.CYCLES_FE_WAS_OK]",
"SampleAfterValue": "1000003",
"UMask": "0x1",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
index 17b94f810d5a..f0cbeda4d5ca 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/memory.json
@@ -5,7 +5,6 @@
"CounterMask": "2",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS",
- "PublicDescription": "Cycles while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -16,7 +15,6 @@
"CounterMask": "6",
"EventCode": "0xa3",
"EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x6",
"Unit": "cpu_core"
@@ -90,7 +88,7 @@
"Counter": "0,1,2,3,4,5,6,7",
"EventCode": "0xc3",
"EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
- "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture Available PDIST counters: 0",
+ "PublicDescription": "Counts the number of Machine Clears detected due to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture.",
"SampleAfterValue": "100003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -101,7 +99,6 @@
"CounterMask": "2",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
- "PublicDescription": "Cycles while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x2",
"Unit": "cpu_core"
@@ -112,7 +109,6 @@
"CounterMask": "3",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
- "PublicDescription": "Execution stalls while L1 cache miss demand load is outstanding. Available PDIST counters: 0",
"SampleAfterValue": "1000003",
"UMask": "0x3",
"Unit": "cpu_core"
@@ -123,7 +119,7 @@
"CounterMask": "5",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
- "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L2 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x5",
"Unit": "cpu_core"
@@ -134,7 +130,7 @@
"CounterMask": "9",
"EventCode": "0x47",
"EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
- "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock). Available PDIST counters: 0",
+ "PublicDescription": "Execution stalls while L3 cache miss demand cacheable load request is outstanding (will not count for uncacheable demand requests e.g. bus lock).",
"SampleAfterValue": "1000003",
"UMask": "0x9",
"Unit": "cpu_core"
@@ -411,7 +407,6 @@
"Counter": "0,1,2,3",
"EventCode": "0x21",
"EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Counts demand data read requests that miss the L3 cache. Available PDIST counters: 0",
"SampleAfterValue": "100003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -422,7 +417,7 @@
"CounterMask": "1",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ. Available PDIST counters: 0",
+ "PublicDescription": "Cycles with at least 1 Demand Data Read request that misses the L3 cache in the superQ.",
"SampleAfterValue": "1000003",
"UMask": "0x10",
"Unit": "cpu_core"
@@ -432,7 +427,7 @@
"Counter": "0,1,2,3",
"EventCode": "0x20",
"EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
- "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache. Available PDIST counters: 0",
+ "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache. Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
"SampleAfterValue": "2000003",
"UMask": "0x10",
"Unit": "cpu_core"
diff --git a/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json b/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json
index 0088be169f9b..948c16a1f95b 100644
--- a/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/meteorlake/mtl-metrics.json