diff options
Diffstat (limited to 'tools/perf/util')
72 files changed, 5788 insertions, 1426 deletions
diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 946bce6628f3..7910d908c814 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -161,7 +161,7 @@ perf-util-y += clockid.o perf-util-y += list_sort.o perf-util-y += mutex.o perf-util-y += sharded_mutex.o -perf-util-$(CONFIG_X86_64) += intel-tpebs.o +perf-util-y += intel-tpebs.o perf-util-$(CONFIG_LIBBPF) += bpf_map.o perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o @@ -173,6 +173,10 @@ perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-flex.o perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-filter-bison.o perf-util-$(CONFIG_PERF_BPF_SKEL) += btf.o +ifeq ($(CONFIG_TRACE),y) + perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf-trace-summary.o +endif + ifeq ($(CONFIG_LIBTRACEEVENT),y) perf-util-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o endif @@ -237,9 +241,12 @@ perf-util-y += cap.o perf-util-$(CONFIG_CXX_DEMANGLE) += demangle-cxx.o perf-util-y += demangle-ocaml.o perf-util-y += demangle-java.o -perf-util-y += demangle-rust.o +perf-util-y += demangle-rust-v0.o perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o +CFLAGS_demangle-rust-v0.o += -Wno-shadow -Wno-declaration-after-statement \ + -Wno-switch-default -Wno-switch-enum -Wno-missing-field-initializers + ifdef CONFIG_JITDUMP perf-util-$(CONFIG_LIBELF) += jitdump.o perf-util-$(CONFIG_LIBELF) += genelf.o diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 456ce64ad822..4b540e6fb42d 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -19,6 +19,8 @@ static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; static bool zen4_ibs_extensions; +static bool ldlat_cap; +static bool dtlb_pgsize_cap; static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) { @@ -78,14 +80,20 @@ static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) static void pr_ibs_op_ctl(union ibs_op_ctl reg) { char l3_miss_only[sizeof(" L3MissOnly _")] = ""; + char ldlat[sizeof(" LdLatThrsh __ LdLatEn _")] = ""; if (zen4_ibs_extensions) snprintf(l3_miss_only, sizeof(l3_miss_only), " L3MissOnly %d", reg.l3_miss_only); - printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d\n", + if (ldlat_cap) { + snprintf(ldlat, sizeof(ldlat), " LdLatThrsh %2d LdLatEn %d", + reg.ldlat_thrsh, reg.ldlat_en); + } + + printf("ibs_op_ctl:\t%016llx MaxCnt %9d%s En %d Val %d CntCtl %d=%s CurCnt %9d%s\n", reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, l3_miss_only, reg.op_en, reg.op_val, reg.cnt_ctl, - reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); + reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt, ldlat); } static void pr_ibs_op_data(union ibs_op_data reg) @@ -154,9 +162,20 @@ static void pr_ibs_op_data2(union ibs_op_data2 reg) static void pr_ibs_op_data3(union ibs_op_data3 reg) { - char l2_miss_str[sizeof(" L2Miss _")] = ""; - char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; + static const char * const dc_page_sizes[] = { + " 4K", + " 2M", + " 1G", + " ??", + }; char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = ""; + char dc_l1_l2tlb_miss_str[sizeof(" DcL1TlbMiss _ DcL2TlbMiss _")] = ""; + char dc_l1tlb_hit_str[sizeof(" DcL1TlbHit2M _ DcL1TlbHit1G _")] = ""; + char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; + char dc_l2tlb_hit_2m_str[sizeof(" DcL2TlbHit2M _")] = ""; + char dc_l2tlb_hit_1g_str[sizeof(" DcL2TlbHit1G _")] = ""; + char dc_page_size_str[sizeof(" DcPageSize ____")] = ""; + char l2_miss_str[sizeof(" L2Miss _")] = ""; /* * Erratum #1293 @@ -172,16 +191,40 @@ static void pr_ibs_op_data3(union ibs_op_data3 reg) snprintf(op_mem_width_str, sizeof(op_mem_width_str), " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1)); - printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d " - "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d " - "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d " - "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n", - reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss, - reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss, - reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op, - reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, - reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str, - op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat); + if (dtlb_pgsize_cap) { + if (reg.dc_phy_addr_valid) { + int idx = (reg.dc_l1tlb_hit_1g << 1) | reg.dc_l1tlb_hit_2m; + + snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str), + " DcL1TlbMiss %d DcL2TlbMiss %d", + reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); + snprintf(dc_page_size_str, sizeof(dc_page_size_str), + " DcPageSize %4s", dc_page_sizes[idx]); + } + } else { + snprintf(dc_l1_l2tlb_miss_str, sizeof(dc_l1_l2tlb_miss_str), + " DcL1TlbMiss %d DcL2TlbMiss %d", + reg.dc_l1tlb_miss, reg.dc_l2tlb_miss); + snprintf(dc_l1tlb_hit_str, sizeof(dc_l1tlb_hit_str), + " DcL1TlbHit2M %d DcL1TlbHit1G %d", + reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g); + snprintf(dc_l2tlb_hit_2m_str, sizeof(dc_l2tlb_hit_2m_str), + " DcL2TlbHit2M %d", reg.dc_l2tlb_hit_2m); + snprintf(dc_l2tlb_hit_1g_str, sizeof(dc_l2tlb_hit_1g_str), + " DcL2TlbHit1G %d", reg.dc_l2_tlb_hit_1g); + } + + printf("ibs_op_data3:\t%016llx LdOp %d StOp %d%s%s%s DcMiss %d DcMisAcc %d " + "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d " + "DcLinAddrValid %d DcPhyAddrValid %d%s%s SwPf %d%s%s " + "DcMissLat %5d TlbRefillLat %5d\n", + reg.val, reg.ld_op, reg.st_op, dc_l1_l2tlb_miss_str, + dtlb_pgsize_cap ? dc_page_size_str : dc_l1tlb_hit_str, + dc_l2tlb_hit_2m_str, reg.dc_miss, reg.dc_mis_acc, reg.dc_wc_mem_acc, + reg.dc_uc_mem_acc, reg.dc_locked_op, reg.dc_miss_no_mab_alloc, + reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, dc_l2tlb_hit_1g_str, + l2_miss_str, reg.sw_pf, op_mem_width_str, op_dc_miss_open_mem_reqs_str, + reg.dc_miss_lat, reg.tlb_refill_lat); } /* @@ -331,6 +374,12 @@ bool evlist__has_amd_ibs(struct evlist *evlist) if (perf_env__find_pmu_cap(env, "ibs_op", "zen4_ibs_extensions")) zen4_ibs_extensions = 1; + if (perf_env__find_pmu_cap(env, "ibs_op", "ldlat")) + ldlat_cap = 1; + + if (perf_env__find_pmu_cap(env, "ibs_op", "dtlb_pgsize")) + dtlb_pgsize_cap = 1; + if (ibs_fetch_type || ibs_op_type) { if (!cpu_family) parse_cpuid(env); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 1e59b9e5339d..264a212b47df 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2280,6 +2280,7 @@ void annotation_options__init(void) opt->annotate_src = true; opt->offset_level = ANNOTATION__OFFSET_JUMP_TARGETS; opt->percent_type = PERCENT_PERIOD_LOCAL; + opt->hide_src_code_on_title = true; } void annotation_options__exit(void) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 0e6e3f60a897..bbb89b32f398 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -44,6 +44,7 @@ enum perf_disassembler { struct annotation_options { bool hide_src_code, + hide_src_code_on_title, use_offset, jump_arrows, print_lines, diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 5d232188643b..881d9f29c138 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -82,6 +82,23 @@ enum arm_spe_ampereone_data_source { ARM_SPE_AMPEREONE_L2D = 0x9, }; +enum arm_spe_hisi_hip_data_source { + ARM_SPE_HISI_HIP_PEER_CPU = 0, + ARM_SPE_HISI_HIP_PEER_CPU_HITM = 1, + ARM_SPE_HISI_HIP_L3 = 2, + ARM_SPE_HISI_HIP_L3_HITM = 3, + ARM_SPE_HISI_HIP_PEER_CLUSTER = 4, + ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM = 5, + ARM_SPE_HISI_HIP_REMOTE_SOCKET = 6, + ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM = 7, + ARM_SPE_HISI_HIP_LOCAL_MEM = 8, + ARM_SPE_HISI_HIP_REMOTE_MEM = 9, + ARM_SPE_HISI_HIP_NC_DEV = 13, + ARM_SPE_HISI_HIP_L2 = 16, + ARM_SPE_HISI_HIP_L2_HITM = 17, + ARM_SPE_HISI_HIP_L1 = 18, +}; + struct arm_spe_record { enum arm_spe_sample_type type; int err; diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 2a9775649cc2..d46e0cccac99 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -571,6 +571,11 @@ static const struct midr_range ampereone_ds_encoding_cpus[] = { {}, }; +static const struct midr_range hisi_hip_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_HISI_HIP12), + {}, +}; + static void arm_spe__sample_flags(struct arm_spe_queue *speq) { const struct arm_spe_record *record = &speq->decoder->record; @@ -718,9 +723,100 @@ static void arm_spe__synth_data_source_ampereone(const struct arm_spe_record *re arm_spe__synth_data_source_common(&common_record, data_src); } +static void arm_spe__synth_data_source_hisi_hip(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) +{ + /* Use common synthesis method to handle store operations */ + if (record->op & ARM_SPE_OP_ST) { + arm_spe__synth_data_source_common(record, data_src); + return; + } + + switch (record->source) { + case ARM_SPE_HISI_HIP_PEER_CPU: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CPU_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_L3: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HIT; + break; + case ARM_SPE_HISI_HIP_L3_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_PEER_CLUSTER_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_REMOTE_SOCKET_HITM: + data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; + break; + case ARM_SPE_HISI_HIP_LOCAL_MEM: + data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_REMOTE_MEM: + data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; + data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; + break; + case ARM_SPE_HISI_HIP_NC_DEV: + data_src->mem_lvl = PERF_MEM_LVL_IO | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + case ARM_SPE_HISI_HIP_L2_HITM: + data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; + data_src->mem_snoop = PERF_MEM_SNOOP_HITM; + break; + case ARM_SPE_HISI_HIP_L1: + data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; + data_src->mem_snoop = PERF_MEM_SNOOP_NONE; + break; + default: + break; + } +} + static const struct data_source_handle data_source_handles[] = { DS(common_ds_encoding_cpus, data_source_common), DS(ampereone_ds_encoding_cpus, data_source_ampereone), + DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip), }; static void arm_spe__synth_memory_level(const struct arm_spe_record *record, diff --git a/tools/perf/util/bpf-trace-summary.c b/tools/perf/util/bpf-trace-summary.c new file mode 100644 index 000000000000..69fb165da206 --- /dev/null +++ b/tools/perf/util/bpf-trace-summary.c @@ -0,0 +1,458 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <inttypes.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> + +#include "dwarf-regs.h" /* for EM_HOST */ +#include "syscalltbl.h" +#include "util/cgroup.h" +#include "util/hashmap.h" +#include "util/trace.h" +#include "util/util.h" +#include <bpf/bpf.h> +#include <linux/rbtree.h> +#include <linux/time64.h> +#include <tools/libc_compat.h> /* reallocarray */ + +#include "bpf_skel/syscall_summary.h" +#include "bpf_skel/syscall_summary.skel.h" + + +static struct syscall_summary_bpf *skel; +static struct rb_root cgroups = RB_ROOT; + +int trace_prepare_bpf_summary(enum trace_summary_mode mode) +{ + skel = syscall_summary_bpf__open(); + if (skel == NULL) { + fprintf(stderr, "failed to open syscall summary bpf skeleton\n"); + return -1; + } + + if (mode == SUMMARY__BY_THREAD) + skel->rodata->aggr_mode = SYSCALL_AGGR_THREAD; + else if (mode == SUMMARY__BY_CGROUP) + skel->rodata->aggr_mode = SYSCALL_AGGR_CGROUP; + else + skel->rodata->aggr_mode = SYSCALL_AGGR_CPU; + + if (cgroup_is_v2("perf_event") > 0) + skel->rodata->use_cgroup_v2 = 1; + + if (syscall_summary_bpf__load(skel) < 0) { + fprintf(stderr, "failed to load syscall summary bpf skeleton\n"); + return -1; + } + + if (syscall_summary_bpf__attach(skel) < 0) { + fprintf(stderr, "failed to attach syscall summary bpf skeleton\n"); + return -1; + } + + if (mode == SUMMARY__BY_CGROUP) + read_all_cgroups(&cgroups); + + return 0; +} + +void trace_start_bpf_summary(void) +{ + skel->bss->enabled = 1; +} + +void trace_end_bpf_summary(void) +{ + skel->bss->enabled = 0; +} + +struct syscall_node { + int syscall_nr; + struct syscall_stats stats; +}; + +static double rel_stddev(struct syscall_stats *stat) +{ + double variance, average; + + if (stat->count < 2) + return 0; + + average = (double)stat->total_time / stat->count; + + variance = stat->squared_sum; + variance -= (stat->total_time * stat->total_time) / stat->count; + variance /= stat->count - 1; + + return 100 * sqrt(variance / stat->count) / average; +} + +/* + * The syscall_data is to maintain syscall stats ordered by total time. + * It supports different summary modes like per-thread or global. + * + * For per-thread stats, it uses two-level data strurcture - + * syscall_data is keyed by TID and has an array of nodes which + * represents each syscall for the thread. + * + * For global stats, it's still two-level technically but we don't need + * per-cpu analysis so it's keyed by the syscall number to combine stats + * from different CPUs. And syscall_data always has a syscall_node so + * it can effectively work as flat hierarchy. + * + * For per-cgroup stats, it uses two-level data structure like thread + * syscall_data is keyed by CGROUP and has an array of node which + * represents each syscall for the cgroup. + */ +struct syscall_data { + u64 key; /* tid if AGGR_THREAD, syscall-nr if AGGR_CPU, cgroup if AGGR_CGROUP */ + int nr_events; + int nr_nodes; + u64 total_time; + struct syscall_node *nodes; +}; + +static int datacmp(const void *a, const void *b) +{ + const struct syscall_data * const *sa = a; + const struct syscall_data * const *sb = b; + + return (*sa)->total_time > (*sb)->total_time ? -1 : 1; +} + +static int nodecmp(const void *a, const void *b) +{ + const struct syscall_node *na = a; + const struct syscall_node *nb = b; + + return na->stats.total_time > nb->stats.total_time ? -1 : 1; +} + +static size_t sc_node_hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool sc_node_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static int print_common_stats(struct syscall_data *data, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < data->nr_nodes; i++) { + struct syscall_node *node = &data->nodes[i]; + struct syscall_stats *stat = &node->stats; + double total = (double)(stat->total_time) / NSEC_PER_MSEC; + double min = (double)(stat->min_time) / NSEC_PER_MSEC; + double max = (double)(stat->max_time) / NSEC_PER_MSEC; + double avg = total / stat->count; + const char *name; + + /* TODO: support other ABIs */ + name = syscalltbl__name(EM_HOST, node->syscall_nr); + if (name) + printed += fprintf(fp, " %-15s", name); + else + printed += fprintf(fp, " syscall:%-7d", node->syscall_nr); + + printed += fprintf(fp, " %8u %6u %9.3f %9.3f %9.3f %9.3f %9.2f%%\n", + stat->count, stat->error, total, min, avg, max, + rel_stddev(stat)); + } + return printed; +} + +static int update_thread_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_node *nodes; + + if (!hashmap__find(hash, map_key->cpu_or_tid, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->key = map_key->cpu_or_tid; + if (hashmap__add(hash, data->key, data) < 0) { + free(data); + return -ENOMEM; + } + } + + /* update thread total stats */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); + if (nodes == NULL) + return -ENOMEM; + + data->nodes = nodes; + nodes = &data->nodes[data->nr_nodes++]; + nodes->syscall_nr = map_key->nr; + + /* each thread has an entry for each syscall, just use the stat */ + memcpy(&nodes->stats, map_data, sizeof(*map_data)); + return 0; +} + +static int print_thread_stat(struct syscall_data *data, FILE *fp) +{ + int printed = 0; + + qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); + + printed += fprintf(fp, " thread (%d), ", (int)data->key); + printed += fprintf(fp, "%d events\n\n", data->nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + printed += print_common_stats(data, fp); + printed += fprintf(fp, "\n\n"); + + return printed; +} + +static int print_thread_stats(struct syscall_data **data, int nr_data, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < nr_data; i++) + printed += print_thread_stat(data[i], fp); + + return printed; +} + +static int update_total_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_stats *stat; + + if (!hashmap__find(hash, map_key->nr, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->nodes = zalloc(sizeof(*data->nodes)); + if (data->nodes == NULL) { + free(data); + return -ENOMEM; + } + + data->nr_nodes = 1; + data->key = map_key->nr; + data->nodes->syscall_nr = data->key; + + if (hashmap__add(hash, data->key, data) < 0) { + free(data->nodes); + free(data); + return -ENOMEM; + } + } + + /* update total stats for this syscall */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + /* This is sum of the same syscall from different CPUs */ + stat = &data->nodes->stats; + + stat->total_time += map_data->total_time; + stat->squared_sum += map_data->squared_sum; + stat->count += map_data->count; + stat->error += map_data->error; + + if (stat->max_time < map_data->max_time) + stat->max_time = map_data->max_time; + if (stat->min_time > map_data->min_time || stat->min_time == 0) + stat->min_time = map_data->min_time; + + return 0; +} + +static int print_total_stats(struct syscall_data **data, int nr_data, FILE *fp) +{ + int printed = 0; + int nr_events = 0; + + for (int i = 0; i < nr_data; i++) + nr_events += data[i]->nr_events; + + printed += fprintf(fp, " total, %d events\n\n", nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + for (int i = 0; i < nr_data; i++) + printed += print_common_stats(data[i], fp); + + printed += fprintf(fp, "\n\n"); + return printed; +} + +static int update_cgroup_stats(struct hashmap *hash, struct syscall_key *map_key, + struct syscall_stats *map_data) +{ + struct syscall_data *data; + struct syscall_node *nodes; + + if (!hashmap__find(hash, map_key->cgroup, &data)) { + data = zalloc(sizeof(*data)); + if (data == NULL) + return -ENOMEM; + + data->key = map_key->cgroup; + if (hashmap__add(hash, data->key, data) < 0) { + free(data); + return -ENOMEM; + } + } + + /* update thread total stats */ + data->nr_events += map_data->count; + data->total_time += map_data->total_time; + + nodes = reallocarray(data->nodes, data->nr_nodes + 1, sizeof(*nodes)); + if (nodes == NULL) + return -ENOMEM; + + data->nodes = nodes; + nodes = &data->nodes[data->nr_nodes++]; + nodes->syscall_nr = map_key->nr; + + /* each thread has an entry for each syscall, just use the stat */ + memcpy(&nodes->stats, map_data, sizeof(*map_data)); + return 0; +} + +static int print_cgroup_stat(struct syscall_data *data, FILE *fp) +{ + int printed = 0; + struct cgroup *cgrp = __cgroup__find(&cgroups, data->key); + + qsort(data->nodes, data->nr_nodes, sizeof(*data->nodes), nodecmp); + + if (cgrp) + printed += fprintf(fp, " cgroup %s,", cgrp->name); + else + printed += fprintf(fp, " cgroup id:%lu,", (unsigned long)data->key); + + printed += fprintf(fp, " %d events\n\n", data->nr_events); + + printed += fprintf(fp, " syscall calls errors total min avg max stddev\n"); + printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); + printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); + + printed += print_common_stats(data, fp); + printed += fprintf(fp, "\n\n"); + + return printed; +} + +static int print_cgroup_stats(struct syscall_data **data, int nr_data, FILE *fp) +{ + int printed = 0; + + for (int i = 0; i < nr_data; i++) + printed += print_cgroup_stat(data[i], fp); + + return printed; +} + +int trace_print_bpf_summary(FILE *fp) +{ + struct bpf_map *map = skel->maps.syscall_stats_map; + struct syscall_key *prev_key, key; + struct syscall_data **data = NULL; + struct hashmap schash; + struct hashmap_entry *entry; + int nr_data = 0; + int printed = 0; + int i; + size_t bkt; + + hashmap__init(&schash, sc_node_hash, sc_node_equal, /*ctx=*/NULL); + + printed = fprintf(fp, "\n Summary of events:\n\n"); + + /* get stats from the bpf map */ + prev_key = NULL; + while (!bpf_map__get_next_key(map, prev_key, &key, sizeof(key))) { + struct syscall_stats stat; + + if (!bpf_map__lookup_elem(map, &key, sizeof(key), &stat, sizeof(stat), 0)) { + switch (skel->rodata->aggr_mode) { + case SYSCALL_AGGR_THREAD: + update_thread_stats(&schash, &key, &stat); + break; + case SYSCALL_AGGR_CPU: + update_total_stats(&schash, &key, &stat); + break; + case SYSCALL_AGGR_CGROUP: + update_cgroup_stats(&schash, &key, &stat); + break; + default: + break; + } + } + + prev_key = &key; + } + + nr_data = hashmap__size(&schash); + data = calloc(nr_data, sizeof(*data)); + if (data == NULL) + goto out; + + i = 0; + hashmap__for_each_entry(&schash, entry, bkt) + data[i++] = entry->pvalue; + + qsort(data, nr_data, sizeof(*data), datacmp); + + switch (skel->rodata->aggr_mode) { + case SYSCALL_AGGR_THREAD: + printed += print_thread_stats(data, nr_data, fp); + break; + case SYSCALL_AGGR_CPU: + printed += print_total_stats(data, nr_data, fp); + break; + case SYSCALL_AGGR_CGROUP: + printed += print_cgroup_stats(data, nr_data, fp); + break; + default: + break; + } + + for (i = 0; i < nr_data && data; i++) { + free(data[i]->nodes); + free(data[i]); + } + free(data); + +out: + hashmap__clear(&schash); + return printed; +} + +void trace_cleanup_bpf_summary(void) +{ + if (!RB_EMPTY_ROOT(&cgroups)) { + struct cgroup *cgrp, *tmp; + + rbtree_postorder_for_each_entry_safe(cgrp, tmp, &cgroups, node) + cgroup__put(cgrp); + + cgroups = RB_ROOT; + } + + syscall_summary_bpf__destroy(skel); +} diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index 5af8f6d1bc95..60b81d586323 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -12,6 +12,7 @@ #include "util/lock-contention.h" #include <linux/zalloc.h> #include <linux/string.h> +#include <api/fs/fs.h> #include <bpf/bpf.h> #include <bpf/btf.h> #include <inttypes.h> @@ -35,28 +36,26 @@ static bool slab_cache_equal(long key1, long key2, void *ctx __maybe_unused) static void check_slab_cache_iter(struct lock_contention *con) { - struct btf *btf = btf__load_vmlinux_btf(); s32 ret; hashmap__init(&slab_hash, slab_cache_hash, slab_cache_equal, /*ctx=*/NULL); - if (btf == NULL) { + con->btf = btf__load_vmlinux_btf(); + if (con->btf == NULL) { pr_debug("BTF loading failed: %s\n", strerror(errno)); return; } - ret = btf__find_by_name_kind(btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT); + ret = btf__find_by_name_kind(con->btf, "bpf_iter__kmem_cache", BTF_KIND_STRUCT); if (ret < 0) { bpf_program__set_autoload(skel->progs.slab_cache_iter, false); pr_debug("slab cache iterator is not available: %d\n", ret); - goto out; + return; } has_slab_iter = true; bpf_map__set_max_entries(skel->maps.slab_caches, con->map_nr_entries); -out: - btf__free(btf); } static void run_slab_cache_iter(void) @@ -109,6 +108,75 @@ static void exit_slab_cache_iter(void) hashmap__clear(&slab_hash); } +static void init_numa_data(struct lock_contention *con) +{ + struct symbol *sym; + struct map *kmap; + char *buf = NULL, *p; + size_t len; + long last = -1; + int ret; + + /* + * 'struct zone' is embedded in 'struct pglist_data' as an array. + * As we may not have full information of the struct zone in the + * (fake) vmlinux.h, let's get the actual size from BTF. + */ + ret = btf__find_by_name_kind(con->btf, "zone", BTF_KIND_STRUCT); + if (ret < 0) { + pr_debug("cannot get type of struct zone: %d\n", ret); + return; + } + + ret = btf__resolve_size(con->btf, ret); + if (ret < 0) { + pr_debug("cannot get size of struct zone: %d\n", ret); + return; + } + skel->rodata->sizeof_zone = ret; + + /* UMA system doesn't have 'node_data[]' - just use contig_page_data. */ + sym = machine__find_kernel_symbol_by_name(con->machine, + "contig_page_data", + &kmap); + if (sym) { + skel->rodata->contig_page_data_addr = map__unmap_ip(kmap, sym->start); + map__put(kmap); + return; + } + + /* + * The 'node_data' is an array of pointers to struct pglist_data. + * It needs to follow the pointer for each node in BPF to get the + * address of struct pglist_data and its zones. + */ + sym = machine__find_kernel_symbol_by_name(con->machine, + "node_data", + &kmap); + if (sym == NULL) + return; + + skel->rodata->node_data_addr = map__unmap_ip(kmap, sym->start); + map__put(kmap); + + /* get the number of online nodes using the last node number + 1 */ + ret = sysfs__read_str("devices/system/node/online", &buf, &len); + if (ret < 0) { + pr_debug("failed to read online node: %d\n", ret); + return; + } + + p = buf; + while (p && *p) { + last = strtol(p, &p, 0); + + if (p && (*p == ',' || *p == '-' || *p == '\n')) + p++; + } + skel->rodata->nr_nodes = last + 1; + free(buf); +} + int lock_contention_prepare(struct lock_contention *con) { int i, fd; @@ -193,6 +261,27 @@ int lock_contention_prepare(struct lock_contention *con) skel->rodata->has_addr = 1; } + /* resolve lock name in delays */ + if (con->nr_delays) { + struct symbol *sym; + struct map *kmap; + + for (i = 0; i < con->nr_delays; i++) { + sym = machine__find_kernel_symbol_by_name(con->machine, + con->delays[i].sym, + &kmap); + if (sym == NULL) { + pr_warning("ignore unknown symbol: %s\n", + con->delays[i].sym); + continue; + } + + con->delays[i].addr = map__unmap_ip(kmap, sym->start); + } + skel->rodata->lock_delay = 1; + bpf_map__set_max_entries(skel->maps.lock_delays, con->nr_delays); + } + bpf_map__set_max_entries(skel->maps.cpu_filter, ncpus); bpf_map__set_max_entries(skel->maps.task_filter, ntasks); bpf_map__set_max_entries(skel->maps.type_filter, ntypes); @@ -218,6 +307,8 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map__set_max_entries(skel->maps.slab_filter, nslabs); + init_numa_data(con); + if (lock_contention_bpf__load(skel) < 0) { pr_err("Failed to load lock-contention BPF skeleton\n"); return -1; @@ -282,6 +373,13 @@ int lock_contention_prepare(struct lock_contention *con) bpf_map_update_elem(fd, &con->filters->cgrps[i], &val, BPF_ANY); } + if (con->nr_delays) { + fd = bpf_map__fd(skel->maps.lock_delays); + + for (i = 0; i < con->nr_delays; i++) + bpf_map_update_elem(fd, &con->delays[i].addr, &con->delays[i].time, BPF_ANY); + } + if (con->aggr_mode == LOCK_AGGR_CGROUP) read_all_cgroups(&con->cgroups); @@ -505,6 +603,11 @@ static const char *lock_contention_get_name(struct lock_contention *con, return "rq_lock"; } + if (!bpf_map_lookup_elem(lock_fd, &key->lock_addr_or_cgroup, &flags)) { + if (flags == LOCK_CLASS_ZONE_LOCK) + return "zone_lock"; + } + /* look slab_hash for dynamic locks in a slab object */ if (hashmap__find(&slab_hash, flags & LCB_F_SLAB_ID_MASK, &slab_data)) { snprintf(name_buf, sizeof(name_buf), "&%s", slab_data->name); @@ -743,6 +846,7 @@ int lock_contention_finish(struct lock_contention *con) } exit_slab_cache_iter(); + btf__free(con->btf); return 0; } diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c index 4269b41d1771..c367fefe6ecb 100644 --- a/tools/perf/util/bpf_off_cpu.c +++ b/tools/perf/util/bpf_off_cpu.c @@ -13,6 +13,8 @@ #include "util/cgroup.h" #include "util/strlist.h" #include <bpf/bpf.h> +#include <internal/xyarray.h> +#include <linux/time64.h> #include "bpf_skel/off_cpu.skel.h" @@ -36,34 +38,25 @@ union off_cpu_data { u64 array[1024 / sizeof(u64)]; }; +u64 off_cpu_raw[MAX_STACKS + 5]; + static int off_cpu_config(struct evlist *evlist) { + char off_cpu_event[64]; struct evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_BPF_OUTPUT, - .size = sizeof(attr), /* to capture ABI version */ - }; - char *evname = strdup(OFFCPU_EVENT); - if (evname == NULL) - return -ENOMEM; - - evsel = evsel__new(&attr); - if (!evsel) { - free(evname); - return -ENOMEM; + scnprintf(off_cpu_event, sizeof(off_cpu_event), "bpf-output/name=%s/", OFFCPU_EVENT); + if (parse_event(evlist, off_cpu_event)) { + pr_err("Failed to open off-cpu event\n"); + return -1; } - evsel->core.attr.freq = 1; - evsel->core.attr.sample_period = 1; - /* off-cpu analysis depends on stack trace */ - evsel->core.attr.sample_type = PERF_SAMPLE_CALLCHAIN; - - evlist__add(evlist, evsel); - - free(evsel->name); - evsel->name = evname; + evlist__for_each_entry(evlist, evsel) { + if (evsel__is_offcpu_event(evsel)) { + evsel->core.system_wide = true; + break; + } + } return 0; } @@ -71,6 +64,9 @@ static int off_cpu_config(struct evlist *evlist) static void off_cpu_start(void *arg) { struct evlist *evlist = arg; + struct evsel *evsel; + struct perf_cpu pcpu; + int i; /* update task filter for the given workload */ if (skel->rodata->has_task && skel->rodata->uses_tgid && @@ -84,6 +80,26 @@ static void off_cpu_start(void *arg) bpf_map_update_elem(fd, &pid, &val, BPF_ANY); } + /* update BPF perf_event map */ + evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT); + if (evsel == NULL) { + pr_err("%s evsel not found\n", OFFCPU_EVENT); + return; + } + + perf_cpu_map__for_each_cpu(pcpu, i, evsel->core.cpus) { + int err; + int cpu_nr = pcpu.cpu; + + err = bpf_map__update_elem(skel->maps.offcpu_output, &cpu_nr, sizeof(int), + xyarray__entry(evsel->core.fd, cpu_nr, 0), + sizeof(int), BPF_ANY); + if (err) { + pr_err("Failed to update perf event map for direct off-cpu dumping\n"); + return; + } + } + skel->bss->enabled = 1; } @@ -277,6 +293,8 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target, } } + skel->bss->offcpu_thresh_ns = opts->off_cpu_thresh_ns; + err = off_cpu_bpf__attach(skel); if (err) { pr_err("Failed to attach off-cpu BPF skeleton\n"); @@ -300,6 +318,7 @@ int off_cpu_write(struct perf_session *session) { int bytes = 0, size; int fd, stack; + u32 raw_size; u64 sample_type, val, sid = 0; struct evsel *evsel; struct perf_data_file *file = &session->data->file; @@ -339,46 +358,54 @@ int off_cpu_write(struct perf_session *session) while (!bpf_map_get_next_key(fd, &prev, &key)) { int n = 1; /* start from perf_event_header */ - int ip_pos = -1; bpf_map_lookup_elem(fd, &key, &val); + /* zero-fill some of the fields, will be overwritten by raw_data when parsing */ if (sample_type & PERF_SAMPLE_IDENTIFIER) data.array[n++] = sid; - if (sample_type & PERF_SAMPLE_IP) { - ip_pos = n; + if (sample_type & PERF_SAMPLE_IP) data.array[n++] = 0; /* will be updated */ - } if (sample_type & PERF_SAMPLE_TID) - data.array[n++] = (u64)key.pid << 32 | key.tgid; + data.array[n++] = 0; if (sample_type & PERF_SAMPLE_TIME) data.array[n++] = tstamp; - if (sample_type & PERF_SAMPLE_ID) - data.array[n++] = sid; if (sample_type & PERF_SAMPLE_CPU) data.array[n++] = 0; if (sample_type & PERF_SAMPLE_PERIOD) - data.array[n++] = val; - if (sample_type & PERF_SAMPLE_CALLCHAIN) { - int len = 0; - - /* data.array[n] is callchain->nr (updated later) */ - data.array[n + 1] = PERF_CONTEXT_USER; - data.array[n + 2] = 0; - - bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]); - while (data.array[n + 2 + len]) + data.array[n++] = 0; + if (sample_type & PERF_SAMPLE_RAW) { + /* + * [ size ][ data ] + * [ data ] + * [ data ] + * [ data ] + * [ data ][ empty] + */ + int len = 0, i = 0; + void *raw_data = (void *)data.array + n * sizeof(u64); + + off_cpu_raw[i++] = (u64)key.pid << 32 | key.tgid; + off_cpu_raw[i++] = val; + + /* off_cpu_raw[i] is callchain->nr (updated later) */ + off_cpu_raw[i + 1] = PERF_CONTEXT_USER; + off_cpu_raw[i + 2] = 0; + + bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw[i + 2]); + while (off_cpu_raw[i + 2 + len]) len++; - /* update length of callchain */ - data.array[n] = len + 1; + off_cpu_raw[i] = len + 1; + i += len + 2; + + off_cpu_raw[i++] = key.cgroup_id; - /* update sample ip with the first callchain entry */ - if (ip_pos >= 0) - data.array[ip_pos] = data.array[n + 2]; + raw_size = i * sizeof(u64) + sizeof(u32); /* 4 bytes for alignment */ + memcpy(raw_data, &raw_size, sizeof(raw_size)); + memcpy(raw_data + sizeof(u32), off_cpu_raw, i * sizeof(u64)); - /* calculate sample callchain data array length */ - n += len + 2; + n += i + 1; } if (sample_type & PERF_SAMPLE_CGROUP) data.array[n++] = key.cgroup_id; diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c index 69be7a4234e0..96e7d853b9ed 100644 --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c @@ -11,6 +11,12 @@ /* for collect_lock_syms(). 4096 was rejected by the verifier */ #define MAX_CPUS 1024 +/* for collect_zone_lock(). It should be more than the actual zones. */ +#define MAX_ZONES 10 + +/* for do_lock_delay(). Arbitrarily set to 1 million. */ +#define MAX_LOOP (1U << 20) + /* lock contention flags from include/trace/events/lock.h */ #define LCB_F_SPIN (1U << 0) #define LCB_F_READ (1U << 1) @@ -146,6 +152,13 @@ struct { __uint(max_entries, 1); } slab_caches SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u64)); + __uint(value_size, sizeof(__u64)); + __uint(max_entries, 1); +} lock_delays SEC(".maps"); + struct rw_semaphore___old { struct task_struct *owner; } __attribute__((preserve_access_index)); @@ -176,6 +189,7 @@ const volatile int stack_skip; const volatile int lock_owner; const volatile int use_cgroup_v2; const volatile int max_stack; +const volatile int lock_delay; /* determine the key of lock stat */ const volatile int aggr_mode; @@ -384,6 +398,35 @@ static inline __u32 check_lock_type(__u64 lock, __u32 flags) return 0; } +static inline long delay_callback(__u64 idx, void *arg) +{ + __u64 target = *(__u64 *)arg; + + if (target <= bpf_ktime_get_ns()) + return 1; + + /* just to kill time */ + (void)bpf_get_prandom_u32(); + + return 0; +} + +static inline void do_lock_delay(__u64 duration) +{ + __u64 target = bpf_ktime_get_ns() + duration; + + bpf_loop(MAX_LOOP, delay_callback, &target, /*flags=*/0); +} + +static inline void check_lock_delay(__u64 lock) +{ + __u64 *delay; + + delay = bpf_map_lookup_elem(&lock_delays, &lock); + if (delay) + do_lock_delay(*delay); +} + static inline struct tstamp_data *get_tstamp_elem(__u32 flags) { __u32 pid; @@ -793,6 +836,9 @@ found: update_contention_data(data, duration, 1); out: + if (lock_delay) + check_lock_delay(pelem->lock); + pelem->lock = 0; if (need_delete) bpf_map_delete_elem(&tstamp, &pid); @@ -801,6 +847,11 @@ out: extern struct rq runqueues __ksym; +const volatile __u64 contig_page_data_addr; +const volatile __u64 node_data_addr; +const volatile int nr_nodes; +const volatile int sizeof_zone; + struct rq___old { raw_spinlock_t lock; } __attribute__((preserve_access_index)); @@ -809,6 +860,59 @@ struct rq___new { raw_spinlock_t __lock; } __attribute__((preserve_access_index)); +static void collect_zone_lock(void) +{ + __u64 nr_zones, zone_off; + __u64 lock_addr, lock_off; + __u32 lock_flag = LOCK_CLASS_ZONE_LOCK; + + zone_off = offsetof(struct pglist_data, node_zones); + lock_off = offsetof(struct zone, lock); + + if (contig_page_data_addr) { + struct pglist_data *contig_page_data; + + contig_page_data = (void *)(long)contig_page_data_addr; + nr_zones = BPF_CORE_READ(contig_page_data, nr_zones); + + for (int i = 0; i < MAX_ZONES; i++) { + __u64 zone_addr; + + if (i >= nr_zones) + break; + + zone_addr = contig_page_data_addr + (sizeof_zone * i) + zone_off; + lock_addr = zone_addr + lock_off; + + bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); + } + } else if (nr_nodes > 0) { + struct pglist_data **node_data = (void *)(long)node_data_addr; + + for (int i = 0; i < nr_nodes; i++) { + struct pglist_data *pgdat = NULL; + int err; + + err = bpf_core_read(&pgdat, sizeof(pgdat), &node_data[i]); + if (err < 0 || pgdat == NULL) + break; + + nr_zones = BPF_CORE_READ(pgdat, nr_zones); + for (int k = 0; k < MAX_ZONES; k++) { + __u64 zone_addr; + + if (k >= nr_zones) + break; + + zone_addr = (__u64)(void *)pgdat + (sizeof_zone * k) + zone_off; + lock_addr = zone_addr + lock_off; + + bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); + } + } + } +} + SEC("raw_tp/bpf_test_finish") int BPF_PROG(collect_lock_syms) { @@ -830,6 +934,9 @@ int BPF_PROG(collect_lock_syms) lock_flag = LOCK_CLASS_RQLOCK; bpf_map_update_elem(&lock_syms, &lock_addr, &lock_flag, BPF_ANY); } + + collect_zone_lock(); + return 0; } diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h index 15f5743bd409..28c5e5aced7f 100644 --- a/tools/perf/util/bpf_skel/lock_data.h +++ b/tools/perf/util/bpf_skel/lock_data.h @@ -67,6 +67,7 @@ enum lock_aggr_mode { enum lock_class_sym { LOCK_CLASS_NONE, LOCK_CLASS_RQLOCK, + LOCK_CLASS_ZONE_LOCK, }; struct slab_cache_data { diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c index c152116df72f..72763bb8d1de 100644 --- a/tools/perf/util/bpf_skel/off_cpu.bpf.c +++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c @@ -18,10 +18,19 @@ #define MAX_STACKS 32 #define MAX_ENTRIES 102400 +#define MAX_CPUS 4096 +#define MAX_OFFCPU_LEN 37 + +// We have a 'struct stack' in vmlinux.h when building with GEN_VMLINUX_H=1 +struct __stack { + u64 array[MAX_STACKS]; +}; + struct tstamp_data { __u32 stack_id; __u32 state; __u64 timestamp; + struct __stack stack; }; struct offcpu_key { @@ -39,6 +48,24 @@ struct { __uint(max_entries, MAX_ENTRIES); } stacks SEC(".maps"); +struct offcpu_data { + u64 array[MAX_OFFCPU_LEN]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(max_entries, MAX_CPUS); +} offcpu_output SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(struct offcpu_data)); + __uint(max_entries, 1); +} offcpu_payload SEC(".maps"); + struct { __uint(type, BPF_MAP_TYPE_TASK_STORAGE); __uint(map_flags, BPF_F_NO_PREALLOC); @@ -97,6 +124,8 @@ const volatile bool uses_cgroup_v1 = false; int perf_subsys_id = -1; +__u64 offcpu_thresh_ns; + /* * Old kernel used to call it task_struct->state and now it's '__state'. * Use BPF CO-RE "ignored suffix rule" to deal with it like below: @@ -183,6 +212,47 @@ static inline int can_record(struct task_struct *t, int state) return 1; } +static inline int copy_stack(struct __stack *from, struct offcpu_data *to, int n) +{ + int len = 0; + + for (int i = 0; i < MAX_STACKS && from->array[i]; ++i, ++len) + to->array[n + 2 + i] = from->array[i]; + + return len; +} + +/** + * off_cpu_dump - dump off-cpu samples to ring buffer + * @data: payload for dumping off-cpu samples + * @key: off-cpu data + * @stack: stack trace of the task before being scheduled out + * + * If the threshold of off-cpu time is reached, acquire tid, period, callchain, and cgroup id + * information of the task, and dump it as a raw sample to perf ring buffer + */ +static int off_cpu_dump(void *ctx, struct offcpu_data *data, struct offcpu_key *key, + struct __stack *stack, __u64 delta) +{ + int n = 0, len = 0; + + data->array[n++] = (u64)key->tgid << 32 | key->pid; + data->array[n++] = delta; + + /* data->array[n] is callchain->nr (updated later) */ + data->array[n + 1] = PERF_CONTEXT_USER; + data->array[n + 2] = 0; + len = copy_stack(stack, data, n); + + /* update length of callchain */ + data->array[n] = len + 1; + n += len + 2; + + data->array[n++] = key->cgroup_id; + + return bpf_perf_event_output(ctx, &offcpu_output, BPF_F_CURRENT_CPU, data, n * sizeof(u64)); +} + static int off_cpu_stat(u64 *ctx, struct task_struct *prev, struct task_struct *next, int state) { @@ -207,6 +277,16 @@ static int off_cpu_stat(u64 *ctx, struct task_struct *prev, pelem->state = state; pelem->stack_id = stack_id; + /* + * If stacks are successfully collected by bpf_get_stackid(), collect them once more + * in task_storage for direct off-cpu sample dumping + */ + if (stack_id > 0 && bpf_get_stack(ctx, &pelem->stack, MAX_STACKS * sizeof(u64), BPF_F_USER_STACK)) { + /* + * This empty if block is used to avoid 'result unused warning' from bpf_get_stack(). + * If the collection fails, continue with the logic for the next task. + */ + } next: pelem = bpf_task_storage_get(&tstamp, next, NULL, 0); @@ -221,11 +301,19 @@ next: __u64 delta = ts - pelem->timestamp; __u64 *total; - total = bpf_map_lookup_elem(&off_cpu, &key); - if (total) - *total += delta; - else - bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + if (delta >= offcpu_thresh_ns) { + int zero = 0; + struct offcpu_data *data = bpf_map_lookup_elem(&offcpu_payload, &zero); + + if (data) + off_cpu_dump(ctx, data, &key, &pelem->stack, delta); + } else { + total = bpf_map_lookup_elem(&off_cpu, &key); + if (total) + *total += delta; + else + bpf_map_update_elem(&off_cpu, &key, &delta, BPF_ANY); + } /* prevent to reuse the timestamp later */ pelem->timestamp = 0; diff --git a/tools/perf/util/bpf_skel/syscall_summary.bpf.c b/tools/perf/util/bpf_skel/syscall_summary.bpf.c new file mode 100644 index 000000000000..1bcd066a5199 --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.bpf.c @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Trace raw_syscalls tracepoints to collect system call statistics. + */ + +#include "vmlinux.h" +#include "syscall_summary.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +/* This is to calculate a delta between sys-enter and sys-exit for each thread */ +struct syscall_trace { + int nr; /* syscall number is only available at sys-enter */ + int unused; + u64 timestamp; +}; + +#define MAX_ENTRIES (128 * 1024) + +struct syscall_trace_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, int); /* tid */ + __type(value, struct syscall_trace); + __uint(max_entries, MAX_ENTRIES); +} syscall_trace_map SEC(".maps"); + +struct syscall_stats_map { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, struct syscall_key); + __type(value, struct syscall_stats); + __uint(max_entries, MAX_ENTRIES); +} syscall_stats_map SEC(".maps"); + +int enabled; /* controlled from userspace */ + +const volatile enum syscall_aggr_mode aggr_mode; +const volatile int use_cgroup_v2; + +int perf_subsys_id = -1; + +static inline __u64 get_current_cgroup_id(void) +{ + struct task_struct *task; + struct cgroup *cgrp; + + if (use_cgroup_v2) + return bpf_get_current_cgroup_id(); + + task = bpf_get_current_task_btf(); + + if (perf_subsys_id == -1) { +#if __has_builtin(__builtin_preserve_enum_value) + perf_subsys_id = bpf_core_enum_value(enum cgroup_subsys_id, + perf_event_cgrp_id); +#else + perf_subsys_id = perf_event_cgrp_id; +#endif + } + + cgrp = BPF_CORE_READ(task, cgroups, subsys[perf_subsys_id], cgroup); + return BPF_CORE_READ(cgrp, kn, id); +} + +static void update_stats(int cpu_or_tid, u64 cgroup_id, int nr, s64 duration, + long ret) +{ + struct syscall_key key = { + .cpu_or_tid = cpu_or_tid, + .cgroup = cgroup_id, + .nr = nr, + }; + struct syscall_stats *stats; + + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) { + struct syscall_stats zero = {}; + + bpf_map_update_elem(&syscall_stats_map, &key, &zero, BPF_NOEXIST); + stats = bpf_map_lookup_elem(&syscall_stats_map, &key); + if (stats == NULL) + return; + } + + __sync_fetch_and_add(&stats->count, 1); + if (ret < 0) + __sync_fetch_and_add(&stats->error, 1); + + if (duration > 0) { + __sync_fetch_and_add(&stats->total_time, duration); + __sync_fetch_and_add(&stats->squared_sum, duration * duration); + if (stats->max_time < duration) + stats->max_time = duration; + if (stats->min_time > duration || stats->min_time == 0) + stats->min_time = duration; + } + + return; +} + +SEC("tp_btf/sys_enter") +int sys_enter(u64 *ctx) +{ + int tid; + struct syscall_trace st; + + if (!enabled) + return 0; + + st.nr = ctx[1]; /* syscall number */ + st.unused = 0; + st.timestamp = bpf_ktime_get_ns(); + + tid = bpf_get_current_pid_tgid(); + bpf_map_update_elem(&syscall_trace_map, &tid, &st, BPF_ANY); + + return 0; +} + +SEC("tp_btf/sys_exit") +int sys_exit(u64 *ctx) +{ + int tid; + int key = 0; + u64 cgroup = 0; + long ret = ctx[1]; /* return value of the syscall */ + struct syscall_trace *st; + s64 delta; + + if (!enabled) + return 0; + + tid = bpf_get_current_pid_tgid(); + st = bpf_map_lookup_elem(&syscall_trace_map, &tid); + if (st == NULL) + return 0; + + if (aggr_mode == SYSCALL_AGGR_THREAD) + key = tid; + else if (aggr_mode == SYSCALL_AGGR_CGROUP) + cgroup = get_current_cgroup_id(); + else + key = bpf_get_smp_processor_id(); + + delta = bpf_ktime_get_ns() - st->timestamp; + update_stats(key, cgroup, st->nr, delta, ret); + + bpf_map_delete_elem(&syscall_trace_map, &tid); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/util/bpf_skel/syscall_summary.h b/tools/perf/util/bpf_skel/syscall_summary.h new file mode 100644 index 000000000000..72ccccb45925 --- /dev/null +++ b/tools/perf/util/bpf_skel/syscall_summary.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Data structures shared between BPF and tools. */ +#ifndef UTIL_BPF_SKEL_SYSCALL_SUMMARY_H +#define UTIL_BPF_SKEL_SYSCALL_SUMMARY_H + +enum syscall_aggr_mode { + SYSCALL_AGGR_THREAD, + SYSCALL_AGGR_CPU, + SYSCALL_AGGR_CGROUP, +}; + +struct syscall_key { + u64 cgroup; + int cpu_or_tid; + int nr; +}; + +struct syscall_stats { + u64 total_time; + u64 squared_sum; + u64 max_time; + u64 min_time; + u32 count; + u32 error; +}; + +#endif /* UTIL_BPF_SKEL_SYSCALL_SUMMARY_H */ diff --git a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h index 7b81d3173917..a59ce912be18 100644 --- a/tools/perf/util/bpf_skel/vmlinux/vmlinux.h +++ b/tools/perf/util/bpf_skel/vmlinux/vmlinux.h @@ -203,4 +203,13 @@ struct bpf_iter__kmem_cache { struct kmem_cache *s; } __attribute__((preserve_access_index)); +struct zone { + spinlock_t lock; +} __attribute__((preserve_access_index)); + +struct pglist_data { + struct zone node_zones[6]; /* value for all possible config */ + int nr_zones; +} __attribute__((preserve_access_index)); + #endif // __VMLINUX_H diff --git a/tools/perf/util/demangle-cxx.h b/tools/perf/util/demangle-cxx.h index 26b5b66c0b4e..9359937a881a 100644 --- a/tools/perf/util/demangle-cxx.h +++ b/tools/perf/util/demangle-cxx.h @@ -2,6 +2,8 @@ #ifndef __PERF_DEMANGLE_CXX #define __PERF_DEMANGLE_CXX 1 +#include <stdbool.h> + #ifdef __cplusplus extern "C" { #endif diff --git a/tools/perf/util/demangle-rust-v0.c b/tools/perf/util/demangle-rust-v0.c new file mode 100644 index 000000000000..19924d85407d --- /dev/null +++ b/tools/perf/util/demangle-rust-v0.c @@ -0,0 +1,2042 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// The contents of this file come from the Rust rustc-demangle library, hosted +// in the <https://github.com/rust-lang/rustc-demangle> repository, licensed +// under "Apache-2.0 OR MIT". For copyright details, see +// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>. +// Please note that the file should be kept as close as possible to upstream. + +// Code for demangling Rust symbols. This code is mostly +// a line-by-line translation of the Rust code in `rustc-demangle`. + +// you can find the latest version of this code in https://github.com/rust-lang/rustc-demangle + +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <stdbool.h> +#include <sys/param.h> +#include <stdio.h> + +#include "demangle-rust-v0.h" + +#if defined(__GNUC__) || defined(__clang__) +#define NODISCARD __attribute__((warn_unused_result)) +#else +#define NODISCARD +#endif + +#define MAX_DEPTH 500 + +typedef enum { + DemangleOk, + DemangleInvalid, + DemangleRecursed, + DemangleBug, +} demangle_status; + +struct demangle_v0 { + const char *mangled; + size_t mangled_len; +}; + +struct demangle_legacy { + const char *mangled; + size_t mangled_len; + size_t elements; +}; + +// private version of memrchr to avoid _GNU_SOURCE +static void *demangle_memrchr(const void *s, int c, size_t n) { + const uint8_t *s_ = s; + for (; n != 0; n--) { + if (s_[n-1] == c) { + return (void*)&s_[n-1]; + } + } + return NULL; +} + + +static bool unicode_iscontrol(uint32_t ch) { + // this is *technically* a unicode table, but + // some unicode properties are simpler than you might think + return ch < 0x20 || (ch >= 0x7f && ch < 0xa0); +} + +// "good enough" tables, the only consequence is that when printing +// *constant strings*, some characters are printed as `\u{abcd}` rather than themselves. +// +// I'm leaving these here to allow easily replacing them with actual +// tables if desired. +static bool unicode_isprint(uint32_t ch) { + if (ch < 0x20) { + return false; + } + if (ch < 0x7f) { + return true; + } + return false; +} + +static bool unicode_isgraphemextend(uint32_t ch) { + (void)ch; + return false; +} + +static bool str_isascii(const char *s, size_t s_len) { + for (size_t i = 0; i < s_len; i++) { + if (s[i] & 0x80) { + return false; + } + } + + return true; +} + +typedef enum { + PunycodeOk, + PunycodeError +} punycode_status; + +struct parser { + // the parser assumes that `sym` has a safe "terminating byte". It might be NUL, + // but it might also be something else if a symbol is "truncated". + const char *sym; + size_t sym_len; + size_t next; + uint32_t depth; +}; + +struct printer { + demangle_status status; // if status == 0 parser is valid + struct parser parser; + char *out; // NULL for no output [in which case out_len is not decremented] + size_t out_len; + uint32_t bound_lifetime_depth; + bool alternate; +}; + +static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value); +static NODISCARD overflow_status printer_print_type(struct printer *printer); +static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value); + +static NODISCARD demangle_status try_parse_path(struct parser *parser) { + struct printer printer = { + DemangleOk, + *parser, + NULL, + SIZE_MAX, + 0, + false + }; + overflow_status ignore = printer_print_path(&printer, false); // can't fail since no output + (void)ignore; + *parser = printer.parser; + return printer.status; +} + +NODISCARD static demangle_status rust_demangle_v0_demangle(const char *s, size_t s_len, struct demangle_v0 *res, const char **rest) { + if (s_len > strlen(s)) { + // s_len only exists to shorten the string, this is not a buffer API + return DemangleInvalid; + } + + const char *inner; + size_t inner_len; + if (s_len >= 2 && !strncmp(s, "_R", strlen("_R"))) { + inner = s+2; + inner_len = s_len - 2; + } else if (s_len >= 1 && !strncmp(s, "R", strlen("R"))) { + // On Windows, dbghelp strips leading underscores, so we accept "R..." + // form too. + inner = s+1; + inner_len = s_len - 1; + } else if (s_len >= 3 && !strncmp(s, "__R", strlen("__R"))) { + // On OSX, symbols are prefixed with an extra _ + inner = s+3; + inner_len = s_len - 3; + } else { + return DemangleInvalid; + } + + // Paths always start with uppercase characters. + if (*inner < 'A' || *inner > 'Z') { + return DemangleInvalid; + } + + if (!str_isascii(inner, inner_len)) { + return DemangleInvalid; + } + + struct parser parser = { inner, inner_len, 0, 0 }; + + demangle_status status = try_parse_path(&parser); + if (status != DemangleOk) return status; + char next = parser.sym[parser.next]; + + // Instantiating crate (paths always start with uppercase characters). + if (parser.next < parser.sym_len && next >= 'A' && next <= 'Z') { + status = try_parse_path(&parser); + if (status != DemangleOk) return status; + } + + res->mangled = inner; + res->mangled_len = inner_len; + if (rest) { + *rest = parser.sym + parser.next; + } + + return DemangleOk; +} + +// This might require `len` to be up to 3 characters bigger than the real output len in case of utf-8 +NODISCARD static overflow_status rust_demangle_v0_display_demangle(struct demangle_v0 res, char *out, size_t len, bool alternate) { + struct printer printer = { + DemangleOk, + { + res.mangled, + res.mangled_len, + 0, + 0 + }, + out, + len, + 0, + alternate + }; + if (printer_print_path(&printer, true) == OverflowOverflow) { + return OverflowOverflow; + } + if (printer.out_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + *printer.out = '\0'; + return OverflowOk; +} + +static size_t code_to_utf8(unsigned char *buffer, uint32_t code) +{ + if (code <= 0x7F) { + buffer[0] = code; + return 1; + } + if (code <= 0x7FF) { + buffer[0] = 0xC0 | (code >> 6); /* 110xxxxx */ + buffer[1] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 2; + } + if (code <= 0xFFFF) { + buffer[0] = 0xE0 | (code >> 12); /* 1110xxxx */ + buffer[1] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + buffer[2] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 3; + } + if (code <= 0x10FFFF) { + buffer[0] = 0xF0 | (code >> 18); /* 11110xxx */ + buffer[1] = 0x80 | ((code >> 12) & 0x3F); /* 10xxxxxx */ + buffer[2] = 0x80 | ((code >> 6) & 0x3F); /* 10xxxxxx */ + buffer[3] = 0x80 | (code & 0x3F); /* 10xxxxxx */ + return 4; + } + return 0; +} + + +// return length of char at byte, or SIZE_MAX if invalid. buf should have 4 valid characters +static NODISCARD size_t utf8_next_char(uint8_t *s, uint32_t *ch) { + uint8_t byte = *s; + // UTF8-1 = %x00-7F + // UTF8-2 = %xC2-DF UTF8-tail + // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / + // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) + // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / + // %xF4 %x80-8F 2( UTF8-tail ) + if (byte < 0x80) { + *ch = byte; + return 1; + } else if (byte < 0xc2) { + return SIZE_MAX; + } else if (byte < 0xe0) { + if (s[1] >= 0x80 && s[1] < 0xc0) { + *ch = ((byte&0x1f)<<6) + (s[1] & 0x3f); + return 2; + } + return SIZE_MAX; + } if (byte < 0xf0) { + if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0)) { + return SIZE_MAX; // basic validation + } + if (byte == 0xe0 && s[1] < 0xa0) { + return SIZE_MAX; // overshort + } + if (byte == 0xed && s[1] >= 0xa0) { + return SIZE_MAX; // surrogate + } + *ch = ((byte&0x0f)<<12) + ((s[1] & 0x3f)<<6) + (s[2] & 0x3f); + return 3; + } else if (byte < 0xf5) { + if (!(s[1] >= 0x80 && s[1] < 0xc0) || !(s[2] >= 0x80 && s[2] < 0xc0) || !(s[3] >= 0x80 && s[3] < 0xc0)) { + return SIZE_MAX; // basic validation + } + if (byte == 0xf0 && s[1] < 0x90) { + return SIZE_MAX; // overshort + } + if (byte == 0xf4 && s[1] >= 0x90) { + return SIZE_MAX; // over max + } + *ch = ((byte&0x07)<<18) + ((s[1] & 0x3f)<<12) + ((s[2] & 0x3f)<<6) + (s[3]&0x3f); + return 4; + } else { + return SIZE_MAX; + } +} + +static NODISCARD bool validate_char(uint32_t n) { + return ((n ^ 0xd800) - 0x800) < 0x110000 - 0x800; +} + +#define SMALL_PUNYCODE_LEN 128 + +static NODISCARD punycode_status punycode_decode(const char *start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint32_t (*out_)[SMALL_PUNYCODE_LEN], size_t *out_len) { + uint32_t *out = *out_; + + if (punycode_len == 0) { + return PunycodeError; + } + + if (ascii_len > SMALL_PUNYCODE_LEN) { + return PunycodeError; + } + for (size_t i = 0; i < ascii_len; i++) { + out[i] = start[i]; + } + size_t len = ascii_len; + + size_t base = 36, t_min = 1, t_max = 26, skew = 38, damp = 700, bias = 72, i = 0, n = 0x80; + for (;;) { + size_t delta = 0, w = 1, k = 0; + for (;;) { + k += base; + size_t biased = k < bias ? 0 : k - bias; + size_t t = MIN(MAX(biased, t_min), t_max); + size_t d; + if (punycode_len == 0) { + return PunycodeError; + } + char nx = *punycode_start++; + punycode_len--; + if ('a' <= nx && nx <= 'z') { + d = nx - 'a'; + } else if ('0' <= nx && nx <= '9') { + d = 26 + (nx - '0'); + } else { + return PunycodeError; + } + if (w == 0 || d > SIZE_MAX / w || d*w > SIZE_MAX - delta) { + return PunycodeError; + } + delta += d * w; + if (d < t) { + break; + } + if (base < t || w == 0 || (base - t) > SIZE_MAX / w) { + return PunycodeError; + } + w *= (base - t); + } + + len += 1; + if (i > SIZE_MAX - delta) { + return PunycodeError; + } + i += delta; + if (n > SIZE_MAX - i / len) { + return PunycodeError; + } + n += i / len; + i %= len; + + // char validation + if (n > UINT32_MAX || !validate_char((uint32_t)n)) { + return PunycodeError; + } + + // insert new character + if (len > SMALL_PUNYCODE_LEN) { + return PunycodeError; + } + memmove(out + i + 1, out + i, (len - i - 1) * sizeof(uint32_t)); + out[i] = (uint32_t)n; + + // start i index at incremented position + i++; + + // If there are no more deltas, decoding is complete. + if (punycode_len == 0) { + *out_len = len; + return PunycodeOk; + } + + // Perform bias adaptation. + delta /= damp; + damp = 2; + + delta += delta / len; + k = 0; + while (delta > ((base - t_min) * t_max) / 2) { + delta /= base - t_min; + k += base; + } + bias = k + ((base - t_min + 1) * delta) / (delta + skew); + } +} + +struct ident { + const char *ascii_start; + size_t ascii_len; + const char *punycode_start; + size_t punycode_len; +}; + +static NODISCARD overflow_status display_ident(const char *ascii_start, size_t ascii_len, const char *punycode_start, size_t punycode_len, uint8_t *out, size_t *out_len) { + uint32_t outbuf[SMALL_PUNYCODE_LEN]; + + size_t wide_len; + size_t out_buflen = *out_len; + + if (punycode_len == 0) { + if (ascii_len > out_buflen) { + return OverflowOverflow; + } + memcpy(out, ascii_start, ascii_len); + *out_len = ascii_len; + } else if (punycode_decode(ascii_start, ascii_len, punycode_start, punycode_len, &outbuf, &wide_len) == PunycodeOk) { + size_t narrow_len = 0; + for (size_t i = 0; i < wide_len; i++) { + if (out_buflen - narrow_len < 4) { + return OverflowOverflow; + } + unsigned char *pos = &out[narrow_len]; + narrow_len += code_to_utf8(pos, outbuf[i]); + } + *out_len = narrow_len; + } else { + size_t narrow_len = 0; + if (out_buflen < strlen("punycode{")) { + return OverflowOverflow; + } + memcpy(out, "punycode{", strlen("punycode{")); + narrow_len = strlen("punycode{"); + if (ascii_len > 0) { + if (out_buflen - narrow_len < ascii_len || out_buflen - narrow_len - ascii_len < 1) { + return OverflowOverflow; + } + memcpy(out + narrow_len, ascii_start, ascii_len); + narrow_len += ascii_len; + out[narrow_len] = '-'; + narrow_len++; + } + if (out_buflen - narrow_len < punycode_len || out_buflen - narrow_len - punycode_len < 1) { + return OverflowOverflow; + } + memcpy(out + narrow_len, punycode_start, punycode_len); + narrow_len += punycode_len; + out[narrow_len] = '}'; + narrow_len++; + *out_len = narrow_len; + } + + return OverflowOk; +} + +static NODISCARD bool try_parse_uint(const char *buf, size_t len, uint64_t *result) { + size_t cur = 0; + for(;cur < len && buf[cur] == '0';cur++); + uint64_t result_val = 0; + if (len - cur > 16) return false; + for(;cur < len;cur++) { + char c = buf[cur]; + result_val <<= 4; + if ('0' <= c && c <= '9') { + result_val += c - '0'; + } else if ('a' <= c && c <= 'f') { + result_val += 10 + (c - 'a'); + } else { + return false; + } + } + *result = result_val; + return true; +} + +static NODISCARD bool dinibble2int(const char *buf, uint8_t *result) { + uint8_t result_val = 0; + for (int i = 0; i < 2; i++) { + char c = buf[i]; + result_val <<= 4; + if ('0' <= c && c <= '9') { + result_val += c - '0'; + } else if ('a' <= c && c <= 'f') { + result_val += 10 + (c - 'a'); + } else { + return false; + } + } + *result = result_val; + return true; +} + + +typedef enum { + NtsOk = 0, + NtsOverflow = 1, + NtsInvalid = 2 +} nibbles_to_string_status; + +// '\u{10ffff}', +margin +#define ESCAPED_SIZE 12 + +static NODISCARD size_t char_to_string(uint32_t ch, uint8_t quote, bool first, char (*buf)[ESCAPED_SIZE]) { + // encode the character + char *escaped_buf = *buf; + escaped_buf[0] = '\\'; + size_t escaped_len = 2; + switch (ch) { + case '\0': + escaped_buf[1] = '0'; + break; + case '\t': + escaped_buf[1] = 't'; + break; + case '\r': + escaped_buf[1] = 'r'; + break; + case '\n': + escaped_buf[1] = 'n'; + break; + case '\\': + escaped_buf[1] = '\\'; + break; + default: + if (ch == quote) { + escaped_buf[1] = ch; + } else if (!unicode_isprint(ch) || (first && unicode_isgraphemextend(ch))) { + int hexlen = snprintf(escaped_buf, ESCAPED_SIZE, "\\u{%x}", (unsigned int)ch); + if (hexlen < 0) { + return 0; // (snprintf shouldn't fail!) + } + escaped_len = hexlen; + } else { + // printable character + escaped_buf[0] = ch; + escaped_len = 1; + } + break; + } + + return escaped_len; +} + +// convert nibbles to a single/double-quoted string +static NODISCARD nibbles_to_string_status nibbles_to_string(const char *buf, size_t len, uint8_t *out, size_t *out_len) { + uint8_t quote = '"'; + bool first = true; + + if ((len % 2) != 0) { + return NtsInvalid; // odd number of nibbles + } + + size_t cur_out_len = 0; + + // write starting quote + if (out != NULL) { + cur_out_len = *out_len; + if (cur_out_len == 0) { + return NtsOverflow; + } + *out++ = quote; + cur_out_len--; + } + + uint8_t conv_buf[4] = {0}; + size_t conv_buf_len = 0; + while (len > 1 || conv_buf_len > 0) { + while (len > 1 && conv_buf_len < sizeof(conv_buf)) { + if (!dinibble2int(buf, &conv_buf[conv_buf_len])) { + return NtsInvalid; + } + conv_buf_len++; + buf += 2; + len -= 2; + } + + // conv_buf is full here if possible, process 1 UTF-8 character + uint32_t ch = 0; + size_t consumed = utf8_next_char(conv_buf, &ch); + if (consumed > conv_buf_len) { + // either SIZE_MAX (invalid UTF-8) or finished input buffer and + // there are still bytes remaining, in both cases invalid + return NtsInvalid; + } + + // "consume" the character + memmove(conv_buf, conv_buf+consumed, conv_buf_len-consumed); + conv_buf_len -= consumed; + + char escaped_buf[ESCAPED_SIZE]; + size_t escaped_len = char_to_string(ch, '"', first, &escaped_buf); + if (out != NULL) { + if (cur_out_len < escaped_len) { + return NtsOverflow; + } + memcpy(out, escaped_buf, escaped_len); + out += escaped_len; + cur_out_len -= escaped_len; + } + first = false; + } + + // write ending quote + if (out != NULL) { + if (cur_out_len == 0) { + return NtsOverflow; + } + *out++ = quote; + cur_out_len--; + *out_len -= cur_out_len; // subtract remaining space to get used space + } + + return NtsOk; +} + +static const char* basic_type(uint8_t tag) { + switch(tag) { + case 'b': + return "bool"; + case 'c': + return "char"; + case 'e': + return "str"; + case 'u': + return "()"; + case 'a': + return "i8"; + case 's': + return "i16"; + case 'l': + return "i32"; + case 'x': + return "i64"; + case 'n': + return "i128"; + case 'i': + return "isize"; + case 'h': + return "u8"; + case 't': + return "u16"; + case 'm': + return "u32"; + case 'y': + return "u64"; + case 'o': + return "u128"; + case 'j': + return "usize"; + case 'f': + return "f32"; + case 'd': + return "f64"; + case 'z': + return "!"; + case 'p': + return "_"; + case 'v': + return "..."; + default: + return NULL; + } +} + +static NODISCARD demangle_status parser_push_depth(struct parser *parser) { + parser->depth++; + if (parser->depth > MAX_DEPTH) { + return DemangleRecursed; + } else { + return DemangleOk; + } +} + +static demangle_status parser_pop_depth(struct parser *parser) { + parser->depth--; + return DemangleOk; +} + +static uint8_t parser_peek(struct parser const *parser) { + if (parser->next == parser->sym_len) { + return 0; // add a "pseudo nul terminator" to avoid peeking past the end of a symbol + } else { + return parser->sym[parser->next]; + } +} + +static bool parser_eat(struct parser *parser, uint8_t ch) { + if (parser_peek(parser) == ch) { + if (ch != 0) { // safety: make sure we don't skip past the NUL terminator + parser->next++; + } + return true; + } else { + return false; + } +} + +static uint8_t parser_next(struct parser *parser) { + // don't advance after end of input, and return an imaginary NUL terminator + if (parser->next == parser->sym_len) { + return 0; + } else { + return parser->sym[parser->next++]; + } +} + +static NODISCARD demangle_status parser_ch(struct parser *parser, uint8_t *next) { + // don't advance after end of input + if (parser->next == parser->sym_len) { + return DemangleInvalid; + } else { + *next = parser->sym[parser->next++]; + return DemangleOk; + } +} + +struct buf { + const char *start; + size_t len; +}; + +static NODISCARD demangle_status parser_hex_nibbles(struct parser *parser, struct buf *buf) { + size_t start = parser->next; + for (;;) { + uint8_t ch = parser_next(parser); + if (ch == '_') { + break; + } + if (!(('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'f'))) { + return DemangleInvalid; + } + } + buf->start = parser->sym + start; + buf->len = parser->next - start - 1; // skip final _ + return DemangleOk; +} + +static NODISCARD demangle_status parser_digit_10(struct parser *parser, uint8_t *out) { + uint8_t ch = parser_peek(parser); + if ('0' <= ch && ch <= '9') { + *out = ch - '0'; + parser->next++; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_digit_62(struct parser *parser, uint64_t *out) { + uint8_t ch = parser_peek(parser); + if ('0' <= ch && ch <= '9') { + *out = ch - '0'; + parser->next++; + return DemangleOk; + } else if ('a' <= ch && ch <= 'z') { + *out = 10 + (ch - 'a'); + parser->next++; + return DemangleOk; + } else if ('A' <= ch && ch <= 'Z') { + *out = 10 + 26 + (ch - 'A'); + parser->next++; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_integer_62(struct parser *parser, uint64_t *out) { + if (parser_eat(parser, '_')) { + *out = 0; + return DemangleOk; + } + + uint64_t x = 0; + demangle_status status; + while (!parser_eat(parser, '_')) { + uint64_t d; + if ((status = parser_digit_62(parser, &d)) != DemangleOk) { + return status; + } + if (x > UINT64_MAX / 62) { + return DemangleInvalid; + } + x *= 62; + if (x > UINT64_MAX - d) { + return DemangleInvalid; + } + x += d; + } + if (x == UINT64_MAX) { + return DemangleInvalid; + } + *out = x + 1; + return DemangleOk; +} + +static NODISCARD demangle_status parser_opt_integer_62(struct parser *parser, uint8_t tag, uint64_t *out) { + if (!parser_eat(parser, tag)) { + *out = 0; + return DemangleOk; + } + + demangle_status status; + if ((status = parser_integer_62(parser, out)) != DemangleOk) { + return status; + } + if (*out == UINT64_MAX) { + return DemangleInvalid; + } + *out = *out + 1; + return DemangleOk; +} + +static NODISCARD demangle_status parser_disambiguator(struct parser *parser, uint64_t *out) { + return parser_opt_integer_62(parser, 's', out); +} + +typedef uint8_t parser_namespace_type; + +static NODISCARD demangle_status parser_namespace(struct parser *parser, parser_namespace_type *out) { + uint8_t next = parser_next(parser); + if ('A' <= next && next <= 'Z') { + *out = next; + return DemangleOk; + } else if ('a' <= next && next <= 'z') { + *out = 0; + return DemangleOk; + } else { + return DemangleInvalid; + } +} + +static NODISCARD demangle_status parser_backref(struct parser *parser, struct parser *out) { + size_t start = parser->next; + if (start == 0) { + return DemangleBug; + } + size_t s_start = start - 1; + uint64_t i; + demangle_status status = parser_integer_62(parser, &i); + if (status != DemangleOk) { + return status; + } + if (i >= s_start) { + return DemangleInvalid; + } + struct parser res = { + .sym = parser->sym, + .sym_len = parser->sym_len, + .next = (size_t)i, + .depth = parser->depth + }; + status = parser_push_depth(&res); + if (status != DemangleOk) { + return status; + } + *out = res; + return DemangleOk; +} + +static NODISCARD demangle_status parser_ident(struct parser *parser, struct ident *out) { + bool is_punycode = parser_eat(parser, 'u'); + size_t len; + uint8_t d; + demangle_status status = parser_digit_10(parser, &d); + len = d; + if (status != DemangleOk) { + return status; + } + if (len) { + for (;;) { + status = parser_digit_10(parser, &d); + if (status != DemangleOk) { + break; + } + if (len > SIZE_MAX / 10) { + return DemangleInvalid; + } + len *= 10; + if (len > SIZE_MAX - d) { + return DemangleInvalid; + } + len += d; + } + } + + // Skip past the optional `_` separator. + parser_eat(parser, '_'); + + size_t start = parser->next; + if (parser->sym_len - parser->next < len) { + return DemangleInvalid; + } + parser->next += len; + + const char *ident = &parser->sym[start]; + + if (is_punycode) { + const char *underscore = demangle_memrchr(ident, '_', (size_t)len); + if (underscore == NULL) { + *out = (struct ident){ + .ascii_start="", + .ascii_len=0, + .punycode_start=ident, + .punycode_len=len + }; + } else { + size_t ascii_len = underscore - ident; + // ascii_len <= len - 1 since `_` is in the first len bytes + size_t punycode_len = len - 1 - ascii_len; + *out = (struct ident){ + .ascii_start=ident, + .ascii_len=ascii_len, + .punycode_start=underscore + 1, + .punycode_len=punycode_len + }; + } + if (out->punycode_len == 0) { + return DemangleInvalid; + } + return DemangleOk; + } else { + *out = (struct ident) { + .ascii_start=ident, + .ascii_len=(size_t)len, + .punycode_start="", + .punycode_len=0, + }; + return DemangleOk; + } +} + +#define INVALID_SYNTAX "{invalid syntax}" + +static const char *demangle_error_message(demangle_status status) { + switch (status) { + case DemangleInvalid: + return INVALID_SYNTAX; + case DemangleBug: + return "{bug}"; + case DemangleRecursed: + return "{recursion limit reached}"; + default: + return "{unknown error}"; + } +} + +#define PRINT(print_fn) \ + do { \ + if ((print_fn) == OverflowOverflow) { \ + return OverflowOverflow; \ + } \ + } while(0) + +#define PRINT_CH(printer, s) PRINT(printer_print_ch((printer), (s))) +#define PRINT_STR(printer, s) PRINT(printer_print_str((printer), (s))) +#define PRINT_U64(printer, s) PRINT(printer_print_u64((printer), (s))) +#define PRINT_IDENT(printer, s) PRINT(printer_print_ident((printer), (s))) + +#define INVALID(printer) \ + do { \ + PRINT_STR((printer), INVALID_SYNTAX); \ + (printer)->status = DemangleInvalid; \ + return OverflowOk; \ + } while(0) + +#define PARSE(printer, method, ...) \ + do { \ + if ((printer)->status != DemangleOk) { \ + PRINT_STR((printer), "?"); \ + return OverflowOk; \ + } else { \ + demangle_status _parse_status = method(&(printer)->parser, ## __VA_ARGS__); \ + if (_parse_status != DemangleOk) { \ + PRINT_STR((printer), demangle_error_message(_parse_status)); \ + (printer)->status = _parse_status; \ + return OverflowOk; \ + } \ + } \ + } while(0) + +#define PRINT_SEP_LIST(printer, body, sep) \ + do { \ + size_t _sep_list_i; \ + PRINT_SEP_LIST_COUNT(printer, _sep_list_i, body, sep); \ + } while(0) + +#define PRINT_SEP_LIST_COUNT(printer, count, body, sep) \ + do { \ + count = 0; \ + while ((printer)->status == DemangleOk && !printer_eat((printer), 'E')) { \ + if (count > 0) { PRINT_STR(printer, sep); } \ + body; \ + count++; \ + } \ + } while(0) + +static bool printer_eat(struct printer *printer, uint8_t b) { + if (printer->status != DemangleOk) { + return false; + } + + return parser_eat(&printer->parser, b); +} + +static void printer_pop_depth(struct printer *printer) { + if (printer->status == DemangleOk) { + parser_pop_depth(&printer->parser); + } +} + +static NODISCARD overflow_status printer_print_buf(struct printer *printer, const char *start, size_t len) { + if (printer->out == NULL) { + return OverflowOk; + } + if (printer->out_len < len) { + return OverflowOverflow; + } + + memcpy(printer->out, start, len); + printer->out += len; + printer->out_len -= len; + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_str(struct printer *printer, const char *buf) { + return printer_print_buf(printer, buf, strlen(buf)); +} + +static NODISCARD overflow_status printer_print_ch(struct printer *printer, char ch) { + return printer_print_buf(printer, &ch, 1); +} + +static NODISCARD overflow_status printer_print_u64(struct printer *printer, uint64_t n) { + char buf[32] = {0}; + sprintf(buf, "%llu", (unsigned long long)n); // printing uint64 uses 21 < 32 chars + return printer_print_str(printer, buf); +} + +static NODISCARD overflow_status printer_print_ident(struct printer *printer, struct ident *ident) { + if (printer->out == NULL) { + return OverflowOk; + } + + size_t out_len = printer->out_len; + overflow_status status; + if ((status = display_ident(ident->ascii_start, ident->ascii_len, ident->punycode_start, ident->punycode_len, (uint8_t*)printer->out, &out_len)) != OverflowOk) { + return status; + } + printer->out += out_len; + printer->out_len -= out_len; + return OverflowOk; +} + +typedef overflow_status (*printer_fn)(struct printer *printer); +typedef overflow_status (*backref_fn)(struct printer *printer, bool *arg); + +static NODISCARD overflow_status printer_print_backref(struct printer *printer, backref_fn func, bool *arg) { + struct parser backref; + PARSE(printer, parser_backref, &backref); + + if (printer->out == NULL) { + return OverflowOk; + } + + struct parser orig_parser = printer->parser; + demangle_status orig_status = printer->status; // fixme not sure this is needed match for Ok on the Rust side + printer->parser = backref; + printer->status = DemangleOk; + overflow_status status = func(printer, arg); + printer->parser = orig_parser; + printer->status = orig_status; + + return status; +} + +static NODISCARD overflow_status printer_print_lifetime_from_index(struct printer *printer, uint64_t lt) { + // Bound lifetimes aren't tracked when skipping printing. + if (printer->out == NULL) { + return OverflowOk; + } + + PRINT_STR(printer, "'"); + if (lt == 0) { + PRINT_STR(printer, "_"); + return OverflowOk; + } + + if (printer->bound_lifetime_depth < lt) { + INVALID(printer); + } else { + uint64_t depth = printer->bound_lifetime_depth - lt; + if (depth < 26) { + PRINT_CH(printer, 'a' + depth); + } else { + PRINT_STR(printer, "_"); + PRINT_U64(printer, depth); + } + + return OverflowOk; + } +} + +static NODISCARD overflow_status printer_in_binder(struct printer *printer, printer_fn func) { + uint64_t bound_lifetimes; + PARSE(printer, parser_opt_integer_62, 'G', &bound_lifetimes); + + // Don't track bound lifetimes when skipping printing. + if (printer->out == NULL) { + return func(printer); + } + + if (bound_lifetimes > 0) { + PRINT_STR(printer, "for<"); + for (uint64_t i = 0; i < bound_lifetimes; i++) { + if (i > 0) { + PRINT_STR(printer, ", "); + } + printer->bound_lifetime_depth++; + PRINT(printer_print_lifetime_from_index(printer, 1)); + } + PRINT_STR(printer, "> "); + } + + overflow_status r = func(printer); + printer->bound_lifetime_depth -= bound_lifetimes; + + return r; +} + +static NODISCARD overflow_status printer_print_generic_arg(struct printer *printer) { + if (printer_eat(printer, 'L')) { + uint64_t lt; + PARSE(printer, parser_integer_62, <); + return printer_print_lifetime_from_index(printer, lt); + } else if (printer_eat(printer, 'K')) { + return printer_print_const(printer, false); + } else { + return printer_print_type(printer); + } +} + +static NODISCARD overflow_status printer_print_generic_args(struct printer *printer) { + PRINT_STR(printer, "<"); + PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", "); + PRINT_STR(printer, ">"); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_path_out_of_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_path(printer, false); +} + +static NODISCARD overflow_status printer_print_path_in_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_path(printer, true); +} + +static NODISCARD overflow_status printer_print_path(struct printer *printer, bool in_value) { + PARSE(printer, parser_push_depth); + uint8_t tag; + PARSE(printer, parser_ch, &tag); + + overflow_status st; + uint64_t dis; + struct ident name; + parser_namespace_type ns; + char *orig_out; + + switch(tag) { + case 'C': + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + + PRINT_IDENT(printer, &name); + + if (printer->out != NULL && !printer->alternate && dis != 0) { + PRINT_STR(printer, "["); + char buf[24] = {0}; + sprintf(buf, "%llx", (unsigned long long)dis); + PRINT_STR(printer, buf); + PRINT_STR(printer, "]"); + } + break; + case 'N': + PARSE(printer, parser_namespace, &ns); + if ((st = printer_print_path(printer, in_value)) != OverflowOk) { + return st; + } + + // HACK(eddyb) if the parser is already marked as having errored, + // `parse!` below will print a `?` without its preceding `::` + // (because printing the `::` is skipped in certain conditions, + // i.e. a lowercase namespace with an empty identifier), + // so in order to get `::?`, the `::` has to be printed here. + if (printer->status != DemangleOk) { + PRINT_STR(printer, "::"); + } + + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + // Special namespace, like closures and shims + if (ns) { + PRINT_STR(printer, "::{"); + if (ns == 'C') { + PRINT_STR(printer, "closure"); + } else if (ns == 'S') { + PRINT_STR(printer, "shim"); + } else { + PRINT_CH(printer, ns); + } + if (name.ascii_len != 0 || name.punycode_len != 0) { + PRINT_STR(printer, ":"); + PRINT_IDENT(printer, &name); + } + PRINT_STR(printer, "#"); + PRINT_U64(printer, dis); + PRINT_STR(printer, "}"); + } else { + // Implementation-specific/unspecified namespaces + if (name.ascii_len != 0 || name.punycode_len != 0) { + PRINT_STR(printer, "::"); + PRINT_IDENT(printer, &name); + } + } + break; + case 'M': + case 'X': + // for impls, ignore the impls own path + PARSE(printer, parser_disambiguator, &dis); + orig_out = printer->out; + printer->out = NULL; + PRINT(printer_print_path(printer, false)); + printer->out = orig_out; + + // fallthru + case 'Y': + PRINT_STR(printer, "<"); + PRINT(printer_print_type(printer)); + if (tag != 'M') { + PRINT_STR(printer, " as "); + PRINT(printer_print_path(printer, false)); + } + PRINT_STR(printer, ">"); + break; + case 'I': + PRINT(printer_print_path(printer, in_value)); + if (in_value) { + PRINT_STR(printer, "::"); + } + PRINT(printer_print_generic_args(printer)); + break; + case 'B': + PRINT(printer_print_backref(printer, in_value ? printer_print_path_in_value : printer_print_path_out_of_value, NULL)); + break; + default: + INVALID(printer); + break; + } + + printer_pop_depth(printer); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_const_uint(struct printer *printer, uint8_t tag) { + struct buf hex; + PARSE(printer, parser_hex_nibbles, &hex); + + uint64_t val; + if (try_parse_uint(hex.start, hex.len, &val)) { + PRINT_U64(printer, val); + } else { + PRINT_STR(printer, "0x"); + PRINT(printer_print_buf(printer, hex.start, hex.len)); + } + + if (printer->out != NULL && !printer->alternate) { + const char *ty = basic_type(tag); + if (/* safety */ ty != NULL) { + PRINT_STR(printer, ty); + } + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_const_str_literal(struct printer *printer) { + struct buf hex; + PARSE(printer, parser_hex_nibbles, &hex); + + size_t out_len = SIZE_MAX; + nibbles_to_string_status nts_status = nibbles_to_string(hex.start, hex.len, NULL, &out_len); + switch (nts_status) { + case NtsOk: + if (printer->out != NULL) { + out_len = printer->out_len; + nts_status = nibbles_to_string(hex.start, hex.len, (uint8_t*)printer->out, &out_len); + if (nts_status != NtsOk) { + return OverflowOverflow; + } + printer->out += out_len; + printer->out_len -= out_len; + } + return OverflowOk; + case NtsOverflow: + // technically if there is a string of size `SIZE_MAX/6` whose escaped version overflows + // SIZE_MAX but has an invalid char, this will be a "fake" overflow. In practice, + // that is not going to happen and a fuzzer will not generate strings of this length. + return OverflowOverflow; + case NtsInvalid: + default: + INVALID(printer); + } +} + +static NODISCARD overflow_status printer_print_const_struct(struct printer *printer) { + uint64_t dis; + struct ident name; + PARSE(printer, parser_disambiguator, &dis); + PARSE(printer, parser_ident, &name); + PRINT_IDENT(printer, &name); + PRINT_STR(printer, ": "); + return printer_print_const(printer, true); +} + +static NODISCARD overflow_status printer_print_const_out_of_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_const(printer, false); +} + +static NODISCARD overflow_status printer_print_const_in_value(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_const(printer, true); +} + +static NODISCARD overflow_status printer_print_const(struct printer *printer, bool in_value) { + uint8_t tag; + + PARSE(printer, parser_ch, &tag); + PARSE(printer, parser_push_depth); + + struct buf hex; + uint64_t val; + size_t count; + + bool opened_brace = false; +#define OPEN_BRACE_IF_OUTSIDE_EXPR \ + do { if (!in_value) { \ + opened_brace = true; \ + PRINT_STR(printer, "{"); \ + } } while(0) + + switch(tag) { + case 'p': + PRINT_STR(printer, "_"); + break; + // Primitive leaves with hex-encoded values (see `basic_type`). + case 'a': + case 's': + case 'l': + case 'x': + case 'n': + case 'i': + if (printer_eat(printer, 'n')) { + PRINT_STR(printer, "-"); + } + /* fallthrough */ + case 'h': + case 't': + case 'm': + case 'y': + case 'o': + case 'j': + PRINT(printer_print_const_uint(printer, tag)); + break; + case 'b': + PARSE(printer, parser_hex_nibbles, &hex); + if (try_parse_uint(hex.start, hex.len, &val)) { + if (val == 0) { + PRINT_STR(printer, "false"); + } else if (val == 1) { + PRINT_STR(printer, "true"); + } else { + INVALID(printer); + } + } else { + INVALID(printer); + } + break; + case 'c': + PARSE(printer, parser_hex_nibbles, &hex); + if (try_parse_uint(hex.start, hex.len, &val) + && val < UINT32_MAX + && validate_char((uint32_t)val)) + { + char escaped_buf[ESCAPED_SIZE]; + size_t escaped_size = char_to_string((uint32_t)val, '\'', true, &escaped_buf); + + PRINT_STR(printer, "'"); + PRINT(printer_print_buf(printer, escaped_buf, escaped_size)); + PRINT_STR(printer, "'"); + } else { + INVALID(printer); + } + break; + case 'e': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "*"); + PRINT(printer_print_const_str_literal(printer)); + break; + case 'R': + case 'Q': + if (tag == 'R' && printer_eat(printer, 'e')) { + PRINT(printer_print_const_str_literal(printer)); + } else { + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "&"); + if (tag != 'R') { + PRINT_STR(printer, "mut "); + } + PRINT(printer_print_const(printer, true)); + } + break; + case 'A': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "["); + PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); + PRINT_STR(printer, "]"); + break; + case 'T': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT_STR(printer, "("); + PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_const(printer, true)), ", "); + if (count == 1) { + PRINT_STR(printer, ","); + } + PRINT_STR(printer, ")"); + break; + case 'V': + OPEN_BRACE_IF_OUTSIDE_EXPR; + PRINT(printer_print_path(printer, true)); + PARSE(printer, parser_ch, &tag); + switch(tag) { + case 'U': + break; + case 'T': + PRINT_STR(printer, "("); + PRINT_SEP_LIST(printer, PRINT(printer_print_const(printer, true)), ", "); + PRINT_STR(printer, ")"); + break; + case 'S': + PRINT_STR(printer, " { "); + PRINT_SEP_LIST(printer, PRINT(printer_print_const_struct(printer)), ", "); + PRINT_STR(printer, " }"); + break; + default: + INVALID(printer); + } + break; + case 'B': + PRINT(printer_print_backref(printer, in_value ? printer_print_const_in_value : printer_print_const_out_of_value, NULL)); + break; + default: + INVALID(printer); + } +#undef OPEN_BRACE_IF_OUTSIDE_EXPR + + if (opened_brace) { + PRINT_STR(printer, "}"); + } + printer_pop_depth(printer); + + return OverflowOk; +} + +/// A trait in a trait object may have some "existential projections" +/// (i.e. associated type bindings) after it, which should be printed +/// in the `<...>` of the trait, e.g. `dyn Trait<T, U, Assoc=X>`. +/// To this end, this method will keep the `<...>` of an 'I' path +/// open, by omitting the `>`, and return `Ok(true)` in that case. +static NODISCARD overflow_status printer_print_maybe_open_generics(struct printer *printer, bool *open) { + if (printer_eat(printer, 'B')) { + // NOTE(eddyb) the closure may not run if printing is being skipped, + // but in that case the returned boolean doesn't matter. + *open = false; + return printer_print_backref(printer, printer_print_maybe_open_generics, open); + } else if(printer_eat(printer, 'I')) { + PRINT(printer_print_path(printer, false)); + PRINT_STR(printer, "<"); + PRINT_SEP_LIST(printer, PRINT(printer_print_generic_arg(printer)), ", "); + *open = true; + return OverflowOk; + } else { + PRINT(printer_print_path(printer, false)); + *open = false; + return OverflowOk; + } +} + +static NODISCARD overflow_status printer_print_dyn_trait(struct printer *printer) { + bool open; + PRINT(printer_print_maybe_open_generics(printer, &open)); + + while (printer_eat(printer, 'p')) { + if (!open) { + PRINT_STR(printer, "<"); + open = true; + } else { + PRINT_STR(printer, ", "); + } + + struct ident name; + PARSE(printer, parser_ident, &name); + + PRINT_IDENT(printer, &name); + PRINT_STR(printer, " = "); + PRINT(printer_print_type(printer)); + } + + if (open) { + PRINT_STR(printer, ">"); + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_object_bounds(struct printer *printer) { + PRINT_SEP_LIST(printer, PRINT(printer_print_dyn_trait(printer)), " + "); + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_function_type(struct printer *printer) { + bool is_unsafe = printer_eat(printer, 'U'); + const char *abi; + size_t abi_len; + if (printer_eat(printer, 'K')) { + if (printer_eat(printer, 'C')) { + abi = "C"; + abi_len = 1; + } else { + struct ident abi_ident; + PARSE(printer, parser_ident, &abi_ident); + if (abi_ident.ascii_len == 0 || abi_ident.punycode_len != 0) { + INVALID(printer); + } + abi = abi_ident.ascii_start; + abi_len = abi_ident.ascii_len; + } + } else { + abi = NULL; + abi_len = 0; + } + + if (is_unsafe) { + PRINT_STR(printer, "unsafe "); + } + + if (abi != NULL) { + PRINT_STR(printer, "extern \""); + + // replace _ with - + while (abi_len > 0) { + const char *minus = memchr(abi, '_', abi_len); + if (minus == NULL) { + PRINT(printer_print_buf(printer, (const char*)abi, abi_len)); + break; + } else { + size_t space_to_minus = minus - abi; + PRINT(printer_print_buf(printer, (const char*)abi, space_to_minus)); + PRINT_STR(printer, "-"); + abi = minus + 1; + abi_len -= (space_to_minus + 1); + } + } + + PRINT_STR(printer, "\" "); + } + + PRINT_STR(printer, "fn("); + PRINT_SEP_LIST(printer, PRINT(printer_print_type(printer)), ", "); + PRINT_STR(printer, ")"); + + if (printer_eat(printer, 'u')) { + // Skip printing the return type if it's 'u', i.e. `()`. + } else { + PRINT_STR(printer, " -> "); + PRINT(printer_print_type(printer)); + } + + return OverflowOk; +} + +static NODISCARD overflow_status printer_print_type_backref(struct printer *printer, bool *_arg) { + (void)_arg; + return printer_print_type(printer); +} + +static NODISCARD overflow_status printer_print_type(struct printer *printer) { + uint8_t tag; + PARSE(printer, parser_ch, &tag); + + const char *basic_ty = basic_type(tag); + if (basic_ty) { + return printer_print_str(printer, basic_ty); + } + + uint64_t count; + uint64_t lt; + + PARSE(printer, parser_push_depth); + switch (tag) { + case 'R': + case 'Q': + PRINT_STR(printer, "&"); + if (printer_eat(printer, 'L')) { + PARSE(printer, parser_integer_62, <); + if (lt != 0) { + PRINT(printer_print_lifetime_from_index(printer, lt)); + PRINT_STR(printer, " "); + } + } + if (tag != 'R') { + PRINT_STR(printer, "mut "); + } + PRINT(printer_print_type(printer)); + break; + case 'P': + case 'O': + PRINT_STR(printer, "*"); + if (tag != 'P') { + PRINT_STR(printer, "mut "); + } else { + PRINT_STR(printer, "const "); + } + PRINT(printer_print_type(printer)); + break; + case 'A': + case 'S': + PRINT_STR(printer, "["); + PRINT(printer_print_type(printer)); + if (tag == 'A') { + PRINT_STR(printer, "; "); + PRINT(printer_print_const(printer, true)); + } + PRINT_STR(printer, "]"); + break; + case 'T': + PRINT_STR(printer, "("); + PRINT_SEP_LIST_COUNT(printer, count, PRINT(printer_print_type(printer)), ", "); + if (count == 1) { + PRINT_STR(printer, ","); + } + PRINT_STR(printer, ")"); + break; + case 'F': + PRINT(printer_in_binder(printer, printer_print_function_type)); + break; + case 'D': + PRINT_STR(printer, "dyn "); + PRINT(printer_in_binder(printer, printer_print_object_bounds)); + + if (!printer_eat(printer, 'L')) { + INVALID(printer); + } + PARSE(printer, parser_integer_62, <); + + if (lt != 0) { + PRINT_STR(printer, " + "); + PRINT(printer_print_lifetime_from_index(printer, lt)); + } + break; + case 'B': + PRINT(printer_print_backref(printer, printer_print_type_backref, NULL)); + break; + default: + // Go back to the tag, so `print_path` also sees it. + if (printer->status == DemangleOk && /* safety */ printer->parser.next > 0) { + printer->parser.next--; + } + PRINT(printer_print_path(printer, false)); + } + + printer_pop_depth(printer); + return OverflowOk; +} + +NODISCARD static demangle_status rust_demangle_legacy_demangle(const char *s, size_t s_len, struct demangle_legacy *res, const char **rest) +{ + if (s_len > strlen(s)) { + // s_len only exists to shorten the string, this is not a buffer API + return DemangleInvalid; + } + + const char *inner; + size_t inner_len; + if (s_len >= 3 && !strncmp(s, "_ZN", 3)) { + inner = s + 3; + inner_len = s_len - 3; + } else if (s_len >= 2 && !strncmp(s, "ZN", 2)) { + // On Windows, dbghelp strips leading underscores, so we accept "ZN...E" + // form too. + inner = s + 2; + inner_len = s_len - 2; + } else if (s_len >= 4 && !strncmp(s, "__ZN", 4)) { + // On OSX, symbols are prefixed with an extra _ + inner = s + 4; + inner_len = s_len - 4; + } else { + return DemangleInvalid; + } + + if (!str_isascii(inner, inner_len)) { + return DemangleInvalid; + } + + size_t elements = 0; + const char *chars = inner; + size_t chars_len = inner_len; + if (chars_len == 0) { + return DemangleInvalid; + } + char c; + while ((c = *chars) != 'E') { + // Decode an identifier element's length + if (c < '0' || c > '9') { + return DemangleInvalid; + } + size_t len = 0; + while (c >= '0' && c <= '9') { + size_t d = c - '0'; + if (len > SIZE_MAX / 10) { + return DemangleInvalid; + } + len *= 10; + if (len > SIZE_MAX - d) { + return DemangleInvalid; + } + len += d; + + chars++; + chars_len--; + if (chars_len == 0) { + return DemangleInvalid; + } + c = *chars; + } + + // Advance by the length + if (chars_len <= len) { + return DemangleInvalid; + } + chars += len; + chars_len -= len; + elements++; + } + *res = (struct demangle_legacy) { inner, inner_len, elements }; + *rest = chars + 1; + return DemangleOk; +} + +static bool is_rust_hash(const char *s, size_t len) { + if (len == 0 || s[0] != 'h') { + return false; + } + + for (size_t i = 1; i < len; i++) { + if (!((s[i] >= '0' && s[i] <= '9') || (s[i] >= 'a' && s[i] <= 'f') || (s[i] >= 'A' && s[i] <= 'F'))) { + return false; + } + } + + return true; +} + +NODISCARD static overflow_status rust_demangle_legacy_display_demangle(struct demangle_legacy res, char *out, size_t len, bool alternate) +{ + struct printer printer = { + // not actually using the parser part of the printer, just keeping it to share the format functions + DemangleOk, + { NULL }, + out, + len, + 0, + alternate + }; + const char *inner = res.mangled; + for (size_t element = 0; element < res.elements; element++) { + size_t i = 0; + const char *rest; + for (rest = inner; rest < res.mangled + res.mangled_len && *rest >= '0' && *rest <= '9'; rest++) { + i *= 10; + i += *rest - '0'; + } + if ((size_t)(res.mangled + res.mangled_len - rest) < i) { + // safety: shouldn't reach this place if the input string is validated. bail out. + // safety: we knwo rest <= res.mangled + res.mangled_len from the for-loop above + break; + } + + size_t len = i; + inner = rest + len; + + // From here on, inner contains a pointer to the next element, rest[:len] to the current one + if (alternate && element + 1 == res.elements && is_rust_hash(rest, i)) { + break; + } + if (element != 0) { + PRINT_STR(&printer, "::"); + } + + if (len >= 2 && !strncmp(rest, "_$", 2)) { + rest++; + len--; + } + + while (len > 0) { + if (rest[0] == '.') { + if (len >= 2 && rest[1] == '.') { + PRINT_STR(&printer, "::"); + rest += 2; + len -= 2; + } else { + PRINT_STR(&printer, "."); + rest += 1; + len -= 1; + } + } else if (rest[0] == '$') { + const char *escape = memchr(rest + 1, '$', len - 1); + if (escape == NULL) { + break; + } + const char *escape_start = rest + 1; + size_t escape_len = escape - (rest + 1); + + size_t next_len = len - (escape + 1 - rest); + const char *next_rest = escape + 1; + + char ch; + if ((escape_len == 2 && escape_start[0] == 'S' && escape_start[1] == 'P')) { + ch = '@'; + } else if ((escape_len == 2 && escape_start[0] == 'B' && escape_start[1] == 'P')) { + ch = '*'; + } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'F')) { + ch = '&'; + } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'T')) { + ch = '<'; + } else if ((escape_len == 2 && escape_start[0] == 'G' && escape_start[1] == 'T')) { + ch = '>'; + } else if ((escape_len == 2 && escape_start[0] == 'L' && escape_start[1] == 'P')) { + ch = '('; + } else if ((escape_len == 2 && escape_start[0] == 'R' && escape_start[1] == 'P')) { + ch = ')'; + } else if ((escape_len == 1 && escape_start[0] == 'C')) { + ch = ','; + } else { + if (escape_len > 1 && escape_start[0] == 'u') { + escape_start++; + escape_len--; + uint64_t val; + if (try_parse_uint(escape_start, escape_len, &val) + && val < UINT32_MAX + && validate_char((uint32_t)val)) + { + if (!unicode_iscontrol(val)) { + uint8_t wchr[4]; + size_t wchr_len = code_to_utf8(wchr, (uint32_t)val); + PRINT(printer_print_buf(&printer, (const char*)wchr, wchr_len)); + len = next_len; + rest = next_rest; + continue; + } + } + } + break; // print the rest of this element raw + } + PRINT_CH(&printer, ch); + len = next_len; + rest = next_rest; + } else { + size_t j = 0; + for (;j < len && rest[j] != '$' && rest[j] != '.';j++); + if (j == len) { + break; + } + PRINT(printer_print_buf(&printer, rest, j)); + rest += j; + len -= j; + } + } + PRINT(printer_print_buf(&printer, rest, len)); + } + + if (printer.out_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + *printer.out = '\0'; + return OverflowOk; +} + +static bool is_symbol_like(const char *s, size_t len) { + // rust-demangle definition of symbol like: control characters and space are not symbol-like, all else is + for (size_t i = 0; i < len; i++) { + char ch = s[i]; + if (!(ch >= 0x21 && ch <= 0x7e)) { + return false; + } + } + return true; +} + +void rust_demangle_demangle(const char *s, struct demangle *res) +{ + // During ThinLTO LLVM may import and rename internal symbols, so strip out + // those endings first as they're one of the last manglings applied to symbol + // names. + const char *llvm = ".llvm."; + const char *found_llvm = strstr(s, llvm); + size_t s_len = strlen(s); + if (found_llvm) { + const char *all_hex_ptr = found_llvm + strlen(".llvm."); + bool all_hex = true; + for (;*all_hex_ptr;all_hex_ptr++) { + if (!(('0' <= *all_hex_ptr && *all_hex_ptr <= '9') || + ('A' <= *all_hex_ptr && *all_hex_ptr <= 'F') || + *all_hex_ptr == '@')) { + all_hex = false; + break; + } + } + + if (all_hex) { + s_len = found_llvm - s; + } + } + + const char *suffix; + struct demangle_legacy legacy; + demangle_status st = rust_demangle_legacy_demangle(s, s_len, &legacy, &suffix); + if (st == DemangleOk) { + *res = (struct demangle) { + .style=DemangleStyleLegacy, + .mangled=legacy.mangled, + .mangled_len=legacy.mangled_len, + .elements=legacy.elements, + .original=s, + .original_len=s_len, + .suffix=suffix, + .suffix_len=s_len - (suffix - s), + }; + } else { + struct demangle_v0 v0; + st = rust_demangle_v0_demangle(s, s_len, &v0, &suffix); + if (st == DemangleOk) { + *res = (struct demangle) { + .style=DemangleStyleV0, + .mangled=v0.mangled, + .mangled_len=v0.mangled_len, + .elements=0, + .original=s, + .original_len=s_len, + .suffix=suffix, + .suffix_len=s_len - (suffix - s), + }; + } else { + *res = (struct demangle) { + .style=DemangleStyleUnknown, + .mangled=NULL, + .mangled_len=0, + .elements=0, + .original=s, + .original_len=s_len, + .suffix=s, + .suffix_len=0, + }; + } + } + + // Output like LLVM IR adds extra period-delimited words. See if + // we are in that case and save the trailing words if so. + if (res->suffix_len) { + if (res->suffix[0] == '.' && is_symbol_like(res->suffix, res->suffix_len)) { + // Keep the suffix + } else { + // Reset the suffix and invalidate the demangling + res->style = DemangleStyleUnknown; + res->suffix_len = 0; + } + } +} + +bool rust_demangle_is_known(struct demangle *res) { + return res->style != DemangleStyleUnknown; +} + +overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate) { + size_t original_len = res->original_len; + size_t out_len; + switch (res->style) { + case DemangleStyleUnknown: + if (len < original_len) { + return OverflowOverflow; + } else { + memcpy(out, res->original, original_len); + out += original_len; + len -= original_len; + break; + } + break; + case DemangleStyleLegacy: { + struct demangle_legacy legacy = { + res->mangled, + res->mangled_len, + res->elements + }; + if (rust_demangle_legacy_display_demangle(legacy, out, len, alternate) == OverflowOverflow) { + return OverflowOverflow; + } + out_len = strlen(out); + out += out_len; + len -= out_len; + break; + } + case DemangleStyleV0: { + struct demangle_v0 v0 = { + res->mangled, + res->mangled_len + }; + if (rust_demangle_v0_display_demangle(v0, out, len, alternate) == OverflowOverflow) { + return OverflowOverflow; + } + out_len = strlen(out); + out += out_len; + len -= out_len; + break; + } + } + size_t suffix_len = res->suffix_len; + if (len < suffix_len || len - suffix_len < OVERFLOW_MARGIN) { + return OverflowOverflow; + } + memcpy(out, res->suffix, suffix_len); + out[suffix_len] = 0; + return OverflowOk; +} diff --git a/tools/perf/util/demangle-rust-v0.h b/tools/perf/util/demangle-rust-v0.h new file mode 100644 index 000000000000..d0092818610a --- /dev/null +++ b/tools/perf/util/demangle-rust-v0.h @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// The contents of this file come from the Rust rustc-demangle library, hosted +// in the <https://github.com/rust-lang/rustc-demangle> repository, licensed +// under "Apache-2.0 OR MIT". For copyright details, see +// <https://github.com/rust-lang/rustc-demangle/blob/main/README.md>. +// Please note that the file should be kept as close as possible to upstream. + +#ifndef _H_DEMANGLE_V0_H +#define _H_DEMANGLE_V0_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stddef.h> + +#if defined(__GNUC__) || defined(__clang__) +#define DEMANGLE_NODISCARD __attribute__((warn_unused_result)) +#else +#define DEMANGLE_NODISCARD +#endif + +typedef enum { + OverflowOk, + OverflowOverflow +} overflow_status; + +enum demangle_style { + DemangleStyleUnknown = 0, + DemangleStyleLegacy, + DemangleStyleV0, +}; + +// Not using a union here to make the struct easier to copy-paste if needed. +struct demangle { + enum demangle_style style; + // points to the "mangled" part of the name, + // not including `ZN` or `R` prefixes. + const char *mangled; + size_t mangled_len; + // In DemangleStyleLegacy, is the number of path elements + size_t elements; + // while it's called "original", it will not contain `.llvm.9D1C9369@@16` suffixes + // that are to be ignored. + const char *original; + size_t original_len; + // Contains the part after the mangled name that is to be outputted, + // which can be `.exit.i.i` suffixes LLVM sometimes adds. + const char *suffix; + size_t suffix_len; +}; + +// if the length of the output buffer is less than `output_len-OVERFLOW_MARGIN`, +// the demangler will return `OverflowOverflow` even if there is no overflow. +#define OVERFLOW_MARGIN 4 + +/// Demangle a C string that refers to a Rust symbol and put the demangle intermediate result in `res`. +/// Beware that `res` contains references into `s`. If `s` is modified (or free'd) before calling +/// `rust_demangle_display_demangle` behavior is undefined. +/// +/// Use `rust_demangle_display_demangle` to convert it to an actual string. +void rust_demangle_demangle(const char *s, struct demangle *res); + +/// Write the string in a `struct demangle` into a buffer. +/// +/// Return `OverflowOk` if the output buffer was sufficiently big, `OverflowOverflow` if it wasn't. +/// This function is `O(n)` in the length of the input + *output* [$], but the demangled output of demangling a symbol can +/// be exponentially[$$] large, therefore it is recommended to have a sane bound (`rust-demangle` +/// uses 1,000,000 bytes) on `len`. +/// +/// `alternate`, if true, uses the less verbose alternate formatting (Rust `{:#}`) is used, which does not show +/// symbol hashes and types of constant ints. +/// +/// [$] It's `O(n * MAX_DEPTH)`, but `MAX_DEPTH` is a constant 300 and therefore it's `O(n)` +/// [$$] Technically, bounded by `O(n^MAX_DEPTH)`, but this is practically exponential. +DEMANGLE_NODISCARD overflow_status rust_demangle_display_demangle(struct demangle const *res, char *out, size_t len, bool alternate); + +/// Returns true if `res` refers to a known valid Rust demangling style, false if it's an unknown style. +bool rust_demangle_is_known(struct demangle *res); + +#undef DEMANGLE_NODISCARD + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/tools/perf/util/demangle-rust.c b/tools/perf/util/demangle-rust.c deleted file mode 100644 index a659fc69f73a..000000000000 --- a/tools/perf/util/demangle-rust.c +++ /dev/null @@ -1,269 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <string.h> -#include "debug.h" - -#include "demangle-rust.h" - -/* - * Mangled Rust symbols look like this: - * - * _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a - * - * The original symbol is: - * - * <std::sys::fd::FileDesc as core::ops::Drop>::drop - * - * The last component of the path is a 64-bit hash in lowercase hex, prefixed - * with "h". Rust does not have a global namespace between crates, an illusion - * which Rust maintains by using the hash to distinguish things that would - * otherwise have the same symbol. - * - * Any path component not starting with a XID_Start character is prefixed with - * "_". - * - * The following escape sequences are used: - * - * "," => $C$ - * "@" => $SP$ - * "*" => $BP$ - * "&" => $RF$ - * "<" => $LT$ - * ">" => $GT$ - * "(" => $LP$ - * ")" => $RP$ - * " " => $u20$ - * "'" => $u27$ - * "[" => $u5b$ - * "]" => $u5d$ - * "~" => $u7e$ - * - * A double ".." means "::" and a single "." means "-". - * - * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$ - */ - -static const char *hash_prefix = "::h"; -static const size_t hash_prefix_len = 3; -static const size_t hash_len = 16; - -static bool is_prefixed_hash(const char *start); -static bool looks_like_rust(const char *sym, size_t len); -static bool unescape(const char **in, char **out, const char *seq, char value); - -/* - * INPUT: - * sym: symbol that has been through BFD-demangling - * - * This function looks for the following indicators: - * - * 1. The hash must consist of "h" followed by 16 lowercase hex digits. - * - * 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible - * hex digits. This is true of 99.9998% of hashes so once in your life you - * may see a false negative. The point is to notice path components that - * could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In - * this case a false positive (non-Rust symbol has an important path - * component removed because it looks like a Rust hash) is worse than a - * false negative (the rare Rust symbol is not demangled) so this sets the - * balance in favor of false negatives. - * - * 3. There must be no characters other than a-zA-Z0-9 and _.:$ - * - * 4. There must be no unrecognized $-sign sequences. - * - * 5. There must be no sequence of three or more dots in a row ("..."). - */ -bool -rust_is_mangled(const char *sym) -{ - size_t len, len_without_hash; - - if (!sym) - return false; - - len = strlen(sym); - if (len <= hash_prefix_len + hash_len) - /* Not long enough to contain "::h" + hash + something else */ - return false; - - len_without_hash = len - (hash_prefix_len + hash_len); - if (!is_prefixed_hash(sym + len_without_hash)) - return false; - - return looks_like_rust(sym, len_without_hash); -} - -/* - * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex - * digits must comprise between 5 and 15 (inclusive) distinct digits. - */ -static bool is_prefixed_hash(const char *str) -{ - const char *end; - bool seen[16]; - size_t i; - int count; - - if (strncmp(str, hash_prefix, hash_prefix_len)) - return false; - str += hash_prefix_len; - - memset(seen, false, sizeof(seen)); - for (end = str + hash_len; str < end; str++) - if (*str >= '0' && *str <= '9') - seen[*str - '0'] = true; - else if (*str >= 'a' && *str <= 'f') - seen[*str - 'a' + 10] = true; - else - return false; - - /* Count how many distinct digits seen */ - count = 0; - for (i = 0; i < 16; i++) - if (seen[i]) - count++; - - return count >= 5 && count <= 15; -} - -static bool looks_like_rust(const char *str, size_t len) -{ - const char *end = str + len; - - while (str < end) - switch (*str) { - case '$': - if (!strncmp(str, "$C$", 3)) - str += 3; - else if (!strncmp(str, "$SP$", 4) - || !strncmp(str, "$BP$", 4) - || !strncmp(str, "$RF$", 4) - || !strncmp(str, "$LT$", 4) - || !strncmp(str, "$GT$", 4) - || !strncmp(str, "$LP$", 4) - || !strncmp(str, "$RP$", 4)) - str += 4; - else if (!strncmp(str, "$u20$", 5) - || !strncmp(str, "$u27$", 5) - || !strncmp(str, "$u5b$", 5) - || !strncmp(str, "$u5d$", 5) - || !strncmp(str, "$u7e$", 5)) - str += 5; - else - return false; - break; - case '.': - /* Do not allow three or more consecutive dots */ - if (!strncmp(str, "...", 3)) - return false; - /* Fall through */ - case 'a' ... 'z': - case 'A' ... 'Z': - case '0' ... '9': - case '_': - case ':': - str++; - break; - default: - return false; - } - - return true; -} - -/* - * INPUT: - * sym: symbol for which rust_is_mangled(sym) returns true - * - * The input is demangled in-place because the mangled name is always longer - * than the demangled one. - */ -void -rust_demangle_sym(char *sym) -{ - const char *in; - char *out; - const char *end; - - if (!sym) - return; - - in = sym; - out = sym; - end = sym + strlen(sym) - (hash_prefix_len + hash_len); - - while (in < end) - switch (*in) { - case '$': - if (!(unescape(&in, &out, "$C$", ',') - || unescape(&in, &out, "$SP$", '@') - || unescape(&in, &out, "$BP$", '*') - || unescape(&in, &out, "$RF$", '&') - || unescape(&in, &out, "$LT$", '<') - || unescape(&in, &out, "$GT$", '>') - || unescape(&in, &out, "$LP$", '(') - || unescape(&in, &out, "$RP$", ')') - || unescape(&in, &out, "$u20$", ' ') - || unescape(&in, &out, "$u27$", '\'') - || unescape(&in, &out, "$u5b$", '[') - || unescape(&in, &out, "$u5d$", ']') - || unescape(&in, &out, "$u7e$", '~'))) { - pr_err("demangle-rust: unexpected escape sequence"); - goto done; - } - break; - case '_': - /* - * If this is the start of a path component and the next - * character is an escape sequence, ignore the - * underscore. The mangler inserts an underscore to make - * sure the path component begins with a XID_Start - * character. - */ - if ((in == sym || in[-1] == ':') && in[1] == '$') - in++; - else - *out++ = *in++; - break; - case '.': - if (in[1] == '.') { - /* ".." becomes "::" */ - *out++ = ':'; - *out++ = ':'; - in += 2; - } else { - /* "." becomes "-" */ - *out++ = '-'; - in++; - } - break; - case 'a' ... 'z': - case 'A' ... 'Z': - case '0' ... '9': - case ':': - *out++ = *in++; - break; - default: - pr_err("demangle-rust: unexpected character '%c' in symbol\n", - *in); - goto done; - } - -done: - *out = '\0'; -} - -static bool unescape(const char **in, char **out, const char *seq, char value) -{ - size_t len = strlen(seq); - - if (strncmp(*in, seq, len)) - return false; - - **out = value; - - *in += len; - *out += 1; - - return true; -} diff --git a/tools/perf/util/demangle-rust.h b/tools/perf/util/demangle-rust.h deleted file mode 100644 index 2fca618b1aa5..000000000000 --- a/tools/perf/util/demangle-rust.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERF_DEMANGLE_RUST -#define __PERF_DEMANGLE_RUST 1 - -bool rust_is_mangled(const char *str); -void rust_demangle_sym(char *str); - -#endif /* __PERF_DEMANGLE_RUST */ diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index 8619b6eea62d..057fcf4225ac 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1349,6 +1349,16 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name, return dso; } +static void __dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated) +{ + if (dso__long_name_allocated(dso)) + free((char *)dso__long_name(dso)); + + RC_CHK_ACCESS(dso)->long_name = name; + RC_CHK_ACCESS(dso)->long_name_len = strlen(name); + dso__set_long_name_allocated(dso, name_allocated); +} + static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_allocated) { struct dsos *dsos = dso__dsos(dso); @@ -1362,18 +1372,11 @@ static void dso__set_long_name_id(struct dso *dso, const char *name, bool name_a * renaming the dso. */ down_write(&dsos->lock); - } - - if (dso__long_name_allocated(dso)) - free((char *)dso__long_name(dso)); - - RC_CHK_ACCESS(dso)->long_name = name; - RC_CHK_ACCESS(dso)->long_name_len = strlen(name); - dso__set_long_name_allocated(dso, name_allocated); - - if (dsos) { + __dso__set_long_name_id(dso, name, name_allocated); dsos->sorted = false; up_write(&dsos->lock); + } else { + __dso__set_long_name_id(dso, name, name_allocated); } } @@ -1451,6 +1454,16 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) dso__set_long_name_id(dso, name, name_allocated); } +static void __dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) +{ + if (dso__short_name_allocated(dso)) + free((char *)dso__short_name(dso)); + + RC_CHK_ACCESS(dso)->short_name = name; + RC_CHK_ACCESS(dso)->short_name_len = strlen(name); + dso__set_short_name_allocated(dso, name_allocated); +} + void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) { struct dsos *dsos = dso__dsos(dso); @@ -1464,17 +1477,11 @@ void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) * renaming the dso. */ down_write(&dsos->lock); - } - if (dso__short_name_allocated(dso)) - free((char *)dso__short_name(dso)); - - RC_CHK_ACCESS(dso)->short_name = name; - RC_CHK_ACCESS(dso)->short_name_len = strlen(name); - dso__set_short_name_allocated(dso, name_allocated); - - if (dsos) { + __dso__set_short_name(dso, name, name_allocated); dsos->sorted = false; up_write(&dsos->lock); + } else { + __dso__set_short_name(dso, name, name_allocated); } } diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c index e0998e2a7c4e..4d213017d202 100644 --- a/tools/perf/util/dsos.c +++ b/tools/perf/util/dsos.c @@ -157,6 +157,7 @@ static struct dso *__dsos__find_by_longname_id(struct dsos *dsos, const char *name, const struct dso_id *id, bool write_locked) + SHARED_LOCKS_REQUIRED(dsos->lock) { struct dsos__key key = { .long_name = name, @@ -262,6 +263,7 @@ static int dsos__find_id_cb(struct dso *dso, void *data) static struct dso *__dsos__find_id(struct dsos *dsos, const char *name, const struct dso_id *id, bool cmp_short, bool write_locked) + SHARED_LOCKS_REQUIRED(dsos->lock) { struct dso *res; @@ -338,6 +340,7 @@ static struct dso *__dsos__addnew_id(struct dsos *dsos, const char *name, const } static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, const struct dso_id *id) + SHARED_LOCKS_REQUIRED(dsos->lock) { struct dso *dso = __dsos__find_id(dsos, name, id, false, /*write_locked=*/true); diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index c23b77f8f854..7544a3104e21 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -77,6 +77,7 @@ static const char *perf_event__names[] = { [PERF_RECORD_HEADER_FEATURE] = "FEATURE", [PERF_RECORD_COMPRESSED] = "COMPRESSED", [PERF_RECORD_FINISHED_INIT] = "FINISHED_INIT", + [PERF_RECORD_COMPRESSED2] = "COMPRESSED2", }; const char *perf_event__name(unsigned int id) @@ -448,12 +449,13 @@ int perf_event__exit_del_thread(const struct perf_tool *tool __maybe_unused, size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp) { - return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s]\n", + return fprintf(fp, " offset: %#"PRI_lx64" size: %#"PRI_lx64" flags: %#"PRI_lx64" [%s%s%s%s]\n", event->aux.aux_offset, event->aux.aux_size, event->aux.flags, event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "", event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "", - event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : ""); + event->aux.flags & PERF_AUX_FLAG_PARTIAL ? "P" : "", + event->aux.flags & PERF_AUX_FLAG_COLLISION ? "C" : ""); } size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c1a04141aed0..dcd1130502df 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -35,6 +35,7 @@ #include "util/util.h" #include "util/env.h" #include "util/intel-tpebs.h" +#include "util/strbuf.h" #include <signal.h> #include <unistd.h> #include <sched.h> @@ -183,7 +184,6 @@ void evlist__delete(struct evlist *evlist) if (evlist == NULL) return; - tpebs_delete(); evlist__free_stats(evlist); evlist__munmap(evlist); evlist__close(evlist); @@ -2468,23 +2468,36 @@ struct evsel *evlist__find_evsel(struct evlist *evlist, int idx) return NULL; } -int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf) +void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length) { - struct evsel *evsel; - int printed = 0; + struct evsel *evsel, *leader = NULL; + bool first = true; evlist__for_each_entry(evlist, evsel) { + struct evsel *new_leader = evsel__leader(evsel); + if (evsel__is_dummy_event(evsel)) continue; - if (size > (strlen(evsel__name(evsel)) + (printed ? 2 : 1))) { - printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "," : "", evsel__name(evsel)); - } else { - printed += scnprintf(bf + printed, size - printed, "%s...", printed ? "," : ""); - break; + + if (leader != new_leader && leader && leader->core.nr_members > 1) + strbuf_addch(sb, '}'); + + if (!first) + strbuf_addch(sb, ','); + + if (sb->len > max_length) { + strbuf_addstr(sb, "..."); + return; } - } + if (leader != new_leader && new_leader->core.nr_members > 1) + strbuf_addch(sb, '{'); - return printed; + strbuf_addstr(sb, evsel__name(evsel)); + first = false; + leader = new_leader; + } + if (leader && leader->core.nr_members > 1) + strbuf_addch(sb, '}'); } void evlist__check_mem_load_aux(struct evlist *evlist) @@ -2552,34 +2565,56 @@ void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_lis perf_cpu_map__put(user_requested_cpus); } -void evlist__uniquify_name(struct evlist *evlist) +/* Should uniquify be disabled for the evlist? */ +static bool evlist__disable_uniquify(const struct evlist *evlist) { - char *new_name, empty_attributes[2] = ":", *attributes; - struct evsel *pos; + struct evsel *counter; + struct perf_pmu *last_pmu = NULL; + bool first = true; - if (perf_pmus__num_core_pmus() == 1) - return; + evlist__for_each_entry(evlist, counter) { + /* If PMUs vary then uniquify can be useful. */ + if (!first && counter->pmu != last_pmu) + return false; + first = false; + if (counter->pmu) { + /* Allow uniquify for uncore PMUs. */ + if (!counter->pmu->is_core) + return false; + /* Keep hybrid event names uniquified for clarity. */ + if (perf_pmus__num_core_pmus() > 1) + return false; + } + last_pmu = counter->pmu; + } + return true; +} - evlist__for_each_entry(evlist, pos) { - if (!evsel__is_hybrid(pos)) - continue; +static bool evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config) +{ + struct evsel *counter; + bool needs_uniquify = false; - if (strchr(pos->name, '/')) - continue; + if (evlist__disable_uniquify(evlist)) { + evlist__for_each_entry(evlist, counter) + counter->uniquified_name = true; + return false; + } + + evlist__for_each_entry(evlist, counter) { + if (evsel__set_needs_uniquify(counter, config)) + needs_uniquify = true; + } + return needs_uniquify; +} - attributes = strchr(pos->name, ':'); - if (attributes) - *attributes = '\0'; - else - attributes = empty_attributes; +void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config) +{ + if (evlist__set_needs_uniquify(evlist, config)) { + struct evsel *pos; - if (asprintf(&new_name, "%s/%s/%s", pos->pmu ? pos->pmu->name : "", - pos->name, attributes + 1)) { - free(pos->name); - pos->name = new_name; - } else { - *attributes = ':'; - } + evlist__for_each_entry(evlist, pos) + evsel__uniquify_counter(pos); } } diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index edcbf1c10e92..85859708393e 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -19,7 +19,9 @@ struct pollfd; struct thread_map; struct perf_cpu_map; +struct perf_stat_config; struct record_opts; +struct strbuf; struct target; /* @@ -430,10 +432,10 @@ int event_enable_timer__process(struct event_enable_timer *eet); struct evsel *evlist__find_evsel(struct evlist *evlist, int idx); -int evlist__scnprintf_evsels(struct evlist *evlist, size_t size, char *bf); +void evlist__format_evsels(struct evlist *evlist, struct strbuf *sb, size_t max_length); void evlist__check_mem_load_aux(struct evlist *evlist); void evlist__warn_user_requested_cpus(struct evlist *evlist, const char *cpu_list); -void evlist__uniquify_name(struct evlist *evlist); +void evlist__uniquify_evsel_names(struct evlist *evlist, const struct perf_stat_config *config); bool evlist__has_bpf_output(struct evlist *evlist); bool evlist__needs_bpf_sb_event(struct evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3c030da2e477..d55482f094bf 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -552,11 +552,11 @@ struct evsel *evsel__clone(struct evsel *dest, struct evsel *orig) evsel->exclude_GH = orig->exclude_GH; evsel->sample_read = orig->sample_read; - evsel->auto_merge_stats = orig->auto_merge_stats; evsel->collect_stat = orig->collect_stat; evsel->weak_group = orig->weak_group; evsel->use_config_name = orig->use_config_name; evsel->pmu = orig->pmu; + evsel->first_wildcard_match = orig->first_wildcard_match; if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; @@ -1275,9 +1275,10 @@ static void evsel__set_default_freq_period(struct record_opts *opts, } } -static bool evsel__is_offcpu_event(struct evsel *evsel) +bool evsel__is_offcpu_event(struct evsel *evsel) { - return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT); + return evsel__is_bpf_output(evsel) && evsel__name_is(evsel, OFFCPU_EVENT) && + evsel->core.attr.sample_type & PERF_SAMPLE_RAW; } /* @@ -1425,7 +1426,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, evsel__set_sample_bit(evsel, CPU); } - if (opts->sample_address) + if (opts->sample_data_src) evsel__set_sample_bit(evsel, DATA_SRC); if (opts->sample_phys_addr) @@ -1440,9 +1441,10 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, attr->branch_sample_type = opts->branch_stack; } - if (opts->sample_weight) + if (opts->sample_weight || evsel->retire_lat) { arch_evsel__set_sample_weight(evsel); - + evsel->retire_lat = false; + } attr->task = track; attr->mmap = track; attr->mmap2 = track && !perf_missing_features.mmap2; @@ -1554,8 +1556,10 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, if (evsel__is_dummy_event(evsel)) evsel__reset_sample_bit(evsel, BRANCH_STACK); - if (evsel__is_offcpu_event(evsel)) + if (evsel__is_offcpu_event(evsel)) { evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES; + attr->inherit = 0; + } arch__post_evsel_config(evsel, attr); } @@ -1656,6 +1660,8 @@ void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); assert(evsel->evlist == NULL); + if (evsel__is_retire_lat(evsel)) + evsel__tpebs_close(evsel); bpf_counter__destroy(evsel); perf_bpf_filter__destroy(evsel); evsel__free_counts(evsel); @@ -1718,11 +1724,6 @@ static int evsel__read_one(struct evsel *evsel, int cpu_map_idx, int thread) return perf_evsel__read(&evsel->core, cpu_map_idx, thread, count); } -static int evsel__read_retire_lat(struct evsel *evsel, int cpu_map_idx, int thread) -{ - return tpebs_set_evsel(evsel, cpu_map_idx, thread); -} - static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, u64 val, u64 ena, u64 run, u64 lost) { @@ -1730,8 +1731,8 @@ static void evsel__set_count(struct evsel *counter, int cpu_map_idx, int thread, count = perf_counts(counter->counts, cpu_map_idx, thread); - if (counter->retire_lat) { - evsel__read_retire_lat(counter, cpu_map_idx, thread); + if (evsel__is_retire_lat(counter)) { + evsel__tpebs_read(counter, cpu_map_idx, thread); perf_counts__set_loaded(counter->counts, cpu_map_idx, thread, true); return; } @@ -1889,7 +1890,7 @@ int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread); if (evsel__is_retire_lat(evsel)) - return evsel__read_retire_lat(evsel, cpu_map_idx, thread); + return evsel__tpebs_read(evsel, cpu_map_idx, thread); if (evsel->core.attr.read_format & PERF_FORMAT_GROUP) return evsel__read_group(evsel, cpu_map_idx, thread); @@ -2576,7 +2577,7 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_cpu cpu; if (evsel__is_retire_lat(evsel)) - return tpebs_start(evsel->evlist); + return evsel__tpebs_open(evsel); err = __evsel__prepare_open(evsel, cpus, threads); if (err) @@ -2737,7 +2738,7 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, void evsel__close(struct evsel *evsel) { if (evsel__is_retire_lat(evsel)) - tpebs_delete(); + evsel__tpebs_close(evsel); perf_evsel__close(&evsel->core); perf_evsel__free_id(&evsel->core); } @@ -2923,6 +2924,35 @@ static inline bool evsel__has_branch_counters(const struct evsel *evsel) return false; } +static int __set_offcpu_sample(struct perf_sample *data) +{ + u64 *array = data->raw_data; + u32 max_size = data->raw_size, *p32; + const void *endp = (void *)array + max_size; + + if (array == NULL) + return -EFAULT; + + OVERFLOW_CHECK_u64(array); + p32 = (void *)array++; + data->pid = p32[0]; + data->tid = p32[1]; + + OVERFLOW_CHECK_u64(array); + data->period = *array++; + + OVERFLOW_CHECK_u64(array); + data->callchain = (struct ip_callchain *)array++; + OVERFLOW_CHECK(array, data->callchain->nr * sizeof(u64), max_size); + data->ip = data->callchain->ips[1]; + array += data->callchain->nr; + + OVERFLOW_CHECK_u64(array); + data->cgroup = *array; + + return 0; +} + int evsel__parse_sample(struct evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -3277,6 +3307,9 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event, array = (void *)array + sz; } + if (evsel__is_offcpu_event(evsel)) + return __set_offcpu_sample(data); + return 0; } @@ -3752,6 +3785,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "%s", "No hardware sampling interrupt available.\n"); #endif + if (!target__has_cpu(target)) + return scnprintf(msg, size, + "Unsupported event (%s) in per-thread mode, enable system wide with '-a'.", + evsel__name(evsel)); break; case EBUSY: if (find_process("oprofiled")) @@ -3917,3 +3954,120 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader) leader->core.nr_members--; } } + +bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config) +{ + struct evsel *evsel; + + if (counter->needs_uniquify) { + /* Already set. */ + return true; + } + + if (counter->use_config_name || counter->is_libpfm_event) { + /* Original name will be used. */ + return false; + } + + if (!config->hybrid_merge && evsel__is_hybrid(counter)) { + /* Unique hybrid counters necessary. */ + counter->needs_uniquify = true; + return true; + } + + if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) { + /* Legacy event, don't uniquify. */ + return false; + } + + if (counter->pmu && counter->pmu->is_core && + counter->alternate_hw_config != PERF_COUNT_HW_MAX) { + /* A sysfs or json event replacing a legacy event, don't uniquify. */ + return false; + } + + if (config->aggr_mode == AGGR_NONE) { + /* Always unique with no aggregation. */ + counter->needs_uniquify = true; + return true; + } + + if (counter->first_wildcard_match != NULL) { + /* + * If stats are merged then only the first_wildcard_match is + * displayed, there is no need to uniquify this evsel as the + * name won't be shown. + */ + return false; + } + + /* + * Do other non-merged events in the evlist have the same name? If so + * uniquify is necessary. + */ + evlist__for_each_entry(counter->evlist, evsel) { + if (evsel == counter || evsel->first_wildcard_match || evsel->pmu == counter->pmu) + continue; + + if (evsel__name_is(counter, evsel__name(evsel))) { + counter->needs_uniquify = true; + return true; + } + } + return false; +} + +void evsel__uniquify_counter(struct evsel *counter) +{ + const char *name, *pmu_name; + char *new_name, *config; + int ret; + + /* No uniquification necessary. */ + if (!counter->needs_uniquify) + return; + + /* The evsel was already uniquified. */ + if (counter->uniquified_name) + return; + + /* Avoid checking to uniquify twice. */ + counter->uniquified_name = true; + + name = evsel__name(counter); + pmu_name = counter->pmu->name; + /* Already prefixed by the PMU name. */ + if (!strncmp(name, pmu_name, strlen(pmu_name))) + return; + + config = strchr(name, '/'); + if (config) { + int len = config - name; + + if (config[1] == '/') { + /* case: event// */ + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2); + } else { + /* case: event/.../ */ + ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1); + } + } else { + config = strchr(name, ':'); + if (config) { + /* case: event:.. */ + int len = config - name; + + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1); + } else { + /* case: event */ + ret = asprintf(&new_name, "%s/%s/", pmu_name, name); + } + } + if (ret > 0) { + free(counter->name); + counter->name = new_name; + } else { + /* ENOMEM from asprintf. */ + counter->uniquified_name = false; + } +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index aae431d63d64..6dbc9690e0c9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -16,6 +16,7 @@ struct bpf_object; struct cgroup; struct perf_counts; +struct perf_stat_config; struct perf_stat_evsel; union perf_event; struct bpf_counter_ops; @@ -69,6 +70,11 @@ struct evsel { const char *unit; struct cgroup *cgrp; const char *metric_id; + /* + * This point to the first evsel with the same name, intended to store the + * aggregated counts in aggregation mode. + */ + struct evsel *first_wildcard_match; /* parse modifier helper */ int exclude_GH; int sample_read; @@ -77,7 +83,6 @@ struct evsel { bool percore; bool precise_max; bool is_libpfm_event; - bool auto_merge_stats; bool collect_stat; bool weak_group; bool bpf_counter; @@ -114,7 +119,6 @@ struct evsel { bool ignore_missing_thread; bool forced_leader; bool cmdline_group_boundary; - bool merged_stat; bool reset_group; bool errored; bool needs_auxtrace_mmap; @@ -177,6 +181,12 @@ struct evsel { /* For tool events */ /* Beginning time subtracted when the counter is read. */ union { + /* Defaults for retirement latency events. */ + struct _retirement_latency { + double mean; + double min; + double max; + } retirement_latency; /* duration_time is a single global time. */ __u64 start_time; /* @@ -542,6 +552,9 @@ void evsel__remove_from_group(struct evsel *evsel, struct evsel *leader); bool arch_evsel__must_be_in_group(const struct evsel *evsel); +bool evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config); +void evsel__uniquify_counter(struct evsel *counter); + /* * Macro to swap the bit-field postition and size. * Used when, @@ -557,4 +570,6 @@ u64 evsel__bitfield_swap_branch_flags(u64 value); void evsel__set_config_if_unset(struct perf_pmu *pmu, struct evsel *evsel, const char *config_name, u64 val); +bool evsel__is_offcpu_event(struct evsel *evsel); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h index af52a1516d0b..94a1e9cf73d6 100644 --- a/tools/perf/util/evsel_config.h +++ b/tools/perf/util/evsel_config.h @@ -48,6 +48,7 @@ struct evsel_config_term { u32 aux_sample_size; u64 cfg_chg; char *str; + int cpu; } val; bool weak; }; diff --git a/tools/perf/util/fncache.c b/tools/perf/util/fncache.c index 6225cbc52310..bf9559c55c63 100644 --- a/tools/perf/util/fncache.c +++ b/tools/perf/util/fncache.c @@ -1,53 +1,58 @@ // SPDX-License-Identifier: GPL-2.0-only /* Manage a cache of file names' existence */ +#include <pthread.h> #include <stdlib.h> -#include <unistd.h> #include <string.h> -#include <linux/list.h> +#include <unistd.h> +#include <linux/compiler.h> #include "fncache.h" +#include "hashmap.h" -struct fncache { - struct hlist_node nd; - bool res; - char name[]; -}; +static struct hashmap *fncache; -#define FNHSIZE 61 +static size_t fncache__hash(long key, void *ctx __maybe_unused) +{ + return str_hash((const char *)key); +} -static struct hlist_head fncache_hash[FNHSIZE]; +static bool fncache__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcmp((const char *)key1, (const char *)key2) == 0; +} -unsigned shash(const unsigned char *s) +static void fncache__init(void) { - unsigned h = 0; - while (*s) - h = 65599 * h + *s++; - return h ^ (h >> 16); + fncache = hashmap__new(fncache__hash, fncache__equal, /*ctx=*/NULL); +} + +static struct hashmap *fncache__get(void) +{ + static pthread_once_t fncache_once = PTHREAD_ONCE_INIT; + + pthread_once(&fncache_once, fncache__init); + + return fncache; } static bool lookup_fncache(const char *name, bool *res) { - int h = shash((const unsigned char *)name) % FNHSIZE; - struct fncache *n; - - hlist_for_each_entry(n, &fncache_hash[h], nd) { - if (!strcmp(n->name, name)) { - *res = n->res; - return true; - } - } - return false; + long val; + + if (!hashmap__find(fncache__get(), name, &val)) + return false; + + *res = (val != 0); + return true; } static void update_fncache(const char *name, bool res) { - struct fncache *n = malloc(sizeof(struct fncache) + strlen(name) + 1); - int h = shash((const unsigned char *)name) % FNHSIZE; - - if (!n) - return; - strcpy(n->name, name); - n->res = res; - hlist_add_head(&n->nd, &fncache_hash[h]); + char *old_key = NULL, *key = strdup(name); + + if (key) { + hashmap__set(fncache__get(), key, res, &old_key, /*old_value*/NULL); + free(old_key); + } } /* No LRU, only use when bounded in some other way. */ diff --git a/tools/perf/util/fncache.h b/tools/perf/util/fncache.h index fe020beaefb1..b6a0f209493e 100644 --- a/tools/perf/util/fncache.h +++ b/tools/perf/util/fncache.h @@ -1,7 +1,6 @@ #ifndef _FCACHE_H #define _FCACHE_H 1 -unsigned shash(const unsigned char *s); bool file_available(const char *name); #endif diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index d65228c11412..afc6855327ab 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -336,6 +336,69 @@ static void he_stat__decay(struct he_stat *he_stat) he_stat->latency = (he_stat->latency * 7) / 8; } +static int hists__update_mem_stat(struct hists *hists, struct hist_entry *he, + struct mem_info *mi, u64 period) +{ + if (hists->nr_mem_stats == 0) + return 0; + + if (he->mem_stat == NULL) { + he->mem_stat = calloc(hists->nr_mem_stats, sizeof(*he->mem_stat)); + if (he->mem_stat == NULL) + return -1; + } + + for (int i = 0; i < hists->nr_mem_stats; i++) { + int idx = mem_stat_index(hists->mem_stat_types[i], + mem_info__const_data_src(mi)->val); + + assert(0 <= idx && idx < MEM_STAT_LEN); + he->mem_stat[i].entries[idx] += period; + hists->mem_stat_total[i].entries[idx] += period; + } + return 0; +} + +static void hists__add_mem_stat(struct hists *hists, struct hist_entry *dst, + struct hist_entry *src) +{ + if (hists->nr_mem_stats == 0) + return; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] += src->mem_stat[i].entries[k]; + } +} + +static int hists__clone_mem_stat(struct hists *hists, struct hist_entry *dst, + struct hist_entry *src) +{ + if (hists->nr_mem_stats == 0) + return 0; + + dst->mem_stat = calloc(hists->nr_mem_stats, sizeof(*dst->mem_stat)); + if (dst->mem_stat == NULL) + return -1; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + dst->mem_stat[i].entries[k] = src->mem_stat[i].entries[k]; + } + return 0; +} + +static void hists__decay_mem_stat(struct hists *hists, struct hist_entry *he) +{ + if (hists->nr_mem_stats == 0) + return; + + for (int i = 0; i < hists->nr_mem_stats; i++) { + for (int k = 0; k < MEM_STAT_LEN; k++) + he->mem_stat[i].entries[k] = (he->mem_stat[i].entries[k] * 7) / 8; + } +} + static void hists__delete_entry(struct hists *hists, struct hist_entry *he); static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) @@ -350,6 +413,7 @@ static bool hists__decay_entry(struct hists *hists, struct hist_entry *he) if (symbol_conf.cumulate_callchain) he_stat__decay(he->stat_acc); decay_callchain(he->callchain); + hists__decay_mem_stat(hists, he); if (!he->depth) { u64 period_diff = prev_period - he->stat.period; @@ -693,6 +757,10 @@ out: he_stat__add_cpumode_period(&he->stat, al->cpumode, period); if (symbol_conf.cumulate_callchain) he_stat__add_cpumode_period(he->stat_acc, al->cpumode, period); + if (hists__update_mem_stat(hists, he, entry->mem_info, period) < 0) { + hist_entry__delete(he); + return NULL; + } return he; } @@ -1423,6 +1491,7 @@ void hist_entry__delete(struct hist_entry *he) free_callchain(he->callchain); zfree(&he->trace_output); zfree(&he->raw_data); + zfree(&he->mem_stat); ops->free(he); } @@ -1572,6 +1641,7 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, cmp = hist_entry__collapse_hierarchy(hpp_list, iter, he); if (!cmp) { he_stat__add_stat(&iter->stat, &he->stat); + hists__add_mem_stat(hists, iter, he); return iter; } @@ -1613,6 +1683,11 @@ static struct hist_entry *hierarchy_insert_entry(struct hists *hists, new->srcfile = NULL; } + if (hists__clone_mem_stat(hists, new, he) < 0) { + hist_entry__delete(new); + return NULL; + } + rb_link_node(&new->rb_node_in, parent, p); rb_insert_color_cached(&new->rb_node_in, root, leftmost); return new; @@ -1695,6 +1770,7 @@ static int hists__collapse_insert_entry(struct hists *hists, he_stat__add_stat(&iter->stat, &he->stat); if (symbol_conf.cumulate_callchain) he_stat__add_stat(iter->stat_acc, he->stat_acc); + hists__add_mem_stat(hists, iter, he); if (hist_entry__has_callchains(he) && symbol_conf.use_callchain) { struct callchain_cursor *cursor = get_tls_callchain_cursor(); @@ -2978,6 +3054,8 @@ static void hists_evsel__exit(struct evsel *evsel) struct perf_hpp_list_node *node, *tmp; hists__delete_all_entries(hists); + zfree(&hists->mem_stat_types); + zfree(&hists->mem_stat_total); list_for_each_entry_safe(node, tmp, &hists->hpp_formats, list) { perf_hpp_list__for_each_format_safe(&node->hpp, fmt, pos) { diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 317d06cca8b8..c64254088fc7 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -9,6 +9,7 @@ #include "events_stats.h" #include "evsel.h" #include "map_symbol.h" +#include "mem-events.h" #include "mutex.h" #include "sample.h" #include "spark.h" @@ -41,6 +42,7 @@ enum hist_column { HISTC_TIME, HISTC_DSO, HISTC_THREAD, + HISTC_TGID, HISTC_COMM, HISTC_CGROUP_ID, HISTC_CGROUP, @@ -100,6 +102,13 @@ enum hist_column { struct thread; struct dso; +#define MEM_STAT_LEN 8 + +struct he_mem_stat { + /* meaning of entries depends on enum mem_stat_type */ + u64 entries[MEM_STAT_LEN]; +}; + struct hists { struct rb_root_cached entries_in_array[2]; struct rb_root_cached *entries_in; @@ -125,6 +134,9 @@ struct hists { struct perf_hpp_list *hpp_list; struct list_head hpp_formats; int nr_hpp_node; + int nr_mem_stats; + enum mem_stat_type *mem_stat_types; + struct he_mem_stat *mem_stat_total; }; #define hists__has(__h, __f) (__h)->hpp_list->__f @@ -232,6 +244,7 @@ struct hist_entry { } pairs; struct he_stat stat; struct he_stat *stat_acc; + struct he_mem_stat *mem_stat; struct map_symbol ms; struct thread *thread; struct comm *comm; @@ -576,18 +589,25 @@ enum { PERF_HPP__WEIGHT1, PERF_HPP__WEIGHT2, PERF_HPP__WEIGHT3, + PERF_HPP__MEM_STAT_OP, + PERF_HPP__MEM_STAT_CACHE, + PERF_HPP__MEM_STAT_MEMORY, + PERF_HPP__MEM_STAT_SNOOP, + PERF_HPP__MEM_STAT_DTLB, PERF_HPP__MAX_INDEX }; void perf_hpp__init(void); -void perf_hpp__cancel_cumulate(void); -void perf_hpp__cancel_latency(void); +void perf_hpp__cancel_cumulate(struct evlist *evlist); +void perf_hpp__cancel_latency(struct evlist *evlist); void perf_hpp__setup_output_field(struct perf_hpp_list *list); void perf_hpp__reset_output_field(struct perf_hpp_list *list); void perf_hpp__append_sort_keys(struct perf_hpp_list *list); int perf_hpp__setup_hists_formats(struct perf_hpp_list *list, struct evlist *evlist); +int perf_hpp__alloc_mem_stats(struct perf_hpp_list *list, + struct evlist *evlist); bool perf_hpp__is_sort_entry(struct perf_hpp_fmt *format); @@ -643,6 +663,9 @@ int hpp__fmt_acc(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he, hpp_field_fn get_field, const char *fmtstr, hpp_snprint_fn print_fn, enum perf_hpp_fmt_type fmtype); +int hpp__fmt_mem_stat(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he, enum mem_stat_type mst, + const char *fmtstr, hpp_snprint_fn print_fn); static inline void advance_hpp(struct perf_hpp *hpp, int inc) { diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c index 3cce77fc8004..c25e7296f1c1 100644 --- a/tools/perf/util/hwmon_pmu.c +++ b/tools/perf/util/hwmon_pmu.c @@ -346,42 +346,43 @@ struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, const cha { char buf[32]; struct hwmon_pmu *hwm; + __u32 type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); + + if (type > PERF_PMU_TYPE_HWMON_END) { + pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name); + return NULL; + } + + snprintf(buf, sizeof(buf), "hwmon_%s", name); + fix_name(buf + 6); hwm = zalloc(sizeof(*hwm)); if (!hwm) return NULL; - hwm->hwmon_dir_fd = hwmon_dir; - hwm->pmu.type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); - if (hwm->pmu.type > PERF_PMU_TYPE_HWMON_END) { - pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name); - goto err_out; + if (perf_pmu__init(&hwm->pmu, type, buf) != 0) { + perf_pmu__delete(&hwm->pmu); + return NULL; } - snprintf(buf, sizeof(buf), "hwmon_%s", name); - fix_name(buf + 6); - hwm->pmu.name = strdup(buf); - if (!hwm->pmu.name) - goto err_out; + + hwm->hwmon_dir_fd = hwmon_dir; hwm->pmu.alias_name = strdup(sysfs_name); - if (!hwm->pmu.alias_name) - goto err_out; + if (!hwm->pmu.alias_name) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } hwm->pmu.cpus = perf_cpu_map__new("0"); - if (!hwm->pmu.cpus) - goto err_out; + if (!hwm->pmu.cpus) { + perf_pmu__delete(&hwm->pmu); + return NULL; + } INIT_LIST_HEAD(&hwm->pmu.format); - INIT_LIST_HEAD(&hwm->pmu.aliases); INIT_LIST_HEAD(&hwm->pmu.caps); hashmap__init(&hwm->events, hwmon_pmu__event_hashmap_hash, hwmon_pmu__event_hashmap_equal, /*ctx=*/NULL); list_add_tail(&hwm->pmu.list, pmus); return &hwm->pmu; -err_out: - free((char *)hwm->pmu.name); - free(hwm->pmu.alias_name); - free(hwm); - close(hwmon_dir); - return NULL; } void hwmon_pmu__exit(struct perf_pmu *pmu) diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 4e8a9b172fbc..9b1011fe4826 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -127,6 +127,7 @@ struct intel_pt { bool single_pebs; bool sample_pebs; + int pebs_data_src_fmt; struct evsel *pebs_evsel; u64 evt_sample_type; @@ -175,6 +176,7 @@ enum switch_state { struct intel_pt_pebs_event { struct evsel *evsel; u64 id; + int data_src_fmt; }; struct intel_pt_queue { @@ -2272,7 +2274,146 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack, } } -static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, u64 id) +#define P(a, b) PERF_MEM_S(a, b) +#define OP_LH (P(OP, LOAD) | P(LVL, HIT)) +#define LEVEL(x) P(LVLNUM, x) +#define REM P(REMOTE, REMOTE) +#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS)) + +#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10 +#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1) + +/* Based on kernel __intel_pmu_pebs_data_source_grt() and pebs_data_source */ +static const u64 pebs_data_source_grt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, HIT), /* RAM hit|SNP Hit */ + OP_LH | P(LVL, REM_RAM1) | REM | LEVEL(L3) | P(SNOOP, HIT), /* Remote L3 hit|SNP Hit */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | SNOOP_NONE_MISS, /* RAM hit|SNP None or Miss */ + OP_LH | P(LVL, REM_RAM1) | LEVEL(RAM) | REM | SNOOP_NONE_MISS, /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel __intel_pmu_pebs_data_source_cmt() and pebs_data_source */ +static const u64 pebs_data_source_cmt[PERF_PEBS_DATA_SOURCE_GRT_MAX] = { + P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* L3 miss|SNP N/A */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* L1 hit|SNP None */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* LFB/MAB hit|SNP None */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* L2 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* L3 hit|SNP None */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, MISS), /* L3 hit|SNP Hit */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HIT), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* L3 hit|SNP HitM */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* L3 hit|SNP Fwd */ + OP_LH | P(LVL, REM_CCE1) | REM | LEVEL(L3) | P(SNOOP, HITM), /* Remote L3 hit|SNP HitM */ + OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* RAM hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, NONE), /* Remote L3 hit|SNP Hit */ + OP_LH | LEVEL(RAM) | REM | P(SNOOPX, FWD), /* RAM hit|SNP None or Miss */ + OP_LH | LEVEL(RAM) | REM | P(SNOOP, HITM), /* Remote RAM hit|SNP None or Miss */ + OP_LH | P(LVL, IO) | LEVEL(NA) | P(SNOOP, NONE), /* I/O hit|SNP None */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* Uncached hit|SNP None */ +}; + +/* Based on kernel pebs_set_tlb_lock() */ +static inline void pebs_set_tlb_lock(u64 *val, bool tlb, bool lock) +{ + /* + * TLB access + * 0 = did not miss 2nd level TLB + * 1 = missed 2nd level TLB + */ + if (tlb) + *val |= P(TLB, MISS) | P(TLB, L2); + else + *val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + /* locked prefix */ + if (lock) + *val |= P(LOCK, LOCKED); +} + +/* Based on kernel __grt_latency_data() */ +static u64 intel_pt_grt_latency_data(u8 dse, bool tlb, bool lock, bool blk, + const u64 *pebs_data_source) +{ + u64 val; + + dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK; + val = pebs_data_source[dse]; + + pebs_set_tlb_lock(&val, tlb, lock); + + if (blk) + val |= P(BLK, DATA); + else + val |= P(BLK, NA); + + return val; +} + +/* Default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA) |\ + PERF_MEM_S(LVLNUM, NA)) + +enum DATA_SRC_FORMAT { + DATA_SRC_FORMAT_ERR = -1, + DATA_SRC_FORMAT_NA = 0, + DATA_SRC_FORMAT_GRT = 1, + DATA_SRC_FORMAT_CMT = 2, +}; + +/* Based on kernel grt_latency_data() and cmt_latency_data */ +static u64 intel_pt_get_data_src(u64 mem_aux_info, int data_src_fmt) +{ + switch (data_src_fmt) { + case DATA_SRC_FORMAT_GRT: { + union { + u64 val; + struct { + unsigned int dse:4; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:25; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_grt); + } + case DATA_SRC_FORMAT_CMT: { + union { + u64 val; + struct { + unsigned int dse:5; + unsigned int locked:1; + unsigned int stlb_miss:1; + unsigned int fwd_blk:1; + unsigned int reserved:24; + }; + } x = {.val = mem_aux_info}; + return intel_pt_grt_latency_data(x.dse, x.stlb_miss, x.locked, x.fwd_blk, + pebs_data_source_cmt); + } + default: + return PERF_MEM_NA; + } +} + +static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evsel *evsel, + u64 id, int data_src_fmt) { const struct intel_pt_blk_items *items = &ptq->state->items; struct perf_sample sample; @@ -2393,6 +2534,18 @@ static int intel_pt_do_synth_pebs_sample(struct intel_pt_queue *ptq, struct evse } } + if (sample_type & PERF_SAMPLE_DATA_SRC) { + if (items->has_mem_aux_info && data_src_fmt) { + if (data_src_fmt < 0) { + pr_err("Intel PT missing data_src info\n"); + return -1; + } + sample.data_src = intel_pt_get_data_src(items->mem_aux_info, data_src_fmt); + } else { + sample.data_src = PERF_MEM_NA; + } + } + if (sample_type & PERF_SAMPLE_TRANSACTION && items->has_tsx_aux_info) { u64 ax = items->has_rax ? items->rax : 0; /* Refer kernel's intel_hsw_transaction() */ @@ -2413,9 +2566,10 @@ static int intel_pt_synth_single_pebs_sample(struct intel_pt_queue *ptq) { struct intel_pt *pt = ptq->pt; struct evsel *evsel = pt->pebs_evsel; + int data_src_fmt = pt->pebs_data_src_fmt; u64 id = evsel->core.id[0]; - return intel_pt_do_synth_pebs_sample(ptq, evsel, id); + return intel_pt_do_synth_pebs_sample(ptq, evsel, id, data_src_fmt); } static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) @@ -2440,7 +2594,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq) hw_id); return intel_pt_synth_single_pebs_sample(ptq); } - err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id); + err = intel_pt_do_synth_pebs_sample(ptq, pe->evsel, pe->id, pe->data_src_fmt); if (err) return err; } @@ -3407,6 +3561,49 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt, event->itrace_start.tid); } +/* + * Events with data_src are identified by L1_Hit_Indication + * refer https://github.com/intel/perfmon + */ +static int intel_pt_data_src_fmt(struct intel_pt *pt, struct evsel *evsel) +{ + struct perf_env *env = pt->machine->env; + int fmt = DATA_SRC_FORMAT_NA; + + if (!env->cpuid) + return DATA_SRC_FORMAT_ERR; + + /* + * PEBS-via-PT is only supported on E-core non-hybrid. Of those only + * Gracemont and Crestmont have data_src. Check for: + * Alderlake N (Gracemont) + * Sierra Forest (Crestmont) + * Grand Ridge (Crestmont) + */ + + if (!strncmp(env->cpuid, "GenuineIntel,6,190,", 19)) + fmt = DATA_SRC_FORMAT_GRT; + + if (!strncmp(env->cpuid, "GenuineIntel,6,175,", 19) || + !strncmp(env->cpuid, "GenuineIntel,6,182,", 19)) + fmt = DATA_SRC_FORMAT_CMT; + + if (fmt == DATA_SRC_FORMAT_NA) + return fmt; + + /* + * Only data_src events are: + * mem-loads event=0xd0,umask=0x5 + * mem-stores event=0xd0,umask=0x6 + */ + if (evsel->core.attr.type == PERF_TYPE_RAW && + ((evsel->core.attr.config & 0xffff) == 0x5d0 || + (evsel->core.attr.config & 0xffff) == 0x6d0)) + return fmt; + + return DATA_SRC_FORMAT_NA; +} + static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, union perf_event *event, struct perf_sample *sample) @@ -3427,6 +3624,7 @@ static int intel_pt_process_aux_output_hw_id(struct intel_pt *pt, ptq->pebs[hw_id].evsel = evsel; ptq->pebs[hw_id].id = sample->id; + ptq->pebs[hw_id].data_src_fmt = intel_pt_data_src_fmt(pt, evsel); return 0; } @@ -3976,6 +4174,7 @@ static void intel_pt_setup_pebs_events(struct intel_pt *pt) } pt->single_pebs = true; pt->sample_pebs = true; + pt->pebs_data_src_fmt = intel_pt_data_src_fmt(pt, evsel); pt->pebs_evsel = evsel; } } diff --git a/tools/perf/util/intel-tpebs.c b/tools/perf/util/intel-tpebs.c index 2c421b475b3b..3b92ebf5c112 100644 --- a/tools/perf/util/intel-tpebs.c +++ b/tools/perf/util/intel-tpebs.c @@ -3,7 +3,7 @@ * intel_tpebs.c: Intel TPEBS support */ - +#include <api/fs/fs.h> #include <sys/param.h> #include <subcmd/run-command.h> #include <thread.h> @@ -12,13 +12,17 @@ #include <linux/zalloc.h> #include <linux/err.h> #include "sample.h" +#include "counts.h" #include "debug.h" #include "evlist.h" #include "evsel.h" +#include "mutex.h" #include "session.h" +#include "stat.h" #include "tool.h" #include "cpumap.h" #include "metricgroup.h" +#include "stat.h" #include <sys/stat.h> #include <sys/file.h> #include <poll.h> @@ -27,95 +31,155 @@ #define PERF_DATA "-" bool tpebs_recording; -static pid_t tpebs_pid = -1; -static size_t tpebs_event_size; +enum tpebs_mode tpebs_mode; static LIST_HEAD(tpebs_results); static pthread_t tpebs_reader_thread; -static struct child_process *tpebs_cmd; +static struct child_process tpebs_cmd; +static int control_fd[2], ack_fd[2]; +static struct mutex tpebs_mtx; struct tpebs_retire_lat { struct list_head nd; - /* Event name */ - const char *name; - /* Event name with the TPEBS modifier R */ - const char *tpebs_name; - /* Count of retire_latency values found in sample data */ - size_t count; - /* Sum of all the retire_latency values in sample data */ - int sum; - /* Average of retire_latency, val = sum / count */ - double val; + /** @evsel: The evsel that opened the retire_lat event. */ + struct evsel *evsel; + /** @event: Event passed to perf record. */ + char *event; + /** @stats: Recorded retirement latency stats. */ + struct stats stats; + /** @last: Last retirement latency read. */ + uint64_t last; + /* Has the event been sent to perf record? */ + bool started; }; -static int get_perf_record_args(const char **record_argv, char buf[], - const char *cpumap_buf) +static void tpebs_mtx_init(void) +{ + mutex_init(&tpebs_mtx); +} + +static struct mutex *tpebs_mtx_get(void) { - struct tpebs_retire_lat *e; - int i = 0; + static pthread_once_t tpebs_mtx_once = PTHREAD_ONCE_INIT; + + pthread_once(&tpebs_mtx_once, tpebs_mtx_init); + return &tpebs_mtx; +} - pr_debug("tpebs: Prepare perf record for retire_latency\n"); +static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel) + EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()); + +static int evsel__tpebs_start_perf_record(struct evsel *evsel) +{ + const char **record_argv; + int tpebs_event_size = 0, i = 0, ret; + char control_fd_buf[32]; + char cpumap_buf[50]; + struct tpebs_retire_lat *t; + + list_for_each_entry(t, &tpebs_results, nd) + tpebs_event_size++; + + record_argv = malloc((10 + 2 * tpebs_event_size) * sizeof(*record_argv)); + if (!record_argv) + return -ENOMEM; record_argv[i++] = "perf"; record_argv[i++] = "record"; record_argv[i++] = "-W"; record_argv[i++] = "--synth=no"; - record_argv[i++] = buf; - if (!cpumap_buf) { - pr_err("tpebs: Require cpumap list to run sampling\n"); - return -ECANCELED; - } - /* Use -C when cpumap_buf is not "-1" */ - if (strcmp(cpumap_buf, "-1")) { + scnprintf(control_fd_buf, sizeof(control_fd_buf), "--control=fd:%d,%d", + control_fd[0], ack_fd[1]); + record_argv[i++] = control_fd_buf; + + record_argv[i++] = "-o"; + record_argv[i++] = PERF_DATA; + + if (!perf_cpu_map__is_any_cpu_or_is_empty(evsel->evlist->core.user_requested_cpus)) { + cpu_map__snprint(evsel->evlist->core.user_requested_cpus, cpumap_buf, + sizeof(cpumap_buf)); record_argv[i++] = "-C"; record_argv[i++] = cpumap_buf; } - list_for_each_entry(e, &tpebs_results, nd) { + list_for_each_entry(t, &tpebs_results, nd) { record_argv[i++] = "-e"; - record_argv[i++] = e->name; + record_argv[i++] = t->event; } + record_argv[i++] = NULL; + assert(i == 10 + 2 * tpebs_event_size || i == 8 + 2 * tpebs_event_size); + /* Note, no workload given so system wide is implied. */ + + assert(tpebs_cmd.pid == 0); + tpebs_cmd.argv = record_argv; + tpebs_cmd.out = -1; + ret = start_command(&tpebs_cmd); + zfree(&tpebs_cmd.argv); + list_for_each_entry(t, &tpebs_results, nd) + t->started = true; - record_argv[i++] = "-o"; - record_argv[i++] = PERF_DATA; - - return 0; + return ret; } -static int prepare_run_command(const char **argv) +static bool is_child_pid(pid_t parent, pid_t child) { - tpebs_cmd = zalloc(sizeof(struct child_process)); - if (!tpebs_cmd) - return -ENOMEM; - tpebs_cmd->argv = argv; - tpebs_cmd->out = -1; - return 0; + if (parent < 0 || child < 0) + return false; + + while (true) { + char path[PATH_MAX]; + char line[256]; + FILE *fp; + +new_child: + if (parent == child) + return true; + + if (child <= 0) + return false; + + scnprintf(path, sizeof(path), "%s/%d/status", procfs__mountpoint(), child); + fp = fopen(path, "r"); + if (!fp) { + /* Presumably the process went away. Assume not a child. */ + return false; + } + while (fgets(line, sizeof(line), fp) != NULL) { + if (strncmp(line, "PPid:", 5) == 0) { + fclose(fp); + if (sscanf(line + 5, "%d", &child) != 1) { + /* Unexpected error parsing. */ + return false; + } + goto new_child; + } + } + /* Unexpected EOF. */ + fclose(fp); + return false; + } } -static int start_perf_record(int control_fd[], int ack_fd[], - const char *cpumap_buf) +static bool should_ignore_sample(const struct perf_sample *sample, const struct tpebs_retire_lat *t) { - const char **record_argv; - int ret; - char buf[32]; + pid_t workload_pid, sample_pid = sample->pid; - scnprintf(buf, sizeof(buf), "--control=fd:%d,%d", control_fd[0], ack_fd[1]); + /* + * During evlist__purge the evlist will be removed prior to the + * evsel__exit calling evsel__tpebs_close and taking the + * tpebs_mtx. Avoid a segfault by ignoring samples in this case. + */ + if (t->evsel->evlist == NULL) + return true; - record_argv = calloc(12 + 2 * tpebs_event_size, sizeof(char *)); - if (!record_argv) - return -ENOMEM; + workload_pid = t->evsel->evlist->workload.pid; + if (workload_pid < 0 || workload_pid == sample_pid) + return false; - ret = get_perf_record_args(record_argv, buf, cpumap_buf); - if (ret) - goto out; + if (!t->evsel->core.attr.inherit) + return true; - ret = prepare_run_command(record_argv); - if (ret) - goto out; - ret = start_command(tpebs_cmd); -out: - free(record_argv); - return ret; + return !is_child_pid(workload_pid, sample_pid); } static int process_sample_event(const struct perf_tool *tool __maybe_unused, @@ -124,27 +188,32 @@ static int process_sample_event(const struct perf_tool *tool __maybe_unused, struct evsel *evsel, struct machine *machine __maybe_unused) { - int ret = 0; - const char *evname; struct tpebs_retire_lat *t; - evname = evsel__name(evsel); - + mutex_lock(tpebs_mtx_get()); + if (tpebs_cmd.pid == 0) { + /* Record has terminated. */ + mutex_unlock(tpebs_mtx_get()); + return 0; + } + t = tpebs_retire_lat__find(evsel); + if (!t) { + mutex_unlock(tpebs_mtx_get()); + return -EINVAL; + } + if (should_ignore_sample(sample, t)) { + mutex_unlock(tpebs_mtx_get()); + return 0; + } /* * Need to handle per core results? We are assuming average retire * latency value will be used. Save the number of samples and the sum of * retire latency value for each event. */ - list_for_each_entry(t, &tpebs_results, nd) { - if (!strcmp(evname, t->name)) { - t->count += 1; - t->sum += sample->retire_lat; - t->val = (double) t->sum / t->count; - break; - } - } - - return ret; + t->last = sample->retire_lat; + update_stats(&t->stats, sample->retire_lat); + mutex_unlock(tpebs_mtx_get()); + return 0; } static int process_feature_event(struct perf_session *session, @@ -155,14 +224,13 @@ static int process_feature_event(struct perf_session *session, return 0; } -static void *__sample_reader(void *arg) +static void *__sample_reader(void *arg __maybe_unused) { - struct child_process *child = arg; struct perf_session *session; struct perf_data data = { .mode = PERF_DATA_MODE_READ, .path = PERF_DATA, - .file.fd = child->out, + .file.fd = tpebs_cmd.out, }; struct perf_tool tool; @@ -180,94 +248,277 @@ static void *__sample_reader(void *arg) return NULL; } +static int tpebs_send_record_cmd(const char *msg) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()) +{ + struct pollfd pollfd = { .events = POLLIN, }; + int ret, len, retries = 0; + char ack_buf[8]; + + /* Check if the command exited before the send, done with the lock held. */ + if (tpebs_cmd.pid == 0) + return 0; + + /* + * Let go of the lock while sending/receiving as blocking can starve the + * sample reading thread. + */ + mutex_unlock(tpebs_mtx_get()); + + /* Send perf record command.*/ + len = strlen(msg); + ret = write(control_fd[1], msg, len); + if (ret != len) { + pr_err("perf record control write control message '%s' failed\n", msg); + ret = -EPIPE; + goto out; + } + + if (!strcmp(msg, EVLIST_CTL_CMD_STOP_TAG)) { + ret = 0; + goto out; + } + + /* Wait for an ack. */ + pollfd.fd = ack_fd[0]; + + /* + * We need this poll to ensure the ack_fd PIPE will not hang + * when perf record failed for any reason. The timeout value + * 3000ms is an empirical selection. + */ +again: + if (!poll(&pollfd, 1, 500)) { + if (check_if_command_finished(&tpebs_cmd)) { + ret = 0; + goto out; + } + + if (retries++ < 6) + goto again; + pr_err("tpebs failed: perf record ack timeout for '%s'\n", msg); + ret = -ETIMEDOUT; + goto out; + } + + if (!(pollfd.revents & POLLIN)) { + if (check_if_command_finished(&tpebs_cmd)) { + ret = 0; + goto out; + } + + pr_err("tpebs failed: did not received an ack for '%s'\n", msg); + ret = -EPIPE; + goto out; + } + + ret = read(ack_fd[0], ack_buf, sizeof(ack_buf)); + if (ret > 0) + ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG); + else + pr_err("tpebs: perf record control ack failed\n"); +out: + /* Re-take lock as expected by caller. */ + mutex_lock(tpebs_mtx_get()); + return ret; +} + /* * tpebs_stop - stop the sample data read thread and the perf record process. */ -static int tpebs_stop(void) +static int tpebs_stop(void) EXCLUSIVE_LOCKS_REQUIRED(tpebs_mtx_get()) { int ret = 0; /* Like tpebs_start, we should only run tpebs_end once. */ - if (tpebs_pid != -1) { - kill(tpebs_cmd->pid, SIGTERM); - tpebs_pid = -1; + if (tpebs_cmd.pid != 0) { + tpebs_send_record_cmd(EVLIST_CTL_CMD_STOP_TAG); + tpebs_cmd.pid = 0; + mutex_unlock(tpebs_mtx_get()); pthread_join(tpebs_reader_thread, NULL); - close(tpebs_cmd->out); - ret = finish_command(tpebs_cmd); + mutex_lock(tpebs_mtx_get()); + close(control_fd[0]); + close(control_fd[1]); + close(ack_fd[0]); + close(ack_fd[1]); + close(tpebs_cmd.out); + ret = finish_command(&tpebs_cmd); + tpebs_cmd.pid = 0; if (ret == -ERR_RUN_COMMAND_WAITPID_SIGNAL) ret = 0; } return ret; } -/* - * tpebs_start - start tpebs execution. - * @evsel_list: retire_latency evsels in this list will be selected and sampled - * to get the average retire_latency value. - * - * This function will be called from evlist level later when evlist__open() is - * called consistently. +/** + * evsel__tpebs_event() - Create string event encoding to pass to `perf record`. */ -int tpebs_start(struct evlist *evsel_list) +static int evsel__tpebs_event(struct evsel *evsel, char **event) { - int ret = 0; - struct evsel *evsel; - char cpumap_buf[50]; + char *name, *modifier; + int ret; + + name = strdup(evsel->name); + if (!name) + return -ENOMEM; + + modifier = strrchr(name, 'R'); + if (!modifier) { + ret = -EINVAL; + goto out; + } + *modifier = 'p'; + modifier = strchr(name, ':'); + if (!modifier) + modifier = strrchr(name, '/'); + if (!modifier) { + ret = -EINVAL; + goto out; + } + *modifier = '\0'; + if (asprintf(event, "%s/name=tpebs_event_%p/%s", name, evsel, modifier + 1) > 0) + ret = 0; + else + ret = -ENOMEM; +out: + if (ret) + pr_err("Tpebs event modifier broken '%s'\n", evsel->name); + free(name); + return ret; +} + +static struct tpebs_retire_lat *tpebs_retire_lat__new(struct evsel *evsel) +{ + struct tpebs_retire_lat *result = zalloc(sizeof(*result)); + int ret; + + if (!result) + return NULL; + + ret = evsel__tpebs_event(evsel, &result->event); + if (ret) { + free(result); + return NULL; + } + result->evsel = evsel; + return result; +} + +static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r) +{ + zfree(&r->event); + free(r); +} + +static struct tpebs_retire_lat *tpebs_retire_lat__find(struct evsel *evsel) +{ + struct tpebs_retire_lat *t; + unsigned long num; + const char *evsel_name; /* - * We should only run tpebs_start when tpebs_recording is enabled. - * And we should only run it once with all the required events. + * Evsels will match for evlist with the retirement latency event. The + * name with "tpebs_event_" prefix will be present on events being read + * from `perf record`. */ - if (tpebs_pid != -1 || !tpebs_recording) + if (evsel__is_retire_lat(evsel)) { + list_for_each_entry(t, &tpebs_results, nd) { + if (t->evsel == evsel) + return t; + } + return NULL; + } + evsel_name = strstr(evsel->name, "tpebs_event_"); + if (!evsel_name) { + /* Unexpected that the perf record should have other events. */ + return NULL; + } + errno = 0; + num = strtoull(evsel_name + 12, NULL, 16); + if (errno) { + pr_err("Bad evsel for tpebs find '%s'\n", evsel->name); + return NULL; + } + list_for_each_entry(t, &tpebs_results, nd) { + if ((unsigned long)t->evsel == num) + return t; + } + return NULL; +} + +/** + * evsel__tpebs_prepare - create tpebs data structures ready for opening. + * @evsel: retire_latency evsel, all evsels on its list will be prepared. + */ +static int evsel__tpebs_prepare(struct evsel *evsel) +{ + struct evsel *pos; + struct tpebs_retire_lat *tpebs_event; + + mutex_lock(tpebs_mtx_get()); + tpebs_event = tpebs_retire_lat__find(evsel); + if (tpebs_event) { + /* evsel, or an identically named one, was already prepared. */ + mutex_unlock(tpebs_mtx_get()); return 0; + } + tpebs_event = tpebs_retire_lat__new(evsel); + if (!tpebs_event) { + mutex_unlock(tpebs_mtx_get()); + return -ENOMEM; + } + list_add_tail(&tpebs_event->nd, &tpebs_results); + mutex_unlock(tpebs_mtx_get()); - cpu_map__snprint(evsel_list->core.user_requested_cpus, cpumap_buf, sizeof(cpumap_buf)); /* - * Prepare perf record for sampling event retire_latency before fork and - * prepare workload + * Eagerly prepare all other evsels on the list to try to ensure that by + * open they are all known. */ - evlist__for_each_entry(evsel_list, evsel) { - int i; - char *name; - struct tpebs_retire_lat *new; + evlist__for_each_entry(evsel->evlist, pos) { + int ret; - if (!evsel->retire_lat) + if (pos == evsel || !pos->retire_lat) continue; - pr_debug("tpebs: Retire_latency of event %s is required\n", evsel->name); - for (i = strlen(evsel->name) - 1; i > 0; i--) { - if (evsel->name[i] == 'R') - break; - } - if (i <= 0 || evsel->name[i] != 'R') { - ret = -1; - goto err; - } + ret = evsel__tpebs_prepare(pos); + if (ret) + return ret; + } + return 0; +} - name = strdup(evsel->name); - if (!name) { - ret = -ENOMEM; - goto err; - } - name[i] = 'p'; +/** + * evsel__tpebs_open - starts tpebs execution. + * @evsel: retire_latency evsel, all evsels on its list will be selected. Each + * evsel is sampled to get the average retire_latency value. + */ +int evsel__tpebs_open(struct evsel *evsel) +{ + int ret; + bool tpebs_empty; - new = zalloc(sizeof(*new)); - if (!new) { - ret = -1; - zfree(&name); - goto err; - } - new->name = name; - new->tpebs_name = evsel->name; - list_add_tail(&new->nd, &tpebs_results); - tpebs_event_size += 1; + /* We should only run tpebs_start when tpebs_recording is enabled. */ + if (!tpebs_recording) + return 0; + /* Only start the events once. */ + if (tpebs_cmd.pid != 0) { + struct tpebs_retire_lat *t; + bool valid; + + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + valid = t && t->started; + mutex_unlock(tpebs_mtx_get()); + /* May fail as the event wasn't started. */ + return valid ? 0 : -EBUSY; } - if (tpebs_event_size > 0) { - struct pollfd pollfd = { .events = POLLIN, }; - int control_fd[2], ack_fd[2], len; - char ack_buf[8]; + ret = evsel__tpebs_prepare(evsel); + if (ret) + return ret; + mutex_lock(tpebs_mtx_get()); + tpebs_empty = list_empty(&tpebs_results); + if (!tpebs_empty) { /*Create control and ack fd for --control*/ if (pipe(control_fd) < 0) { pr_err("tpebs: Failed to create control fifo"); @@ -280,153 +531,131 @@ int tpebs_start(struct evlist *evsel_list) goto out; } - ret = start_perf_record(control_fd, ack_fd, cpumap_buf); + ret = evsel__tpebs_start_perf_record(evsel); if (ret) goto out; - tpebs_pid = tpebs_cmd->pid; - if (pthread_create(&tpebs_reader_thread, NULL, __sample_reader, tpebs_cmd)) { - kill(tpebs_cmd->pid, SIGTERM); - close(tpebs_cmd->out); - pr_err("Could not create thread to process sample data.\n"); - ret = -1; - goto out; - } - /* Wait for perf record initialization.*/ - len = strlen(EVLIST_CTL_CMD_ENABLE_TAG); - ret = write(control_fd[1], EVLIST_CTL_CMD_ENABLE_TAG, len); - if (ret != len) { - pr_err("perf record control write control message failed\n"); - goto out; - } - - /* wait for an ack */ - pollfd.fd = ack_fd[0]; - - /* - * We need this poll to ensure the ack_fd PIPE will not hang - * when perf record failed for any reason. The timeout value - * 3000ms is an empirical selection. - */ - if (!poll(&pollfd, 1, 3000)) { - pr_err("tpebs failed: perf record ack timeout\n"); - ret = -1; - goto out; - } - if (!(pollfd.revents & POLLIN)) { - pr_err("tpebs failed: did not received an ack\n"); + if (pthread_create(&tpebs_reader_thread, /*attr=*/NULL, __sample_reader, + /*arg=*/NULL)) { + kill(tpebs_cmd.pid, SIGTERM); + close(tpebs_cmd.out); + pr_err("Could not create thread to process sample data.\n"); ret = -1; goto out; } - - ret = read(ack_fd[0], ack_buf, sizeof(ack_buf)); - if (ret > 0) - ret = strcmp(ack_buf, EVLIST_CTL_CMD_ACK_TAG); - else { - pr_err("tpebs: perf record control ack failed\n"); - goto out; - } + ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_ENABLE_TAG); + } out: - close(control_fd[0]); - close(control_fd[1]); - close(ack_fd[0]); - close(ack_fd[1]); + if (ret) { + struct tpebs_retire_lat *t = tpebs_retire_lat__find(evsel); + + list_del_init(&t->nd); + tpebs_retire_lat__delete(t); } -err: - if (ret) - tpebs_delete(); + mutex_unlock(tpebs_mtx_get()); return ret; } - -int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread) +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread) { - __u64 val; - bool found = false; + struct perf_counts_values *count, *old_count = NULL; struct tpebs_retire_lat *t; - struct perf_counts_values *count; + uint64_t val; + int ret; - /* Non reitre_latency evsel should never enter this function. */ - if (!evsel__is_retire_lat(evsel)) - return -1; + /* Only set retire_latency value to the first CPU and thread. */ + if (cpu_map_idx != 0 || thread != 0) + return 0; + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); - /* - * Need to stop the forked record to ensure get sampled data from the - * PIPE to process and get non-zero retire_lat value for hybrid. - */ - tpebs_stop(); count = perf_counts(evsel->counts, cpu_map_idx, thread); - list_for_each_entry(t, &tpebs_results, nd) { - if (t->tpebs_name == evsel->name || - (evsel->metric_id && !strcmp(t->tpebs_name, evsel->metric_id))) { - found = true; + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + /* + * If reading the first tpebs result, send a ping to the record + * process. Allow the sample reader a chance to read by releasing and + * reacquiring the lock. + */ + if (t && &t->nd == tpebs_results.next) { + ret = tpebs_send_record_cmd(EVLIST_CTL_CMD_PING_TAG); + mutex_unlock(tpebs_mtx_get()); + if (ret) + return ret; + mutex_lock(tpebs_mtx_get()); + } + if (t == NULL || t->stats.n == 0) { + /* No sample data, use default. */ + if (tpebs_recording) { + pr_warning_once( + "Using precomputed retirement latency data as no samples\n"); + } + val = 0; + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = rint(evsel->retirement_latency.min); + break; + case TPEBS_MODE__MAX: + val = rint(evsel->retirement_latency.max); + break; + default: + case TPEBS_MODE__LAST: + case TPEBS_MODE__MEAN: + val = rint(evsel->retirement_latency.mean); + break; + } + } else { + switch (tpebs_mode) { + case TPEBS_MODE__MIN: + val = t->stats.min; + break; + case TPEBS_MODE__MAX: + val = t->stats.max; + break; + case TPEBS_MODE__LAST: + val = t->last; + break; + default: + case TPEBS_MODE__MEAN: + val = rint(t->stats.mean); break; } } - - /* Set ena and run to non-zero */ - count->ena = count->run = 1; - count->lost = 0; - - if (!found) { - /* - * Set default value or 0 when retire_latency for this event is - * not found from sampling data (record_tpebs not set or 0 - * sample recorded). - */ - count->val = 0; - return 0; + mutex_unlock(tpebs_mtx_get()); + + if (old_count) { + count->val = old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = val; + count->run++; + count->ena++; } - - /* - * Only set retire_latency value to the first CPU and thread. - */ - if (cpu_map_idx == 0 && thread == 0) - val = rint(t->val); - else - val = 0; - - count->val = val; return 0; } -static void tpebs_retire_lat__delete(struct tpebs_retire_lat *r) -{ - zfree(&r->name); - free(r); -} - - -/* - * tpebs_delete - delete tpebs related data and stop the created thread and - * process by calling tpebs_stop(). +/** + * evsel__tpebs_close() - delete tpebs related data. If the last event, stop the + * created thread and process by calling tpebs_stop(). * - * This function is called from evlist_delete() and also from builtin-stat - * stat_handle_error(). If tpebs_start() is called from places other then perf - * stat, need to ensure tpebs_delete() is also called to safely free mem and - * close the data read thread and the forked perf record process. - * - * This function is also called in evsel__close() to be symmetric with - * tpebs_start() being called in evsel__open(). We will update this call site - * when move tpebs_start() to evlist level. + * This function is called in evsel__close() to be symmetric with + * evsel__tpebs_open() being called in evsel__open(). */ -void tpebs_delete(void) +void evsel__tpebs_close(struct evsel *evsel) { - struct tpebs_retire_lat *r, *rtmp; - - if (tpebs_pid == -1) - return; - - tpebs_stop(); + struct tpebs_retire_lat *t; - list_for_each_entry_safe(r, rtmp, &tpebs_results, nd) { - list_del_init(&r->nd); - tpebs_retire_lat__delete(r); - } + mutex_lock(tpebs_mtx_get()); + t = tpebs_retire_lat__find(evsel); + if (t) { + list_del_init(&t->nd); + tpebs_retire_lat__delete(t); - if (tpebs_cmd) { - free(tpebs_cmd); - tpebs_cmd = NULL; + if (list_empty(&tpebs_results)) + tpebs_stop(); } + mutex_unlock(tpebs_mtx_get()); } diff --git a/tools/perf/util/intel-tpebs.h b/tools/perf/util/intel-tpebs.h index 766b3fbd79f1..9475e2e6ea74 100644 --- a/tools/perf/util/intel-tpebs.h +++ b/tools/perf/util/intel-tpebs.h @@ -2,34 +2,24 @@ /* * intel_tpebs.h: Intel TEPBS support */ -#ifndef INCLUDE__PERF_INTEL_TPEBS_H__ -#define INCLUDE__PERF_INTEL_TPEBS_H__ +#ifndef __INTEL_TPEBS_H +#define __INTEL_TPEBS_H -#include "stat.h" -#include "evsel.h" +struct evlist; +struct evsel; -#ifdef HAVE_ARCH_X86_64_SUPPORT +enum tpebs_mode { + TPEBS_MODE__MEAN, + TPEBS_MODE__MIN, + TPEBS_MODE__MAX, + TPEBS_MODE__LAST, +}; extern bool tpebs_recording; -int tpebs_start(struct evlist *evsel_list); -void tpebs_delete(void); -int tpebs_set_evsel(struct evsel *evsel, int cpu_map_idx, int thread); +extern enum tpebs_mode tpebs_mode; -#else +int evsel__tpebs_open(struct evsel *evsel); +void evsel__tpebs_close(struct evsel *evsel); +int evsel__tpebs_read(struct evsel *evsel, int cpu_map_idx, int thread); -static inline int tpebs_start(struct evlist *evsel_list __maybe_unused) -{ - return 0; -} - -static inline void tpebs_delete(void) {}; - -static inline int tpebs_set_evsel(struct evsel *evsel __maybe_unused, - int cpu_map_idx __maybe_unused, - int thread __maybe_unused) -{ - return 0; -} - -#endif -#endif +#endif /* __INTEL_TPEBS_H */ diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index b5d916aa49df..59c94190b092 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -18,6 +18,12 @@ struct lock_filter { char **slabs; }; +struct lock_delay { + char *sym; + unsigned long addr; + unsigned long time; +}; + struct lock_stat { struct hlist_node hash_entry; struct rb_node rb; /* used for sorting */ @@ -140,14 +146,17 @@ struct lock_contention { struct machine *machine; struct hlist_head *result; struct lock_filter *filters; + struct lock_delay *delays; struct lock_contention_fails fails; struct rb_root cgroups; + void *btf; unsigned long map_nr_entries; int max_stack; int stack_skip; int aggr_mode; int owner; int nr_filtered; + int nr_delays; bool save_callstack; }; diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 2531b373f2cf..7ec12c207970 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -20,6 +20,7 @@ #include "path.h" #include "srcline.h" #include "symbol.h" +#include "synthetic-events.h" #include "sort.h" #include "strlist.h" #include "target.h" @@ -128,23 +129,57 @@ out: return 0; } -struct machine *machine__new_host(void) +static struct machine *__machine__new_host(bool kernel_maps) { struct machine *machine = malloc(sizeof(*machine)); - if (machine != NULL) { - machine__init(machine, "", HOST_KERNEL_ID); + if (!machine) + return NULL; - if (machine__create_kernel_maps(machine) < 0) - goto out_delete; + machine__init(machine, "", HOST_KERNEL_ID); - machine->env = &perf_env; + if (kernel_maps && machine__create_kernel_maps(machine) < 0) { + free(machine); + return NULL; } + machine->env = &perf_env; + return machine; +} + +struct machine *machine__new_host(void) +{ + return __machine__new_host(/*kernel_maps=*/true); +} + +static int mmap_handler(const struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_mmap2_event(machine, event, sample); +} +static int machine__init_live(struct machine *machine, pid_t pid) +{ + union perf_event event; + + memset(&event, 0, sizeof(event)); + return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, + mmap_handler, machine, true); +} + +struct machine *machine__new_live(bool kernel_maps, pid_t pid) +{ + struct machine *machine = __machine__new_host(kernel_maps); + + if (!machine) + return NULL; + + if (machine__init_live(machine, pid)) { + machine__delete(machine); + return NULL; + } return machine; -out_delete: - free(machine); - return NULL; } struct machine *machine__new_kallsyms(void) @@ -1976,7 +2011,7 @@ static void ip__resolve_ams(struct thread *thread, * Thus, we have to try consecutively until we find a match * or else, the symbol is unknown */ - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, /*symbols=*/true, &al); ams->addr = ip; ams->al_addr = al.addr; @@ -2078,7 +2113,7 @@ static int add_callchain_ip(struct thread *thread, al.sym = NULL; al.srcline = NULL; if (!cpumode) { - thread__find_cpumode_addr_location(thread, ip, &al); + thread__find_cpumode_addr_location(thread, ip, symbols, &al); } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { @@ -2106,6 +2141,8 @@ static int add_callchain_ip(struct thread *thread, } if (symbols) thread__find_symbol(thread, *cpumode, ip, &al); + else + thread__find_map(thread, *cpumode, ip, &al); } if (al.sym != NULL) { diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index b56abec84fed..180b369c366c 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -171,6 +171,7 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); struct machine *machine__new_kallsyms(void); +struct machine *machine__new_live(bool kernel_maps, pid_t pid); int machine__init(struct machine *machine, const char *root_dir, pid_t pid); void machine__exit(struct machine *machine); void machine__delete_threads(struct machine *machine); diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 0b40d901675e..85b2a93a59ac 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -1082,10 +1082,13 @@ struct map *maps__find(struct maps *maps, u64 ip) while (!done) { down_read(maps__lock(maps)); if (maps__maps_by_address_sorted(maps)) { - struct map **mapp = - bsearch(&ip, maps__maps_by_address(maps), maps__nr_maps(maps), - sizeof(*mapp), map__addr_cmp); + struct map **mapp = NULL; + struct map **maps_by_address = maps__maps_by_address(maps); + unsigned int nr_maps = maps__nr_maps(maps); + if (maps_by_address && nr_maps) + mapp = bsearch(&ip, maps_by_address, nr_maps, sizeof(*mapp), + map__addr_cmp); if (mapp) result = map__get(*mapp); done = true; diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 884d9aebce91..80b3069427bc 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -303,15 +303,12 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr, char **eve } if (cpu_map) { - struct perf_cpu_map *online = cpu_map__online(); - - if (!perf_cpu_map__equal(cpu_map, online)) { + if (!perf_cpu_map__equal(cpu_map, cpu_map__online())) { char buf[200]; cpu_map__snprint(cpu_map, buf, sizeof(buf)); pr_warning("Memory events are enabled on a subset of CPUs: %s\n", buf); } - perf_cpu_map__put(online); perf_cpu_map__put(cpu_map); } @@ -680,7 +677,10 @@ do { \ if (lvl & P(LVL, LFB)) stats->ld_fbhit++; if (lvl & P(LVL, L1 )) stats->ld_l1hit++; if (lvl & P(LVL, L2)) { - stats->ld_l2hit++; + if (snoop & P(SNOOP, HITM)) + HITM_INC(lcl_hitm); + else + stats->ld_l2hit++; if (snoopx & P(SNOOPX, PEER)) PEER_INC(lcl_peer); @@ -799,3 +799,181 @@ void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) stats->nomap += add->nomap; stats->noparse += add->noparse; } + +/* + * It returns an index in hist_entry->mem_stat array for the given val which + * represents a data-src based on the mem_stat_type. + */ +int mem_stat_index(const enum mem_stat_type mst, const u64 val) +{ + union perf_mem_data_src src = { + .val = val, + }; + + switch (mst) { + case PERF_MEM_STAT_OP: + switch (src.mem_op) { + case PERF_MEM_OP_LOAD: + return MEM_STAT_OP_LOAD; + case PERF_MEM_OP_STORE: + return MEM_STAT_OP_STORE; + case PERF_MEM_OP_LOAD | PERF_MEM_OP_STORE: + return MEM_STAT_OP_LDST; + default: + if (src.mem_op & PERF_MEM_OP_PFETCH) + return MEM_STAT_OP_PFETCH; + if (src.mem_op & PERF_MEM_OP_EXEC) + return MEM_STAT_OP_EXEC; + return MEM_STAT_OP_OTHER; + } + case PERF_MEM_STAT_CACHE: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_L1: + return MEM_STAT_CACHE_L1; + case PERF_MEM_LVLNUM_L2: + return MEM_STAT_CACHE_L2; + case PERF_MEM_LVLNUM_L3: + return MEM_STAT_CACHE_L3; + case PERF_MEM_LVLNUM_L4: + return MEM_STAT_CACHE_L4; + case PERF_MEM_LVLNUM_LFB: + return MEM_STAT_CACHE_L1_BUF; + case PERF_MEM_LVLNUM_L2_MHB: + return MEM_STAT_CACHE_L2_BUF; + default: + return MEM_STAT_CACHE_OTHER; + } + case PERF_MEM_STAT_MEMORY: + switch (src.mem_lvl_num) { + case PERF_MEM_LVLNUM_MSC: + return MEM_STAT_MEMORY_MSC; + case PERF_MEM_LVLNUM_RAM: + return MEM_STAT_MEMORY_RAM; + case PERF_MEM_LVLNUM_UNC: + return MEM_STAT_MEMORY_UNC; + case PERF_MEM_LVLNUM_CXL: + return MEM_STAT_MEMORY_CXL; + case PERF_MEM_LVLNUM_IO: + return MEM_STAT_MEMORY_IO; + case PERF_MEM_LVLNUM_PMEM: + return MEM_STAT_MEMORY_PMEM; + default: + return MEM_STAT_MEMORY_OTHER; + } + case PERF_MEM_STAT_SNOOP: + switch (src.mem_snoop) { + case PERF_MEM_SNOOP_HIT: + return MEM_STAT_SNOOP_HIT; + case PERF_MEM_SNOOP_HITM: + return MEM_STAT_SNOOP_HITM; + case PERF_MEM_SNOOP_MISS: + return MEM_STAT_SNOOP_MISS; + default: + return MEM_STAT_SNOOP_OTHER; + } + case PERF_MEM_STAT_DTLB: + switch (src.mem_dtlb) { + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L1_HIT; + case PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_L2_HIT; + case PERF_MEM_TLB_L1 | PERF_MEM_TLB_L2 | PERF_MEM_TLB_HIT: + return MEM_STAT_DTLB_ANY_HIT; + default: + if (src.mem_dtlb & PERF_MEM_TLB_MISS) + return MEM_STAT_DTLB_MISS; + return MEM_STAT_DTLB_OTHER; + } + default: + break; + } + return -1; +} + +/* To align output, returned string should be shorter than MEM_STAT_PRINT_LEN */ +const char *mem_stat_name(const enum mem_stat_type mst, const int idx) +{ + switch (mst) { + case PERF_MEM_STAT_OP: + switch (idx) { + case MEM_STAT_OP_LOAD: + return "Load"; + case MEM_STAT_OP_STORE: + return "Store"; + case MEM_STAT_OP_LDST: + return "Ld+St"; + case MEM_STAT_OP_PFETCH: + return "Pfetch"; + case MEM_STAT_OP_EXEC: + return "Exec"; + case MEM_STAT_OP_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_CACHE: + switch (idx) { + case MEM_STAT_CACHE_L1: + return "L1"; + case MEM_STAT_CACHE_L2: + return "L2"; + case MEM_STAT_CACHE_L3: + return "L3"; + case MEM_STAT_CACHE_L4: + return "L4"; + case MEM_STAT_CACHE_L1_BUF: + return "L1-buf"; + case MEM_STAT_CACHE_L2_BUF: + return "L2-buf"; + case MEM_STAT_CACHE_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_MEMORY: + switch (idx) { + case MEM_STAT_MEMORY_RAM: + return "RAM"; + case MEM_STAT_MEMORY_MSC: + return "MSC"; + case MEM_STAT_MEMORY_UNC: + return "Uncach"; + case MEM_STAT_MEMORY_CXL: + return "CXL"; + case MEM_STAT_MEMORY_IO: + return "IO"; + case MEM_STAT_MEMORY_PMEM: + return "PMEM"; + case MEM_STAT_MEMORY_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_SNOOP: + switch (idx) { + case MEM_STAT_SNOOP_HIT: + return "Hit"; + case MEM_STAT_SNOOP_HITM: + return "HitM"; + case MEM_STAT_SNOOP_MISS: + return "Miss"; + case MEM_STAT_SNOOP_OTHER: + default: + return "Other"; + } + case PERF_MEM_STAT_DTLB: + switch (idx) { + case MEM_STAT_DTLB_L1_HIT: + return "L1-Hit"; + case MEM_STAT_DTLB_L2_HIT: + return "L2-Hit"; + case MEM_STAT_DTLB_ANY_HIT: + return "L?-Hit"; + case MEM_STAT_DTLB_MISS: + return "Miss"; + case MEM_STAT_DTLB_OTHER: + default: + return "Other"; + } + default: + break; + } + return "N/A"; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index a5c19d39ee37..5b98076904b0 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -89,4 +89,61 @@ struct hist_entry; int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi); void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add); +enum mem_stat_type { + PERF_MEM_STAT_OP, + PERF_MEM_STAT_CACHE, + PERF_MEM_STAT_MEMORY, + PERF_MEM_STAT_SNOOP, + PERF_MEM_STAT_DTLB, +}; + +#define MEM_STAT_PRINT_LEN 7 /* 1 space + 5 digits + 1 percent sign */ + +enum mem_stat_op { + MEM_STAT_OP_LOAD, + MEM_STAT_OP_STORE, + MEM_STAT_OP_LDST, + MEM_STAT_OP_PFETCH, + MEM_STAT_OP_EXEC, + MEM_STAT_OP_OTHER, +}; + +enum mem_stat_cache { + MEM_STAT_CACHE_L1, + MEM_STAT_CACHE_L2, + MEM_STAT_CACHE_L3, + MEM_STAT_CACHE_L4, + MEM_STAT_CACHE_L1_BUF, + MEM_STAT_CACHE_L2_BUF, + MEM_STAT_CACHE_OTHER, +}; + +enum mem_stat_memory { + MEM_STAT_MEMORY_RAM, + MEM_STAT_MEMORY_MSC, + MEM_STAT_MEMORY_UNC, + MEM_STAT_MEMORY_CXL, + MEM_STAT_MEMORY_IO, + MEM_STAT_MEMORY_PMEM, + MEM_STAT_MEMORY_OTHER, +}; + +enum mem_stat_snoop { + MEM_STAT_SNOOP_HIT, + MEM_STAT_SNOOP_HITM, + MEM_STAT_SNOOP_MISS, + MEM_STAT_SNOOP_OTHER, +}; + +enum mem_stat_dtlb { + MEM_STAT_DTLB_L1_HIT, + MEM_STAT_DTLB_L2_HIT, + MEM_STAT_DTLB_ANY_HIT, + MEM_STAT_DTLB_MISS, + MEM_STAT_DTLB_OTHER, +}; + +int mem_stat_index(const enum mem_stat_type mst, const u64 data_src); +const char *mem_stat_name(const enum mem_stat_type mst, const int idx); + #endif /* __PERF_MEM_EVENTS_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 46920ebadfd1..43d35f956a33 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -353,7 +353,7 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, return 0; } -static bool match_metric(const char *metric_or_groups, const char *sought) +static bool match_metric_or_groups(const char *metric_or_groups, const char *sought) { int len; char *m; @@ -369,18 +369,19 @@ static bool match_metric(const char *metric_or_groups, const char *sought) (metric_or_groups[len] == 0 || metric_or_groups[len] == ';')) return true; m = strchr(metric_or_groups, ';'); - return m && match_metric(m + 1, sought); + return m && match_metric_or_groups(m + 1, sought); } -static bool match_pm_metric(const struct pmu_metric *pm, const char *pmu, const char *metric) +static bool match_pm_metric_or_groups(const struct pmu_metric *pm, const char *pmu, + const char *metric_or_groups) { const char *pm_pmu = pm->pmu ?: "cpu"; if (strcmp(pmu, "all") && strcmp(pm_pmu, pmu)) return false; - return match_metric(pm->metric_group, metric) || - match_metric(pm->metric_name, metric); + return match_metric_or_groups(pm->metric_group, metric_or_groups) || + match_metric_or_groups(pm->metric_name, metric_or_groups); } /** struct mep - RB-tree node for building printing information. */ @@ -395,6 +396,7 @@ struct mep { const char *metric_expr; const char *metric_threshold; const char *metric_unit; + const char *pmu_name; }; static int mep_cmp(struct rb_node *rb_node, const void *entry) @@ -475,6 +477,7 @@ static int metricgroup__add_to_mep_groups(const struct pmu_metric *pm, me->metric_expr = pm->metric_expr; me->metric_threshold = pm->metric_threshold; me->metric_unit = pm->unit; + me->pmu_name = pm->pmu; } } free(omg); @@ -550,7 +553,8 @@ void metricgroup__print(const struct print_callbacks *print_cb, void *print_stat me->metric_long_desc, me->metric_expr, me->metric_threshold, - me->metric_unit); + me->metric_unit, + me->pmu_name); next = rb_next(node); rblist__remove_node(&groups, node); } @@ -802,11 +806,6 @@ struct metricgroup_add_iter_data { const struct pmu_metrics_table *table; }; -static bool metricgroup__find_metric(const char *pmu, - const char *metric, - const struct pmu_metrics_table *table, - struct pmu_metric *pm); - static int add_metric(struct list_head *metric_list, const struct pmu_metric *pm, const char *modifier, @@ -818,6 +817,16 @@ static int add_metric(struct list_head *metric_list, const struct visited_metric *visited, const struct pmu_metrics_table *table); +static int metricgroup__find_metric_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table __maybe_unused, + void *vdata) +{ + struct pmu_metric *copied_pm = vdata; + + memcpy(copied_pm, pm, sizeof(*pm)); + return 0; +} + /** * resolve_metric - Locate metrics within the root metric and recursively add * references to them. @@ -838,7 +847,7 @@ static int add_metric(struct list_head *metric_list, * architecture perf is running upon. */ static int resolve_metric(struct list_head *metric_list, - const char *pmu, + struct perf_pmu *pmu, const char *modifier, bool metric_no_group, bool metric_no_threshold, @@ -868,7 +877,9 @@ static int resolve_metric(struct list_head *metric_list, hashmap__for_each_entry(root_metric->pctx->ids, cur, bkt) { struct pmu_metric pm; - if (metricgroup__find_metric(pmu, cur->pkey, table, &pm)) { + if (pmu_metrics_table__find_metric(table, pmu, cur->pkey, + metricgroup__find_metric_callback, + &pm) != PMU_METRICS__NOT_FOUND) { pending = realloc(pending, (pending_cnt + 1) * sizeof(struct to_resolve)); if (!pending) @@ -1019,7 +1030,12 @@ static int __add_metric(struct list_head *metric_list, } if (!ret) { /* Resolve referenced metrics. */ - const char *pmu = pm->pmu ?: "cpu"; + struct perf_pmu *pmu; + + if (pm->pmu && pm->pmu[0] != '\0') + pmu = perf_pmus__find(pm->pmu); + else + pmu = perf_pmus__scan_core(/*pmu=*/ NULL); ret = resolve_metric(metric_list, pmu, modifier, metric_no_group, metric_no_threshold, user_requested_cpu_list, @@ -1036,44 +1052,6 @@ static int __add_metric(struct list_head *metric_list, return ret; } -struct metricgroup__find_metric_data { - const char *pmu; - const char *metric; - struct pmu_metric *pm; -}; - -static int metricgroup__find_metric_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) -{ - struct metricgroup__find_metric_data *data = vdata; - const char *pm_pmu = pm->pmu ?: "cpu"; - - if (strcmp(data->pmu, "all") && strcmp(pm_pmu, data->pmu)) - return 0; - - if (!match_metric(pm->metric_name, data->metric)) - return 0; - - memcpy(data->pm, pm, sizeof(*pm)); - return 1; -} - -static bool metricgroup__find_metric(const char *pmu, - const char *metric, - const struct pmu_metrics_table *table, - struct pmu_metric *pm) -{ - struct metricgroup__find_metric_data data = { - .pmu = pmu, - .metric = metric, - .pm = pm, - }; - - return pmu_metrics_table__for_each_metric(table, metricgroup__find_metric_callback, &data) - ? true : false; -} - static int add_metric(struct list_head *metric_list, const struct pmu_metric *pm, const char *modifier, @@ -1119,7 +1097,7 @@ static int metricgroup__add_metric_sys_event_iter(const struct pmu_metric *pm, struct metricgroup_add_iter_data *d = data; int ret; - if (!match_pm_metric(pm, d->pmu, d->metric_name)) + if (!match_pm_metric_or_groups(pm, d->pmu, d->metric_name)) return 0; ret = add_metric(d->metric_list, pm, d->modifier, d->metric_no_group, @@ -1200,9 +1178,9 @@ static int metricgroup__add_metric_callback(const struct pmu_metric *pm, struct metricgroup__add_metric_data *data = vdata; int ret = 0; - if (pm->metric_expr && match_pm_metric(pm, data->pmu, data->metric_name)) { + if (pm->metric_expr && match_pm_metric_or_groups(pm, data->pmu, data->metric_name)) { bool metric_no_group = data->metric_no_group || - match_metric(pm->metricgroup_no_group, data->metric_name); + match_metric_or_groups(pm->metricgroup_no_group, data->metric_name); data->has_match = true; ret = add_metric(data->list, pm, data->modifier, metric_no_group, @@ -1723,29 +1701,32 @@ int metricgroup__parse_groups_test(struct evlist *evlist, struct metricgroup__has_metric_data { const char *pmu; - const char *metric; + const char *metric_or_groups; }; -static int metricgroup__has_metric_callback(const struct pmu_metric *pm, - const struct pmu_metrics_table *table __maybe_unused, - void *vdata) +static int metricgroup__has_metric_or_groups_callback(const struct pmu_metric *pm, + const struct pmu_metrics_table *table + __maybe_unused, + void *vdata) { struct metricgroup__has_metric_data *data = vdata; - return match_pm_metric(pm, data->pmu, data->metric) ? 1 : 0; + return match_pm_metric_or_groups(pm, data->pmu, data->metric_or_groups) ? 1 : 0; } -bool metricgroup__has_metric(const char *pmu, const char *metric) +bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups) { const struct pmu_metrics_table *table = pmu_metrics_table__find(); struct metricgroup__has_metric_data data = { .pmu = pmu, - .metric = metric, + .metric_or_groups = metric_or_groups, }; if (!table) return false; - return pmu_metrics_table__for_each_metric(table, metricgroup__has_metric_callback, &data) + return pmu_metrics_table__for_each_metric(table, + metricgroup__has_metric_or_groups_callback, + &data) ? true : false; } diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h index 779f6ede1b51..a04ac1afa6cc 100644 --- a/tools/perf/util/metricgroup.h +++ b/tools/perf/util/metricgroup.h @@ -85,7 +85,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist, struct rblist *metric_events); void metricgroup__print(const struct print_callbacks *print_cb, void *print_state); -bool metricgroup__has_metric(const char *pmu, const char *metric); +bool metricgroup__has_metric_or_groups(const char *pmu, const char *metric_or_groups); unsigned int metricgroups__topdown_max_level(void); int arch_get_runtimeparam(const struct pmu_metric *pm); void metricgroup__rblist_exit(struct rblist *metric_events); diff --git a/tools/perf/util/mutex.h b/tools/perf/util/mutex.h index 62d258c71ded..38458f00846f 100644 --- a/tools/perf/util/mutex.h +++ b/tools/perf/util/mutex.h @@ -43,6 +43,12 @@ #define EXCLUSIVE_LOCK_FUNCTION(...) __attribute__((exclusive_lock_function(__VA_ARGS__))) /* + * Documents functions that acquire a shared (reader) lock in the body of a + * function, and do not release it. + */ +#define SHARED_LOCK_FUNCTION(...) __attribute__((shared_lock_function(__VA_ARGS__))) + +/* * Documents functions that expect a lock to be held on entry to the function, * and release it in the body of the function. */ @@ -55,6 +61,9 @@ /* Documents a function that expects a mutex to be held prior to entry. */ #define EXCLUSIVE_LOCKS_REQUIRED(...) __attribute__((exclusive_locks_required(__VA_ARGS__))) +/* Documents a function that expects a shared (reader) lock to be held prior to entry. */ +#define SHARED_LOCKS_REQUIRED(...) __attribute__((shared_locks_required(__VA_ARGS__))) + /* Turns off thread safety checking within the body of a particular function. */ #define NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) @@ -66,9 +75,11 @@ #define LOCKS_EXCLUDED(...) #define LOCK_RETURNED(x) #define EXCLUSIVE_LOCK_FUNCTION(...) +#define SHARED_LOCK_FUNCTION(...) #define UNLOCK_FUNCTION(...) #define EXCLUSIVE_TRYLOCK_FUNCTION(...) #define EXCLUSIVE_LOCKS_REQUIRED(...) +#define SHARED_LOCKS_REQUIRED(...) #define NO_THREAD_SAFETY_ANALYSIS #endif diff --git a/tools/perf/util/off_cpu.h b/tools/perf/util/off_cpu.h index 2dd67c60f211..64bf763ddf50 100644 --- a/tools/perf/util/off_cpu.h +++ b/tools/perf/util/off_cpu.h @@ -13,9 +13,10 @@ struct record_opts; #define OFFCPU_SAMPLE_TYPES (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \ PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \ - PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_PERIOD | PERF_SAMPLE_RAW | \ PERF_SAMPLE_CGROUP) +#define OFFCPU_THRESH 500000000ULL #ifdef HAVE_BPF_SKEL int off_cpu_prepare(struct evlist *evlist, struct target *target, diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5152fd5a6ead..2380de56a207 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -7,6 +7,7 @@ #include <errno.h> #include <sys/ioctl.h> #include <sys/param.h> +#include "cpumap.h" #include "term.h" #include "env.h" #include "evlist.h" @@ -28,6 +29,7 @@ #include "util/evsel_config.h" #include "util/event.h" #include "util/bpf-filter.h" +#include "util/stat.h" #include "util/util.h" #include "tracepoint.h" @@ -179,6 +181,26 @@ static char *get_config_name(const struct parse_events_terms *head_terms) return get_config_str(head_terms, PARSE_EVENTS__TERM_TYPE_NAME); } +static struct perf_cpu_map *get_config_cpu(const struct parse_events_terms *head_terms) +{ + struct parse_events_term *term; + struct perf_cpu_map *cpus = NULL; + + if (!head_terms) + return NULL; + + list_for_each_entry(term, &head_terms->terms, list) { + if (term->type_term == PARSE_EVENTS__TERM_TYPE_CPU) { + struct perf_cpu_map *cpu = perf_cpu_map__new_int(term->val.num); + + perf_cpu_map__merge(&cpus, cpu); + perf_cpu_map__put(cpu); + } + } + + return cpus; +} + /** * fix_raw - For each raw term see if there is an event (aka alias) in pmu that * matches the raw's string value. If the string value matches an @@ -228,25 +250,55 @@ __add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, bool init_attr, const char *name, const char *metric_id, struct perf_pmu *pmu, - struct list_head *config_terms, bool auto_merge_stats, + struct list_head *config_terms, struct evsel *first_wildcard_match, struct perf_cpu_map *cpu_list, u64 alternate_hw_config) { struct evsel *evsel; - struct perf_cpu_map *cpus = perf_cpu_map__is_empty(cpu_list) && pmu ? pmu->cpus : cpu_list; + bool is_pmu_core; + struct perf_cpu_map *cpus; - cpus = perf_cpu_map__get(cpus); - if (pmu) - perf_pmu__warn_invalid_formats(pmu); + /* + * Ensure the first_wildcard_match's PMU matches that of the new event + * being added. Otherwise try to match with another event further down + * the evlist. + */ + if (first_wildcard_match) { + struct evsel *pos = list_prev_entry(first_wildcard_match, core.node); + + first_wildcard_match = NULL; + list_for_each_entry_continue(pos, list, core.node) { + if (perf_pmu__name_no_suffix_match(pos->pmu, pmu->name)) { + first_wildcard_match = pos; + break; + } + if (pos->pmu->is_core && (!pmu || pmu->is_core)) { + first_wildcard_match = pos; + break; + } + } + } - if (pmu && (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX)) { - perf_pmu__warn_invalid_config(pmu, attr->config, name, - PERF_PMU_FORMAT_VALUE_CONFIG, "config"); - perf_pmu__warn_invalid_config(pmu, attr->config1, name, - PERF_PMU_FORMAT_VALUE_CONFIG1, "config1"); - perf_pmu__warn_invalid_config(pmu, attr->config2, name, - PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); - perf_pmu__warn_invalid_config(pmu, attr->config3, name, - PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + if (pmu) { + is_pmu_core = pmu->is_core; + cpus = perf_cpu_map__get(perf_cpu_map__is_empty(cpu_list) ? pmu->cpus : cpu_list); + perf_pmu__warn_invalid_formats(pmu); + if (attr->type == PERF_TYPE_RAW || attr->type >= PERF_TYPE_MAX) { + perf_pmu__warn_invalid_config(pmu, attr->config, name, + PERF_PMU_FORMAT_VALUE_CONFIG, "config"); + perf_pmu__warn_invalid_config(pmu, attr->config1, name, + PERF_PMU_FORMAT_VALUE_CONFIG1, "config1"); + perf_pmu__warn_invalid_config(pmu, attr->config2, name, + PERF_PMU_FORMAT_VALUE_CONFIG2, "config2"); + perf_pmu__warn_invalid_config(pmu, attr->config3, name, + PERF_PMU_FORMAT_VALUE_CONFIG3, "config3"); + } + } else { + is_pmu_core = (attr->type == PERF_TYPE_HARDWARE || + attr->type == PERF_TYPE_HW_CACHE); + if (perf_cpu_map__is_empty(cpu_list)) + cpus = is_pmu_core ? perf_cpu_map__new_online_cpus() : NULL; + else + cpus = perf_cpu_map__get(cpu_list); } if (init_attr) event_attr_init(attr); @@ -261,10 +313,10 @@ __add_event(struct list_head *list, int *idx, evsel->core.cpus = cpus; evsel->core.own_cpus = perf_cpu_map__get(cpus); evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; - evsel->core.is_pmu_core = pmu ? pmu->is_core : false; - evsel->auto_merge_stats = auto_merge_stats; + evsel->core.is_pmu_core = is_pmu_core; evsel->pmu = pmu; evsel->alternate_hw_config = alternate_hw_config; + evsel->first_wildcard_match = first_wildcard_match; if (name) evsel->name = strdup(name); @@ -287,7 +339,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, { return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, metric_id, pmu, /*config_terms=*/NULL, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } @@ -298,7 +350,7 @@ static int add_event(struct list_head *list, int *idx, { return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, /*pmu=*/NULL, config_terms, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + /*first_wildcard_match=*/NULL, /*cpu_list=*/NULL, alternate_hw_config) ? 0 : -ENOMEM; } @@ -423,7 +475,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state, static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats, u64 alternate_hw_config); + struct evsel *first_wildcard_match, u64 alternate_hw_config); int parse_events_add_cache(struct list_head *list, int *idx, const char *name, struct parse_events_state *parse_state, @@ -433,11 +485,13 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, bool found_supported = false; const char *config_name = get_config_name(parsed_terms); const char *metric_id = get_config_metric_id(parsed_terms); + struct perf_cpu_map *cpus = get_config_cpu(parsed_terms); + int ret = 0; + struct evsel *first_wildcard_match = NULL; while ((pmu = perf_pmus__scan(pmu)) != NULL) { LIST_HEAD(config_terms); struct perf_event_attr attr; - int ret; if (parse_events__filter_pmu(parse_state, pmu)) continue; @@ -449,10 +503,13 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, */ ret = parse_events_add_pmu(parse_state, list, pmu, parsed_terms, - perf_pmu__auto_merge_stats(pmu), + first_wildcard_match, /*alternate_hw_config=*/PERF_COUNT_HW_MAX); if (ret) - return ret; + goto out_err; + if (first_wildcard_match == NULL) + first_wildcard_match = + container_of(list->prev, struct evsel, core.node); continue; } @@ -472,21 +529,29 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, if (parsed_terms) { if (config_attr(&attr, parsed_terms, parse_state->error, - config_term_common)) - return -EINVAL; - - if (get_config_terms(parsed_terms, &config_terms)) - return -ENOMEM; + config_term_common)) { + ret = -EINVAL; + goto out_err; + } + if (get_config_terms(parsed_terms, &config_terms)) { + ret = -ENOMEM; + goto out_err; + } } if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL, - /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) - return -ENOMEM; + metric_id, pmu, &config_terms, first_wildcard_match, + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) + ret = -ENOMEM; + if (first_wildcard_match == NULL) + first_wildcard_match = container_of(list->prev, struct evsel, core.node); free_config_terms(&config_terms); + if (ret) + goto out_err; } +out_err: + perf_cpu_map__put(cpus); return found_supported ? 0 : -EINVAL; } @@ -805,6 +870,7 @@ const char *parse_events__term_type_str(enum parse_events__term_type term_type) [PARSE_EVENTS__TERM_TYPE_RAW] = "raw", [PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE] = "legacy-cache", [PARSE_EVENTS__TERM_TYPE_HARDWARE] = "hardware", + [PARSE_EVENTS__TERM_TYPE_CPU] = "cpu", }; if ((unsigned int)term_type >= __PARSE_EVENTS__TERM_TYPE_NR) return "unknown term"; @@ -834,6 +900,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er case PARSE_EVENTS__TERM_TYPE_METRIC_ID: case PARSE_EVENTS__TERM_TYPE_SAMPLE_PERIOD: case PARSE_EVENTS__TERM_TYPE_PERCORE: + case PARSE_EVENTS__TERM_TYPE_CPU: return true; case PARSE_EVENTS__TERM_TYPE_USER: case PARSE_EVENTS__TERM_TYPE_SAMPLE_FREQ: @@ -981,6 +1048,15 @@ do { \ return -EINVAL; } break; + case PARSE_EVENTS__TERM_TYPE_CPU: + CHECK_TYPE_VAL(NUM); + if (term->val.num >= (u64)cpu__max_present_cpu().cpu) { + parse_events_error__handle(err, term->err_val, + strdup("too big"), + NULL); + return -EINVAL; + } + break; case PARSE_EVENTS__TERM_TYPE_DRV_CFG: case PARSE_EVENTS__TERM_TYPE_USER: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: @@ -1108,6 +1184,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr, case PARSE_EVENTS__TERM_TYPE_RAW: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: if (err) { parse_events_error__handle(err, term->err_term, @@ -1242,6 +1319,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_RAW: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: break; } @@ -1296,6 +1374,7 @@ static int get_config_chgs(struct perf_pmu *pmu, struct parse_events_terms *head case PARSE_EVENTS__TERM_TYPE_RAW: case PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE: case PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_CPU: default: break; } @@ -1335,11 +1414,13 @@ int parse_events_add_tracepoint(struct parse_events_state *parse_state, static int __parse_events_add_numeric(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, u32 type, u32 extended_type, - u64 config, const struct parse_events_terms *head_config) + u64 config, const struct parse_events_terms *head_config, + struct evsel *first_wildcard_match) { struct perf_event_attr attr; LIST_HEAD(config_terms); const char *name, *metric_id; + struct perf_cpu_map *cpus; int ret; memset(&attr, 0, sizeof(attr)); @@ -1361,10 +1442,11 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); + cpus = get_config_cpu(head_config); ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX - ) == NULL ? -ENOMEM : 0; + metric_id, pmu, &config_terms, first_wildcard_match, + cpus, /*alternate_hw_config=*/PERF_COUNT_HW_MAX) ? 0 : -ENOMEM; + perf_cpu_map__put(cpus); free_config_terms(&config_terms); return ret; } @@ -1380,6 +1462,7 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, /* Wildcards on numeric values are only supported by core PMUs. */ if (wildcard && perf_pmus__supports_extended_type()) { + struct evsel *first_wildcard_match = NULL; while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { int ret; @@ -1389,15 +1472,20 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, ret = __parse_events_add_numeric(parse_state, list, pmu, type, pmu->type, - config, head_config); + config, head_config, + first_wildcard_match); if (ret) return ret; + if (first_wildcard_match == NULL) + first_wildcard_match = + container_of(list->prev, struct evsel, core.node); } if (found_supported) return 0; } return __parse_events_add_numeric(parse_state, list, perf_pmus__find_by_type(type), - type, /*extended_type=*/0, config, head_config); + type, /*extended_type=*/0, config, head_config, + /*first_wildcard_match=*/NULL); } static bool config_term_percore(struct list_head *config_terms) @@ -1415,7 +1503,7 @@ static bool config_term_percore(struct list_head *config_terms) static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats, u64 alternate_hw_config) + struct evsel *first_wildcard_match, u64 alternate_hw_config) { struct perf_event_attr attr; struct perf_pmu_info info; @@ -1424,6 +1512,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, LIST_HEAD(config_terms); struct parse_events_terms parsed_terms; bool alias_rewrote_terms = false; + struct perf_cpu_map *term_cpu = NULL; if (verbose > 1) { struct strbuf sb; @@ -1451,7 +1540,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, /*name=*/NULL, /*metric_id=*/NULL, pmu, - /*config_terms=*/NULL, auto_merge_stats, + /*config_terms=*/NULL, first_wildcard_match, /*cpu_list=*/NULL, alternate_hw_config); return evsel ? 0 : -ENOMEM; } @@ -1518,11 +1607,12 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, return -EINVAL; } + term_cpu = get_config_cpu(&parsed_terms); evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, get_config_name(&parsed_terms), get_config_metric_id(&parsed_terms), pmu, - &config_terms, auto_merge_stats, /*cpu_list=*/NULL, - alternate_hw_config); + &config_terms, first_wildcard_match, term_cpu, alternate_hw_config); + perf_cpu_map__put(term_cpu); if (!evsel) { parse_events_terms__exit(&parsed_terms); return -ENOMEM; @@ -1539,6 +1629,10 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; evsel->snapshot = info.snapshot; + evsel->retirement_latency.mean = info.retirement_latency_mean; + evsel->retirement_latency.min = info.retirement_latency_min; + evsel->retirement_latency.max = info.retirement_latency_max; + return 0; } @@ -1554,6 +1648,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, int ok = 0; const char *config; struct parse_events_terms parsed_terms; + struct evsel *first_wildcard_match = NULL; *listp = NULL; @@ -1586,17 +1681,14 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, INIT_LIST_HEAD(list); while ((pmu = perf_pmus__scan(pmu)) != NULL) { - bool auto_merge_stats; - if (parse_events__filter_pmu(parse_state, pmu)) continue; if (!perf_pmu__have_event(pmu, event_name)) continue; - auto_merge_stats = perf_pmu__auto_merge_stats(pmu); if (!parse_events_add_pmu(parse_state, list, pmu, - &parsed_terms, auto_merge_stats, hw_config)) { + &parsed_terms, first_wildcard_match, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1605,11 +1697,13 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, strbuf_release(&sb); ok++; } + if (first_wildcard_match == NULL) + first_wildcard_match = container_of(list->prev, struct evsel, core.node); } if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms, - /*auto_merge_stats=*/true, hw_config)) { + first_wildcard_match, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1640,6 +1734,7 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state struct perf_pmu *pmu; int ok = 0; char *help; + struct evsel *first_wildcard_match = NULL; *listp = malloc(sizeof(**listp)); if (!*listp) @@ -1650,14 +1745,14 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Attempt to add to list assuming event_or_pmu is a PMU name. */ pmu = perf_pmus__find(event_or_pmu); if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - /*auto_merge_stats=*/false, + first_wildcard_match, /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(), const_parsed_terms, - /*auto_merge_stats=*/false, + first_wildcard_match, /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; } @@ -1667,15 +1762,16 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state while ((pmu = perf_pmus__scan(pmu)) != NULL) { if (!parse_events__filter_pmu(parse_state, pmu) && perf_pmu__wildcard_match(pmu, event_or_pmu)) { - bool auto_merge_stats = perf_pmu__auto_merge_stats(pmu); - if (!parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - auto_merge_stats, + first_wildcard_match, /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) { ok++; parse_state->wild_card_pmus = true; } + if (first_wildcard_match == NULL) + first_wildcard_match = + container_of((*listp)->prev, struct evsel, core.node); } } if (ok) @@ -2196,14 +2292,23 @@ int __parse_events(struct evlist *evlist, const char *str, const char *pmu_filte if (ret2 < 0) return ret; - if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) - pr_warning("WARNING: events were regrouped to match PMUs\n"); - /* * Add list to the evlist even with errors to allow callers to clean up. */ evlist__splice_list_tail(evlist, &parse_state.list); + if (ret2 && warn_if_reordered && !parse_state.wild_card_pmus) { + pr_warning("WARNING: events were regrouped to match PMUs\n"); + + if (verbose > 0) { + struct strbuf sb = STRBUF_INIT; + + evlist__uniquify_evsel_names(evlist, &stat_config); + evlist__format_evsels(evlist, &sb, 2048); + pr_debug("evlist after sorting/fixing: '%s'\n", sb.buf); + strbuf_release(&sb); + } + } if (!ret) { struct evsel *last; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index e176a34ab088..ab242f671031 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -80,7 +80,8 @@ enum parse_events__term_type { PARSE_EVENTS__TERM_TYPE_RAW, PARSE_EVENTS__TERM_TYPE_LEGACY_CACHE, PARSE_EVENTS__TERM_TYPE_HARDWARE, -#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_HARDWARE + 1) + PARSE_EVENTS__TERM_TYPE_CPU, +#define __PARSE_EVENTS__TERM_TYPE_NR (PARSE_EVENTS__TERM_TYPE_CPU + 1) }; struct parse_events_term { diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 7ed86e3e34e3..4af7b9c1f44d 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -335,6 +335,7 @@ aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); } aux-action { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_ACTION); } aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); } metric-id { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_METRIC_ID); } +cpu { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CPU); } cpu-cycles|cycles { return hw_term(yyscanner, PERF_COUNT_HW_CPU_CYCLES); } stalled-cycles-frontend|idle-cycles-frontend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND); } stalled-cycles-backend|idle-cycles-backend { return hw_term(yyscanner, PERF_COUNT_HW_STALLED_CYCLES_BACKEND); } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index b7ebac5ab1d1..609828513f6c 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -27,6 +27,7 @@ #include <util/pmu-flex.h> #include "parse-events.h" #include "print-events.h" +#include "hashmap.h" #include "header.h" #include "string2.h" #include "strbuf.h" @@ -66,8 +67,6 @@ struct perf_pmu_alias { char *topic; /** @terms: Owned list of the original parsed parameters. */ struct parse_events_terms terms; - /** @list: List element of struct perf_pmu aliases. */ - struct list_head list; /** * @pmu_name: The name copied from the json struct pmu_event. This can * differ from the PMU name as it won't have suffixes. @@ -77,6 +76,12 @@ struct perf_pmu_alias { char unit[UNIT_MAX_LEN+1]; /** @scale: Value to scale read counter values by. */ double scale; + /** @retirement_latency_mean: Value to be given for unsampled retirement latency mean. */ + double retirement_latency_mean; + /** @retirement_latency_min: Value to be given for unsampled retirement latency min. */ + double retirement_latency_min; + /** @retirement_latency_max: Value to be given for unsampled retirement latency max. */ + double retirement_latency_max; /** * @per_pkg: Does the file * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or @@ -257,7 +262,7 @@ static int pmu_format(struct perf_pmu *pmu, int dirfd, const char *name, bool ea return 0; } -int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +static int parse_double(const char *scale, char **end, double *sval) { char *lc; int ret = 0; @@ -294,6 +299,11 @@ out: return ret; } +int perf_pmu__convert_scale(const char *scale, char **end, double *sval) +{ + return parse_double(scale, end, sval); +} + static int perf_pmu__parse_scale(struct perf_pmu *pmu, struct perf_pmu_alias *alias) { struct stat st; @@ -407,25 +417,33 @@ static void perf_pmu__parse_snapshot(struct perf_pmu *pmu, struct perf_pmu_alias } /* Delete an alias entry. */ -static void perf_pmu_free_alias(struct perf_pmu_alias *newalias) +static void perf_pmu_free_alias(struct perf_pmu_alias *alias) { - zfree(&newalias->name); - zfree(&newalias->desc); - zfree(&newalias->long_desc); - zfree(&newalias->topic); - zfree(&newalias->pmu_name); - parse_events_terms__exit(&newalias->terms); - free(newalias); + if (!alias) + return; + + zfree(&alias->name); + zfree(&alias->desc); + zfree(&alias->long_desc); + zfree(&alias->topic); + zfree(&alias->pmu_name); + parse_events_terms__exit(&alias->terms); + free(alias); } static void perf_pmu__del_aliases(struct perf_pmu *pmu) { - struct perf_pmu_alias *alias, *tmp; + struct hashmap_entry *entry; + size_t bkt; - list_for_each_entry_safe(alias, tmp, &pmu->aliases, list) { - list_del(&alias->list); - perf_pmu_free_alias(alias); - } + if (!pmu->aliases) + return; + + hashmap__for_each_entry(pmu->aliases, entry, bkt) + perf_pmu_free_alias(entry->pvalue); + + hashmap__free(pmu->aliases); + pmu->aliases = NULL; } static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, @@ -433,35 +451,37 @@ static struct perf_pmu_alias *perf_pmu__find_alias(struct perf_pmu *pmu, bool load) { struct perf_pmu_alias *alias; + bool has_sysfs_event; + char event_file_name[FILENAME_MAX + 8]; - if (load && !pmu->sysfs_aliases_loaded) { - bool has_sysfs_event; - char event_file_name[FILENAME_MAX + 8]; + if (hashmap__find(pmu->aliases, name, &alias)) + return alias; - /* - * Test if alias/event 'name' exists in the PMU's sysfs/events - * directory. If not skip parsing the sysfs aliases. Sysfs event - * name must be all lower or all upper case. - */ - scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name); - for (size_t i = 7, n = 7 + strlen(name); i < n; i++) - event_file_name[i] = tolower(event_file_name[i]); + if (!load || pmu->sysfs_aliases_loaded) + return NULL; - has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); - if (!has_sysfs_event) { - for (size_t i = 7, n = 7 + strlen(name); i < n; i++) - event_file_name[i] = toupper(event_file_name[i]); + /* + * Test if alias/event 'name' exists in the PMU's sysfs/events + * directory. If not skip parsing the sysfs aliases. Sysfs event + * name must be all lower or all upper case. + */ + scnprintf(event_file_name, sizeof(event_file_name), "events/%s", name); + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = tolower(event_file_name[i]); - has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); - } - if (has_sysfs_event) - pmu_aliases_parse(pmu); + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); + if (!has_sysfs_event) { + for (size_t i = 7, n = 7 + strlen(name); i < n; i++) + event_file_name[i] = toupper(event_file_name[i]); + has_sysfs_event = perf_pmu__file_exists(pmu, event_file_name); } - list_for_each_entry(alias, &pmu->aliases, list) { - if (!strcasecmp(alias->name, name)) + if (has_sysfs_event) { + pmu_aliases_parse(pmu); + if (hashmap__find(pmu->aliases, name, &alias)) return alias; } + return NULL; } @@ -525,6 +545,18 @@ static int update_alias(const struct pmu_event *pe, if (!ret) snprintf(data->alias->unit, sizeof(data->alias->unit), "%s", unit); } + if (!ret && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &data->alias->retirement_latency_mean); + } + if (!ret && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &data->alias->retirement_latency_min); + } + if (!ret && pe->retirement_latency_max) { + ret = parse_double(pe->retirement_latency_max, NULL, + &data->alias->retirement_latency_max); + } return ret; } @@ -532,8 +564,8 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, const char *desc, const char *val, FILE *val_fd, const struct pmu_event *pe, enum event_source src) { - struct perf_pmu_alias *alias; - int ret; + struct perf_pmu_alias *alias, *old_alias; + int ret = 0; const char *long_desc = NULL, *topic = NULL, *unit = NULL, *pmu_name = NULL; bool deprecated = false, perpkg = false; @@ -562,6 +594,24 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, alias->per_pkg = perpkg; alias->snapshot = false; alias->deprecated = deprecated; + alias->retirement_latency_mean = 0.0; + alias->retirement_latency_min = 0.0; + alias->retirement_latency_max = 0.0; + + if (!ret && pe && pe->retirement_latency_mean) { + ret = parse_double(pe->retirement_latency_mean, NULL, + &alias->retirement_latency_mean); + } + if (!ret && pe && pe->retirement_latency_min) { + ret = parse_double(pe->retirement_latency_min, NULL, + &alias->retirement_latency_min); + } + if (!ret && pe && pe->retirement_latency_max) { + ret = parse_double(pe->retirement_latency_max, NULL, + &alias->retirement_latency_max); + } + if (ret) + return ret; ret = parse_events_terms(&alias->terms, val, val_fd); if (ret) { @@ -607,7 +657,8 @@ static int perf_pmu__new_alias(struct perf_pmu *pmu, const char *name, break; } - list_add_tail(&alias->list, &pmu->aliases); + hashmap__set(pmu->aliases, alias->name, alias, /*old_key=*/ NULL, &old_alias); + perf_pmu_free_alias(old_alias); return 0; } @@ -1095,43 +1146,77 @@ perf_pmu__arch_init(struct perf_pmu *pmu) pmu->mem_events = perf_mem_events; } +/* Variant of str_hash that does tolower on each character. */ +static size_t aliases__hash(long key, void *ctx __maybe_unused) +{ + const char *s = (const char *)key; + size_t h = 0; + + while (*s) { + h = h * 31 + tolower(*s); + s++; + } + return h; +} + +static bool aliases__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return strcasecmp((const char *)key1, (const char *)key2) == 0; +} + +int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name) +{ + pmu->type = type; + INIT_LIST_HEAD(&pmu->format); + INIT_LIST_HEAD(&pmu->caps); + + pmu->name = strdup(name); + if (!pmu->name) + return -ENOMEM; + + pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL); + if (!pmu->aliases) + return -ENOMEM; + + return 0; +} + struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *name, bool eager_load) { struct perf_pmu *pmu; - __u32 type; pmu = zalloc(sizeof(*pmu)); if (!pmu) return NULL; - pmu->name = strdup(name); - if (!pmu->name) - goto err; + if (perf_pmu__init(pmu, PERF_PMU_TYPE_FAKE, name) != 0) { + perf_pmu__delete(pmu); + return NULL; + } /* * Read type early to fail fast if a lookup name isn't a PMU. Ensure * that type value is successfully assigned (return 1). */ - if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &type) != 1) - goto err; - - INIT_LIST_HEAD(&pmu->format); - INIT_LIST_HEAD(&pmu->aliases); - INIT_LIST_HEAD(&pmu->caps); + if (perf_pmu__scan_file_at(pmu, dirfd, "type", "%u", &pmu->type) != 1) { + perf_pmu__delete(pmu); + return NULL; + } /* * The pmu data we store & need consists of the pmu * type value and format definitions. Load both right * now. */ - if (pmu_format(pmu, dirfd, name, eager_load)) - goto err; + if (pmu_format(pmu, dirfd, name, eager_load)) { + perf_pmu__delete(pmu); + return NULL; + } pmu->is_core = is_pmu_core(name); pmu->cpus = pmu_cpumask(dirfd, name, pmu->is_core); - pmu->type = type; pmu->is_uncore = pmu_is_uncore(dirfd, name); if (pmu->is_uncore) pmu->id = pmu_id(name); @@ -1153,10 +1238,6 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char pmu_aliases_parse_eager(pmu, dirfd); return pmu; -err: - zfree(&pmu->name); - free(pmu); - return NULL; } /* Creates the PMU when sysfs scanning fails. */ @@ -1178,7 +1259,7 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm pmu->cpus = cpu_map__online(); INIT_LIST_HEAD(&pmu->format); - INIT_LIST_HEAD(&pmu->aliases); + pmu->aliases = hashmap__new(aliases__hash, aliases__equal, /*ctx=*/ NULL); INIT_LIST_HEAD(&pmu->caps); list_add_tail(&pmu->list, core_pmus); return pmu; @@ -1429,7 +1510,7 @@ static int pmu_config_term(const struct perf_pmu *pmu, break; case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ return -EINVAL; - case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE: + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_CPU: /* Skip non-config terms. */ break; default: @@ -1678,6 +1759,9 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ info->unit = NULL; info->scale = 0.0; info->snapshot = false; + info->retirement_latency_mean = 0.0; + info->retirement_latency_min = 0.0; + info->retirement_latency_max = 0.0; if (perf_pmu__is_hwmon(pmu)) { ret = hwmon_pmu__check_alias(head_terms, info, err); @@ -1711,6 +1795,10 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ if (term->alternate_hw_config) *alternate_hw_config = term->val.num; + info->retirement_latency_mean = alias->retirement_latency_mean; + info->retirement_latency_min = alias->retirement_latency_min; + info->retirement_latency_max = alias->retirement_latency_max; + list_del_init(&term->list); parse_events_term__delete(term); } @@ -1804,6 +1892,7 @@ int perf_pmu__for_each_format(struct perf_pmu *pmu, void *state, pmu_format_call "aux-output", "aux-action=(pause|resume|start-paused)", "aux-sample-size=number", + "cpu=number", }; struct perf_pmu_format *format; int ret; @@ -1930,13 +2019,14 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, void *state, pmu_event_callback cb) { char buf[1024]; - struct perf_pmu_alias *event; struct pmu_event_info info = { .pmu = pmu, .event_type_desc = "Kernel PMU event", }; int ret = 0; struct strbuf sb; + struct hashmap_entry *entry; + size_t bkt; if (perf_pmu__is_hwmon(pmu)) return hwmon_pmu__for_each_event(pmu, state, cb); @@ -1944,7 +2034,8 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, strbuf_init(&sb, /*hint=*/ 0); pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); - list_for_each_entry(event, &pmu->aliases, list) { + hashmap__for_each_entry(pmu->aliases, entry, bkt) { + struct perf_pmu_alias *event = entry->pvalue; size_t buf_used, pmu_name_len; if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(event->name)) @@ -2052,6 +2143,9 @@ static bool perf_pmu___name_match(const struct perf_pmu *pmu, const char *to_mat for (size_t i = 0; i < ARRAY_SIZE(names); i++) { const char *name = names[i]; + if (!name) + continue; + if (wildcard && perf_pmu__match_wildcard_uncore(name, to_match)) return true; if (!wildcard && perf_pmu__match_ignoring_suffix_uncore(name, to_match)) @@ -2211,6 +2305,17 @@ static void perf_pmu__del_caps(struct perf_pmu *pmu) } } +struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name) +{ + struct perf_pmu_caps *caps; + + list_for_each_entry(caps, &pmu->caps, list) { + if (!strcmp(caps->name, name)) + return caps; + } + return NULL; +} + /* * Reading/parsing the given pmu capabilities, which should be located at: * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes. @@ -2401,6 +2506,9 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, void perf_pmu__delete(struct perf_pmu *pmu) { + if (!pmu) + return; + if (perf_pmu__is_hwmon(pmu)) hwmon_pmu__exit(pmu); @@ -2418,14 +2526,16 @@ void perf_pmu__delete(struct perf_pmu *pmu) const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) { - struct perf_pmu_alias *event; + struct hashmap_entry *entry; + size_t bkt; if (!pmu) return NULL; pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); - list_for_each_entry(event, &pmu->aliases, list) { + hashmap__for_each_entry(pmu->aliases, entry, bkt) { + struct perf_pmu_alias *event = entry->pvalue; struct perf_event_attr attr = {.config = 0,}; int ret = perf_pmu__config(pmu, &attr, &event->terms, /*apply_hardcoded=*/true, diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index b93014cc3670..71b8636fd07d 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -14,6 +14,7 @@ #include "mem-events.h" struct evsel_config_term; +struct hashmap; struct perf_cpu_map; struct print_callbacks; @@ -125,7 +126,7 @@ struct perf_pmu { * event read from <sysfs>/bus/event_source/devices/<name>/events/ or * from json events in pmu-events.c. */ - struct list_head aliases; + struct hashmap *aliases; /** * @events_table: The events table for json events in pmu-events.c. */ @@ -194,6 +195,9 @@ struct perf_pmu { struct perf_pmu_info { const char *unit; double scale; + double retirement_latency_mean; + double retirement_latency_min; + double retirement_latency_max; bool per_pkg; bool snapshot; }; @@ -274,6 +278,8 @@ bool pmu_uncore_identifier_match(const char *compat, const char *id); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); +struct perf_pmu_caps *perf_pmu__get_cap(struct perf_pmu *pmu, const char *name); + int perf_pmu__caps_parse(struct perf_pmu *pmu); void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config, @@ -289,6 +295,7 @@ int perf_pmu__pathname_scnprintf(char *buf, size_t size, int perf_pmu__event_source_devices_fd(void); int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, int flags); +int perf_pmu__init(struct perf_pmu *pmu, __u32 type, const char *name); struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char *lookup_name, bool eager_load); struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index b99292de7669..3bbd26fec78a 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -727,14 +727,21 @@ struct perf_pmu *evsel__find_pmu(const struct evsel *evsel) legacy_core_type = evsel->core.attr.type == PERF_TYPE_HARDWARE || evsel->core.attr.type == PERF_TYPE_HW_CACHE; - if (!pmu && legacy_core_type) { - if (perf_pmus__supports_extended_type()) { - u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; + if (!pmu && legacy_core_type && perf_pmus__supports_extended_type()) { + u32 type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; - pmu = perf_pmus__find_by_type(type); - } else { - pmu = perf_pmus__find_core_pmu(); - } + pmu = perf_pmus__find_by_type(type); + } + if (!pmu && (legacy_core_type || evsel->core.attr.type == PERF_TYPE_RAW)) { + /* + * For legacy events, if there was no extended type info then + * assume the PMU is the first core PMU. + * + * On architectures like ARM there is no sysfs PMU with type + * PERF_TYPE_RAW, assume the RAW events are going to be handled + * by the first core PMU. + */ + pmu = perf_pmus__find_core_pmu(); } ((struct evsel *)evsel)->pmu = pmu; return pmu; diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index 445efa1636c1..8f19c2bea64a 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -25,7 +25,8 @@ struct print_callbacks { const char *long_desc, const char *expr, const char *threshold, - const char *unit); + const char *unit, + const char *pmu_name); bool (*skip_duplicate_pmus)(void *print_state); }; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index f3c05da25b4a..321c333877fa 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -626,6 +626,92 @@ static int pyrf_thread_map__setup_types(void) return PyType_Ready(&pyrf_thread_map__type); } +struct pyrf_counts_values { + PyObject_HEAD + + struct perf_counts_values values; +}; + +static const char pyrf_counts_values__doc[] = PyDoc_STR("perf counts values object."); + +static void pyrf_counts_values__delete(struct pyrf_counts_values *pcounts_values) +{ + Py_TYPE(pcounts_values)->tp_free((PyObject *)pcounts_values); +} + +#define counts_values_member_def(member, ptype, help) \ + { #member, ptype, \ + offsetof(struct pyrf_counts_values, values.member), \ + 0, help } + +static PyMemberDef pyrf_counts_values_members[] = { + counts_values_member_def(val, T_ULONG, "Value of event"), + counts_values_member_def(ena, T_ULONG, "Time for which enabled"), + counts_values_member_def(run, T_ULONG, "Time for which running"), + counts_values_member_def(id, T_ULONG, "Unique ID for an event"), + counts_values_member_def(lost, T_ULONG, "Num of lost samples"), + { .name = NULL, }, +}; + +static PyObject *pyrf_counts_values_get_values(struct pyrf_counts_values *self, void *closure) +{ + PyObject *vals = PyList_New(5); + + if (!vals) + return NULL; + for (int i = 0; i < 5; i++) + PyList_SetItem(vals, i, PyLong_FromLong(self->values.values[i])); + + return vals; +} + +static int pyrf_counts_values_set_values(struct pyrf_counts_values *self, PyObject *list, + void *closure) +{ + Py_ssize_t size; + PyObject *item = NULL; + + if (!PyList_Check(list)) { + PyErr_SetString(PyExc_TypeError, "Value assigned must be a list"); + return -1; + } + + size = PyList_Size(list); + for (Py_ssize_t i = 0; i < size; i++) { + item = PyList_GetItem(list, i); + if (!PyLong_Check(item)) { + PyErr_SetString(PyExc_TypeError, "List members should be numbers"); + return -1; + } + self->values.values[i] = PyLong_AsLong(item); + } + + return 0; +} + +static PyGetSetDef pyrf_counts_values_getset[] = { + {"values", (getter)pyrf_counts_values_get_values, (setter)pyrf_counts_values_set_values, + "Name field", NULL}, + { .name = NULL, }, +}; + +static PyTypeObject pyrf_counts_values__type = { + PyVarObject_HEAD_INIT(NULL, 0) + .tp_name = "perf.counts_values", + .tp_basicsize = sizeof(struct pyrf_counts_values), + .tp_dealloc = (destructor)pyrf_counts_values__delete, + .tp_flags = Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, + .tp_doc = pyrf_counts_values__doc, + .tp_members = pyrf_counts_values_members, + .tp_getset = pyrf_counts_values_getset, +}; + +static int pyrf_counts_values__setup_types(void) +{ + pyrf_counts_values__type.tp_new = PyType_GenericNew; + return PyType_Ready(&pyrf_counts_values__type); +} + struct pyrf_evsel { PyObject_HEAD @@ -781,6 +867,58 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, return Py_None; } +static PyObject *pyrf_evsel__cpus(struct pyrf_evsel *pevsel) +{ + struct pyrf_cpu_map *pcpu_map = PyObject_New(struct pyrf_cpu_map, &pyrf_cpu_map__type); + + if (pcpu_map) + pcpu_map->cpus = perf_cpu_map__get(pevsel->evsel.core.cpus); + + return (PyObject *)pcpu_map; +} + +static PyObject *pyrf_evsel__threads(struct pyrf_evsel *pevsel) +{ + struct pyrf_thread_map *pthread_map = + PyObject_New(struct pyrf_thread_map, &pyrf_thread_map__type); + + if (pthread_map) + pthread_map->threads = perf_thread_map__get(pevsel->evsel.core.threads); + + return (PyObject *)pthread_map; +} + +static PyObject *pyrf_evsel__read(struct pyrf_evsel *pevsel, + PyObject *args, PyObject *kwargs) +{ + struct evsel *evsel = &pevsel->evsel; + int cpu = 0, cpu_idx, thread = 0, thread_idx; + struct perf_counts_values counts; + struct pyrf_counts_values *count_values = PyObject_New(struct pyrf_counts_values, + &pyrf_counts_values__type); + + if (!count_values) + return NULL; + + if (!PyArg_ParseTuple(args, "ii", &cpu, &thread)) + return NULL; + + cpu_idx = perf_cpu_map__idx(evsel->core.cpus, (struct perf_cpu){.cpu = cpu}); + if (cpu_idx < 0) { + PyErr_Format(PyExc_TypeError, "CPU %d is not part of evsel's CPUs", cpu); + return NULL; + } + thread_idx = perf_thread_map__idx(evsel->core.threads, thread); + if (cpu_idx < 0) { + PyErr_Format(PyExc_TypeError, "Thread %d is not part of evsel's threads", + thread); + return NULL; + } + perf_evsel__read(&(evsel->core), cpu_idx, thread_idx, &counts); + count_values->values = counts; + return (PyObject *)count_values; +} + static PyObject *pyrf_evsel__str(PyObject *self) { struct pyrf_evsel *pevsel = (void *)self; @@ -799,6 +937,24 @@ static PyMethodDef pyrf_evsel__methods[] = { .ml_flags = METH_VARARGS | METH_KEYWORDS, .ml_doc = PyDoc_STR("open the event selector file descriptor table.") }, + { + .ml_name = "cpus", + .ml_meth = (PyCFunction)pyrf_evsel__cpus, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("CPUs the event is to be used with.") + }, + { + .ml_name = "threads", + .ml_meth = (PyCFunction)pyrf_evsel__threads, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("threads the event is to be used with.") + }, + { + .ml_name = "read", + .ml_meth = (PyCFunction)pyrf_evsel__read, + .ml_flags = METH_VARARGS | METH_KEYWORDS, + .ml_doc = PyDoc_STR("read counters") + }, { .ml_name = NULL, } }; @@ -1054,6 +1210,16 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist, return Py_None; } +static PyObject *pyrf_evlist__close(struct pyrf_evlist *pevlist) +{ + struct evlist *evlist = &pevlist->evlist; + + evlist__close(evlist); + + Py_INCREF(Py_None); + return Py_None; +} + static PyObject *pyrf_evlist__config(struct pyrf_evlist *pevlist) { struct record_opts opts = { @@ -1113,6 +1279,12 @@ static PyMethodDef pyrf_evlist__methods[] = { .ml_doc = PyDoc_STR("open the file descriptors.") }, { + .ml_name = "close", + .ml_meth = (PyCFunction)pyrf_evlist__close, + .ml_flags = METH_NOARGS, + .ml_doc = PyDoc_STR("close the file descriptors.") + }, + { .ml_name = "poll", .ml_meth = (PyCFunction)pyrf_evlist__poll, .ml_flags = METH_VARARGS | METH_KEYWORDS, @@ -1442,7 +1614,8 @@ PyMODINIT_FUNC PyInit_perf(void) pyrf_evlist__setup_types() < 0 || pyrf_evsel__setup_types() < 0 || pyrf_thread_map__setup_types() < 0 || - pyrf_cpu_map__setup_types() < 0) + pyrf_cpu_map__setup_types() < 0 || + pyrf_counts_values__setup_types() < 0) return module; /* The page_size is placed in util object. */ @@ -1487,6 +1660,9 @@ PyMODINIT_FUNC PyInit_perf(void) Py_INCREF(&pyrf_cpu_map__type); PyModule_AddObject(module, "cpu_map", (PyObject*)&pyrf_cpu_map__type); + Py_INCREF(&pyrf_counts_values__type); + PyModule_AddObject(module, "counts_values", (PyObject *)&pyrf_counts_values__type); + dict = PyModule_GetDict(module); if (dict == NULL) goto error; diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h index a6566134e09e..ea3a6c4657ee 100644 --- a/tools/perf/util/record.h +++ b/tools/perf/util/record.h @@ -28,6 +28,7 @@ struct record_opts { bool sample_time_set; bool sample_cpu; bool sample_identifier; + bool sample_data_src; bool period; bool period_set; bool running_time; @@ -79,6 +80,7 @@ struct record_opts { int synth; int threads_spec; const char *threads_user_spec; + u64 off_cpu_thresh_ns; }; extern const char * const *record_usage; diff --git a/tools/perf/util/rwsem.c b/tools/perf/util/rwsem.c index 5109167f27f7..9d26832398db 100644 --- a/tools/perf/util/rwsem.c +++ b/tools/perf/util/rwsem.c @@ -27,6 +27,7 @@ int exit_rwsem(struct rw_semaphore *sem) } int down_read(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_lock(&sem->mtx); @@ -37,6 +38,7 @@ int down_read(struct rw_semaphore *sem) } int up_read(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_unlock(&sem->mtx); @@ -47,6 +49,7 @@ int up_read(struct rw_semaphore *sem) } int down_write(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_lock(&sem->mtx); @@ -57,6 +60,7 @@ int down_write(struct rw_semaphore *sem) } int up_write(struct rw_semaphore *sem) + NO_THREAD_SAFETY_ANALYSIS { #if RWS_ERRORCHECK mutex_unlock(&sem->mtx); diff --git a/tools/perf/util/rwsem.h b/tools/perf/util/rwsem.h index ef5cbc31d967..b102d8143181 100644 --- a/tools/perf/util/rwsem.h +++ b/tools/perf/util/rwsem.h @@ -10,7 +10,7 @@ */ #define RWS_ERRORCHECK 0 -struct rw_semaphore { +struct LOCKABLE rw_semaphore { #if RWS_ERRORCHECK struct mutex mtx; #else @@ -21,10 +21,10 @@ struct rw_semaphore { int init_rwsem(struct rw_semaphore *sem); int exit_rwsem(struct rw_semaphore *sem); -int down_read(struct rw_semaphore *sem); -int up_read(struct rw_semaphore *sem); +int down_read(struct rw_semaphore *sem) SHARED_LOCK_FUNCTION(sem); +int up_read(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem); -int down_write(struct rw_semaphore *sem); -int up_write(struct rw_semaphore *sem); +int down_write(struct rw_semaphore *sem) EXCLUSIVE_LOCK_FUNCTION(sem); +int up_write(struct rw_semaphore *sem) UNLOCK_FUNCTION(sem); #endif /* _PERF_RWSEM_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 60fb9997ea0d..a320672c264e 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1400,7 +1400,9 @@ static s64 perf_session__process_user_event(struct perf_session *session, int err; perf_sample__init(&sample, /*all=*/true); - if (event->header.type != PERF_RECORD_COMPRESSED || perf_tool__compressed_is_stub(tool)) + if ((event->header.type != PERF_RECORD_COMPRESSED && + event->header.type != PERF_RECORD_COMPRESSED2) || + perf_tool__compressed_is_stub(tool)) dump_event(session->evlist, event, file_offset, &sample, file_path); /* These events are processed right away */ @@ -1481,6 +1483,7 @@ static s64 perf_session__process_user_event(struct perf_session *session, err = tool->feature(session, event); break; case PERF_RECORD_COMPRESSED: + case PERF_RECORD_COMPRESSED2: err = tool->compressed(session, event, file_offset, file_path); if (err) dump_event(session->evlist, event, file_offset, &sample, file_path); @@ -1639,8 +1642,17 @@ static s64 perf_session__process_event(struct perf_session *session, if (session->header.needs_swap) event_swap(event, evlist__sample_id_all(evlist)); - if (event->header.type >= PERF_RECORD_HEADER_MAX) - return -EINVAL; + if (event->header.type >= PERF_RECORD_HEADER_MAX) { + /* perf should not support unaligned event, stop here. */ + if (event->header.size % sizeof(u64)) + return -EINVAL; + + /* This perf is outdated and does not support the latest event type. */ + ui__warning("Unsupported header type %u, please consider updating perf.\n", + event->header.type); + /* Skip unsupported event by returning its size. */ + return event->header.size; + } events_stats__inc(&evlist->stats, event->header.type); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index c51049087e4e..45e654653960 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -141,6 +141,43 @@ struct sort_entry sort_thread = { .se_width_idx = HISTC_THREAD, }; +/* --sort tgid */ + +static int64_t +sort__tgid_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return thread__pid(right->thread) - thread__pid(left->thread); +} + +static int hist_entry__tgid_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + int tgid = thread__pid(he->thread); + const char *comm = NULL; + + /* display comm of the thread-group leader */ + if (thread__pid(he->thread) == thread__tid(he->thread)) { + comm = thread__comm_str(he->thread); + } else { + struct maps *maps = thread__maps(he->thread); + struct thread *leader = machine__find_thread(maps__machine(maps), + tgid, tgid); + if (leader) { + comm = thread__comm_str(leader); + thread__put(leader); + } + } + width = max(7U, width) - 8; + return repsep_snprintf(bf, size, "%7d:%-*.*s", tgid, width, width, comm ?: ""); +} + +struct sort_entry sort_tgid = { + .se_header = " Tgid:Command", + .se_cmp = sort__tgid_cmp, + .se_snprintf = hist_entry__tgid_snprintf, + .se_width_idx = HISTC_TGID, +}; + /* --sort simd */ static int64_t @@ -2508,6 +2545,7 @@ static void sort_dimension_add_dynamic_header(struct sort_dimension *sd) static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_PID, "pid", sort_thread), + DIM(SORT_TGID, "tgid", sort_tgid), DIM(SORT_COMM, "comm", sort_comm), DIM(SORT_DSO, "dso", sort_dso), DIM(SORT_SYM, "symbol", sort_sym), @@ -2598,9 +2636,11 @@ struct hpp_dimension { struct perf_hpp_fmt *fmt; int taken; int was_taken; + int mem_mode; }; #define DIM(d, n) { .name = n, .fmt = &perf_hpp__format[d], } +#define DIM_MEM(d, n) { .name = n, .fmt = &perf_hpp__format[d], .mem_mode = 1, } static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__OVERHEAD, "overhead"), @@ -2620,8 +2660,15 @@ static struct hpp_dimension hpp_sort_dimensions[] = { DIM(PERF_HPP__WEIGHT2, "ins_lat"), DIM(PERF_HPP__WEIGHT3, "retire_lat"), DIM(PERF_HPP__WEIGHT3, "p_stage_cyc"), + /* used for output only when SORT_MODE__MEM */ + DIM_MEM(PERF_HPP__MEM_STAT_OP, "op"), + DIM_MEM(PERF_HPP__MEM_STAT_CACHE, "cache"), + DIM_MEM(PERF_HPP__MEM_STAT_MEMORY, "memory"), + DIM_MEM(PERF_HPP__MEM_STAT_SNOOP, "snoop"), + DIM_MEM(PERF_HPP__MEM_STAT_DTLB, "dtlb"), }; +#undef DIM_MEM #undef DIM struct hpp_sort_entry { @@ -2641,18 +2688,22 @@ void perf_hpp__reset_sort_width(struct perf_hpp_fmt *fmt, struct hists *hists) } static int __sort__hpp_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists, int line __maybe_unused, + struct hists *hists, int line, int *span __maybe_unused) { struct hpp_sort_entry *hse; size_t len = fmt->user_len; + const char *hdr = ""; + + if (line == hists->hpp_list->nr_header_lines - 1) + hdr = fmt->name; hse = container_of(fmt, struct hpp_sort_entry, hpp); if (!len) len = hists__col_len(hists, hse->se->se_width_idx); - return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, fmt->name); + return scnprintf(hpp->buf, hpp->size, "%-*.*s", len, len, hdr); } static int __sort__hpp_width(struct perf_hpp_fmt *fmt, @@ -2884,9 +2935,10 @@ static int __sort_dimension__add_hpp_sort(struct sort_dimension *sd, } static int __sort_dimension__add_hpp_output(struct sort_dimension *sd, - struct perf_hpp_list *list) + struct perf_hpp_list *list, + int level) { - struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, 0); + struct hpp_sort_entry *hse = __sort_dimension__alloc_hpp(sd, level); if (hse == NULL) return -1; @@ -3495,12 +3547,13 @@ static int __hpp_dimension__add(struct hpp_dimension *hd, } static int __sort_dimension__add_output(struct perf_hpp_list *list, - struct sort_dimension *sd) + struct sort_dimension *sd, + int level) { if (sd->taken) return 0; - if (__sort_dimension__add_hpp_output(sd, list) < 0) + if (__sort_dimension__add_hpp_output(sd, list, level) < 0) return -1; sd->taken = 1; @@ -3508,14 +3561,15 @@ static int __sort_dimension__add_output(struct perf_hpp_list *list, } static int __hpp_dimension__add_output(struct perf_hpp_list *list, - struct hpp_dimension *hd) + struct hpp_dimension *hd, + int level) { struct perf_hpp_fmt *fmt; if (hd->taken) return 0; - fmt = __hpp_dimension__alloc_hpp(hd, 0); + fmt = __hpp_dimension__alloc_hpp(hd, level); if (!fmt) return -1; @@ -3532,7 +3586,7 @@ int hpp_dimension__add_output(unsigned col, bool implicit) hd = &hpp_sort_dimensions[col]; if (implicit && !hd->was_taken) return 0; - return __hpp_dimension__add_output(&perf_hpp_list, hd); + return __hpp_dimension__add_output(&perf_hpp_list, hd, /*level=*/0); } int sort_dimension__add(struct perf_hpp_list *list, const char *tok, @@ -3601,15 +3655,6 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return __sort_dimension__add(sd, list, level); } - for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { - struct hpp_dimension *hd = &hpp_sort_dimensions[i]; - - if (strncasecmp(tok, hd->name, strlen(tok))) - continue; - - return __hpp_dimension__add(hd, list, level); - } - for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { struct sort_dimension *sd = &bstack_sort_dimensions[i]; @@ -3651,6 +3696,15 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, return 0; } + for (i = 0; i < ARRAY_SIZE(hpp_sort_dimensions); i++) { + struct hpp_dimension *hd = &hpp_sort_dimensions[i]; + + if (strncasecmp(tok, hd->name, strlen(tok))) + continue; + + return __hpp_dimension__add(hd, list, level); + } + if (!add_dynamic_entry(evlist, tok, level)) return 0; @@ -4000,7 +4054,7 @@ void sort__setup_elide(FILE *output) } } -int output_field_add(struct perf_hpp_list *list, const char *tok) +int output_field_add(struct perf_hpp_list *list, const char *tok, int *level) { unsigned int i; @@ -4013,16 +4067,25 @@ int output_field_add(struct perf_hpp_list *list, const char *tok) if (!strcasecmp(tok, "weight")) ui__warning("--fields weight shows the average value unlike in the --sort key.\n"); - return __hpp_dimension__add_output(list, hd); + if (hd->mem_mode && sort__mode != SORT_MODE__MEMORY) + continue; + + return __hpp_dimension__add_output(list, hd, *level); } + /* + * A non-output field will increase level so that it can be in a + * different hierarchy. + */ + (*level)++; + for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) { struct sort_dimension *sd = &common_sort_dimensions[i]; if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) { @@ -4034,7 +4097,7 @@ int output_field_add(struct perf_hpp_list *list, const char *tok) if (sort__mode != SORT_MODE__BRANCH) return -EINVAL; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) { @@ -4046,7 +4109,7 @@ int output_field_add(struct perf_hpp_list *list, const char *tok) if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; - return __sort_dimension__add_output(list, sd); + return __sort_dimension__add_output(list, sd, *level); } return -ESRCH; @@ -4056,10 +4119,11 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) { char *tmp, *tok; int ret = 0; + int level = 0; for (tok = strtok_r(str, ", ", &tmp); tok; tok = strtok_r(NULL, ", ", &tmp)) { - ret = output_field_add(list, tok); + ret = output_field_add(list, tok, &level); if (ret == -EINVAL) { ui__error("Invalid --fields key: `%s'", tok); break; @@ -4149,6 +4213,10 @@ int setup_sorting(struct evlist *evlist) if (err < 0) return err; + err = perf_hpp__alloc_mem_stats(&perf_hpp_list, evlist); + if (err < 0) + return err; + /* copy sort keys to output fields */ perf_hpp__setup_output_field(&perf_hpp_list); /* and then copy output fields to sort keys */ diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 180d36a2bea3..a742ab7f3c67 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -73,6 +73,7 @@ enum sort_type { SORT_SYM_OFFSET, SORT_ANNOTATE_DATA_TYPE_CACHELINE, SORT_PARALLELISM, + SORT_TGID, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, @@ -146,7 +147,7 @@ void reset_dimensions(void); int sort_dimension__add(struct perf_hpp_list *list, const char *tok, struct evlist *evlist, int level); -int output_field_add(struct perf_hpp_list *list, const char *tok); +int output_field_add(struct perf_hpp_list *list, const char *tok, int *level); int64_t sort__iaddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t diff --git a/tools/perf/util/srccode.c b/tools/perf/util/srccode.c index 476e99896d5e..0f4907843ac1 100644 --- a/tools/perf/util/srccode.c +++ b/tools/perf/util/srccode.c @@ -16,7 +16,7 @@ #include "srccode.h" #include "debug.h" #include <internal/lib.h> // page_size -#include "fncache.h" +#include "hashmap.h" #define MAXSRCCACHE (32*1024*1024) #define MAXSRCFILES 64 @@ -92,7 +92,7 @@ static struct srcfile *find_srcfile(char *fn) struct srcfile *h; int fd; unsigned long sz; - unsigned hval = shash((unsigned char *)fn) % SRC_HTAB_SZ; + size_t hval = str_hash(fn) % SRC_HTAB_SZ; hlist_for_each_entry (h, &srcfile_htab[hval], hash_nd) { if (!strcmp(fn, h->fn)) { diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index e852ac0d9847..729ad5cd52cb 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -798,40 +798,28 @@ static void abs_printout(struct perf_stat_config *config, print_cgroup(config, os, evsel->cgrp); } -static bool is_mixed_hw_group(struct evsel *counter) -{ - struct evlist *evlist = counter->evlist; - u32 pmu_type = counter->core.attr.type; - struct evsel *pos; - - if (counter->core.nr_members < 2) - return false; - - evlist__for_each_entry(evlist, pos) { - /* software events can be part of any hardware group */ - if (pos->core.attr.type == PERF_TYPE_SOFTWARE) - continue; - if (pmu_type == PERF_TYPE_SOFTWARE) { - pmu_type = pos->core.attr.type; - continue; - } - if (pmu_type != pos->core.attr.type) - return true; - } - - return false; -} - -static bool evlist__has_hybrid(struct evlist *evlist) +static bool evlist__has_hybrid_pmus(struct evlist *evlist) { struct evsel *evsel; + struct perf_pmu *last_core_pmu = NULL; if (perf_pmus__num_core_pmus() == 1) return false; evlist__for_each_entry(evlist, evsel) { - if (evsel->core.is_pmu_core) + if (evsel->core.is_pmu_core) { + struct perf_pmu *pmu = evsel__find_pmu(evsel); + + if (pmu == last_core_pmu) + continue; + + if (last_core_pmu == NULL) { + last_core_pmu = pmu; + continue; + } + /* A distinct core PMU. */ return true; + } } return false; @@ -872,10 +860,8 @@ static void printout(struct perf_stat_config *config, struct outstate *os, ok = false; if (counter->supported) { - if (!evlist__has_hybrid(counter->evlist)) { + if (!evlist__has_hybrid_pmus(counter->evlist)) { config->print_free_counters_hint = 1; - if (is_mixed_hw_group(counter)) - config->print_mixed_hw_group_error = 1; } } } @@ -929,61 +915,6 @@ static void printout(struct perf_stat_config *config, struct outstate *os, } } -static void evsel__uniquify_counter(struct evsel *counter) -{ - const char *name, *pmu_name; - char *new_name, *config; - int ret; - - /* No uniquification necessary. */ - if (!counter->needs_uniquify) - return; - - /* The evsel was already uniquified. */ - if (counter->uniquified_name) - return; - - /* Avoid checking to uniquify twice. */ - counter->uniquified_name = true; - - name = evsel__name(counter); - pmu_name = counter->pmu->name; - /* Already prefixed by the PMU name. */ - if (!strncmp(name, pmu_name, strlen(pmu_name))) - return; - - config = strchr(name, '/'); - if (config) { - int len = config - name; - - if (config[1] == '/') { - /* case: event// */ - ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2); - } else { - /* case: event/.../ */ - ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1); - } - } else { - config = strchr(name, ':'); - if (config) { - /* case: event:.. */ - int len = config - name; - - ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1); - } else { - /* case: event */ - ret = asprintf(&new_name, "%s/%s/", pmu_name, name); - } - } - if (ret > 0) { - free(counter->name); - counter->name = new_name; - } else { - /* ENOMEM from asprintf. */ - counter->uniquified_name = false; - } -} - /** * should_skip_zero_count() - Check if the event should print 0 values. * @config: The perf stat configuration (including aggregation mode). @@ -1022,8 +953,16 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, return true; /* - * Many tool events are only gathered on the first index, skip other - * zero values. + * In per-thread mode the aggr_map and aggr_get_id functions may be + * NULL, assume all 0 values should be output in that case. + */ + if (!config->aggr_map || !config->aggr_get_id) + return false; + + /* + * Tool events may be gathered on all logical CPUs, for example + * system_time, but for many the first index is the only one used, for + * example num_cores. Don't skip for the first index. */ if (evsel__is_tool(counter)) { struct aggr_cpu_id own_id = @@ -1031,15 +970,12 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, return !aggr_cpu_id__equal(id, &own_id); } - /* - * Skip value 0 when it's an uncore event and the given aggr id - * does not belong to the PMU cpumask. + * Skip value 0 when the counter's cpumask doesn't match the given aggr + * id. */ - if (!counter->pmu || !counter->pmu->is_uncore) - return false; - perf_cpu_map__for_each_cpu(cpu, idx, counter->pmu->cpus) { + perf_cpu_map__for_each_cpu(cpu, idx, counter->core.cpus) { struct aggr_cpu_id own_id = config->aggr_get_id(config, cpu); if (aggr_cpu_id__equal(id, &own_id)) @@ -1066,10 +1002,15 @@ static void print_counter_aggrdata(struct perf_stat_config *config, os->evsel = counter; /* Skip already merged uncore/hybrid events */ - if (counter->merged_stat) - return; - - evsel__uniquify_counter(counter); + if (config->aggr_mode != AGGR_NONE) { + if (evsel__is_hybrid(counter)) { + if (config->hybrid_merge && counter->first_wildcard_match != NULL) + return; + } else { + if (counter->first_wildcard_match != NULL) + return; + } + } val = aggr->counts.val; ena = aggr->counts.ena; @@ -1575,11 +1516,6 @@ static void print_footer(struct perf_stat_config *config) " echo 0 > /proc/sys/kernel/nmi_watchdog\n" " perf stat ...\n" " echo 1 > /proc/sys/kernel/nmi_watchdog\n"); - - if (config->print_mixed_hw_group_error) - fprintf(output, - "The events in group usually have to be from " - "the same PMU. Try reorganizing the group.\n"); } static void print_percore(struct perf_stat_config *config, @@ -1650,96 +1586,6 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist print_metric_end(config, os); } -/* Should uniquify be disabled for the evlist? */ -static bool evlist__disable_uniquify(const struct evlist *evlist) -{ - struct evsel *counter; - struct perf_pmu *last_pmu = NULL; - bool first = true; - - evlist__for_each_entry(evlist, counter) { - /* If PMUs vary then uniquify can be useful. */ - if (!first && counter->pmu != last_pmu) - return false; - first = false; - if (counter->pmu) { - /* Allow uniquify for uncore PMUs. */ - if (!counter->pmu->is_core) - return false; - /* Keep hybrid event names uniquified for clarity. */ - if (perf_pmus__num_core_pmus() > 1) - return false; - } - } - return true; -} - -static void evsel__set_needs_uniquify(struct evsel *counter, const struct perf_stat_config *config) -{ - struct evsel *evsel; - - if (counter->merged_stat) { - /* Counter won't be shown. */ - return; - } - - if (counter->use_config_name || counter->is_libpfm_event) { - /* Original name will be used. */ - return; - } - - if (!config->hybrid_merge && evsel__is_hybrid(counter)) { - /* Unique hybrid counters necessary. */ - counter->needs_uniquify = true; - return; - } - - if (counter->core.attr.type < PERF_TYPE_MAX && counter->core.attr.type != PERF_TYPE_RAW) { - /* Legacy event, don't uniquify. */ - return; - } - - if (counter->pmu && counter->pmu->is_core && - counter->alternate_hw_config != PERF_COUNT_HW_MAX) { - /* A sysfs or json event replacing a legacy event, don't uniquify. */ - return; - } - - if (config->aggr_mode == AGGR_NONE) { - /* Always unique with no aggregation. */ - counter->needs_uniquify = true; - return; - } - - /* - * Do other non-merged events in the evlist have the same name? If so - * uniquify is necessary. - */ - evlist__for_each_entry(counter->evlist, evsel) { - if (evsel == counter || evsel->merged_stat) - continue; - - if (evsel__name_is(counter, evsel__name(evsel))) { - counter->needs_uniquify = true; - return; - } - } -} - -static void evlist__set_needs_uniquify(struct evlist *evlist, const struct perf_stat_config *config) -{ - struct evsel *counter; - - if (evlist__disable_uniquify(evlist)) { - evlist__for_each_entry(evlist, counter) - counter->uniquified_name = true; - return; - } - - evlist__for_each_entry(evlist, counter) - evsel__set_needs_uniquify(counter, config); -} - void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv) @@ -1751,7 +1597,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf .first = true, }; - evlist__set_needs_uniquify(evlist, config); + evlist__uniquify_evsel_names(evlist, config); if (config->iostat_run) evlist->selected = evlist__first(evlist); diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 1f7abd8754c7..355a7d5c8ab8 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -535,35 +535,6 @@ static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) return 0; } -/* - * Events should have the same name, scale, unit, cgroup but on different core - * PMUs or on different but matching uncore PMUs. - */ -static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) -{ - if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b))) - return false; - - if (evsel_a->scale != evsel_b->scale) - return false; - - if (evsel_a->cgrp != evsel_b->cgrp) - return false; - - if (strcmp(evsel_a->unit, evsel_b->unit)) - return false; - - if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) - return false; - - if (evsel_a->pmu == evsel_b->pmu || evsel_a->pmu == NULL || evsel_b->pmu == NULL) - return false; - - if (evsel_a->pmu->is_core) - return evsel_b->pmu->is_core; - - return perf_pmu__name_no_suffix_match(evsel_a->pmu, evsel_b->pmu->name); -} static void evsel__merge_aliases(struct evsel *evsel) { @@ -572,10 +543,9 @@ static void evsel__merge_aliases(struct evsel *evsel) alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node); list_for_each_entry_continue(alias, &evlist->core.entries, core.node) { - /* Merge the same events on different PMUs. */ - if (evsel__is_alias(evsel, alias)) { + if (alias->first_wildcard_match == evsel) { + /* Merge the same events on different PMUs. */ evsel__merge_aggr_counters(evsel, alias); - alias->merged_stat = true; } } } @@ -588,11 +558,7 @@ static bool evsel__should_merge_hybrid(const struct evsel *evsel, static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) { - /* this evsel is already merged */ - if (evsel->merged_stat) - return; - - if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config)) + if (!evsel->pmu || !evsel->pmu->is_core || evsel__should_merge_hybrid(evsel, config)) evsel__merge_aliases(evsel); } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2fda9acd7374..1bcd7634bf47 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -100,7 +100,6 @@ struct perf_stat_config { int times; int run_count; int print_free_counters_hint; - int print_mixed_hw_group_error; const char *csv_sep; struct stats *walltime_nsecs_stats; struct rusage ru_data; diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index fbf6d0f73af9..01818abd02df 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -13,10 +13,6 @@ #include "maps.h" #include "symbol.h" #include "symsrc.h" -#include "demangle-cxx.h" -#include "demangle-ocaml.h" -#include "demangle-java.h" -#include "demangle-rust.h" #include "machine.h" #include "vdso.h" #include "debug.h" @@ -279,62 +275,6 @@ static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr) return -1; } -static bool want_demangle(bool is_kernel_sym) -{ - return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; -} - -/* - * Demangle C++ function signature, typically replaced by demangle-cxx.cpp - * version. - */ -#ifndef HAVE_CXA_DEMANGLE_SUPPORT -char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, - bool modifiers __maybe_unused) -{ -#ifdef HAVE_LIBBFD_SUPPORT - int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); - - return bfd_demangle(NULL, str, flags); -#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) - int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); - - return cplus_demangle(str, flags); -#else - return NULL; -#endif -} -#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */ - -static char *demangle_sym(struct dso *dso, int kmodule, const char *elf_name) -{ - char *demangled = NULL; - - /* - * We need to figure out if the object was created from C++ sources - * DWARF DW_compile_unit has this, but we don't always have access - * to it... - */ - if (!want_demangle(dso__kernel(dso) || kmodule)) - return demangled; - - demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0); - if (demangled == NULL) { - demangled = ocaml_demangle_sym(elf_name); - if (demangled == NULL) { - demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); - } - } - else if (rust_is_mangled(demangled)) - /* - * Input to Rust demangling is the BFD-demangled - * name which it Rust-demangles in place. - */ - rust_demangle_sym(demangled); - - return demangled; -} - struct rel_info { u32 nr_entries; u32 *sorted; @@ -620,7 +560,7 @@ static bool get_plt_got_name(GElf_Shdr *shdr, size_t i, /* Get the associated symbol */ gelf_getsym(di->dynsym_data, vr->sym_idx, &sym); sym_name = elf_sym__name(&sym, di->dynstr_data); - demangled = demangle_sym(di->dso, 0, sym_name); + demangled = dso__demangle_sym(di->dso, /*kmodule=*/0, sym_name); if (demangled != NULL) sym_name = demangled; @@ -818,7 +758,7 @@ int dso__synthesize_plt_symbols(struct dso *dso, struct symsrc *ss) gelf_getsym(syms, get_rel_symidx(&ri, idx), &sym); elf_name = elf_sym__name(&sym, symstrs); - demangled = demangle_sym(dso, 0, elf_name); + demangled = dso__demangle_sym(dso, /*kmodule=*/0, elf_name); if (demangled) elf_name = demangled; if (*elf_name) @@ -847,11 +787,6 @@ out_elf_end: return 0; } -char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name) -{ - return demangle_sym(dso, kmodule, elf_name); -} - /* * Align offset to 4 bytes as needed for note name and descriptor data. */ @@ -1840,7 +1775,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss, } } - demangled = demangle_sym(dso, kmodule, elf_name); + demangled = dso__demangle_sym(dso, kmodule, elf_name); if (demangled != NULL) elf_name = demangled; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index c6f369b5d893..c73fe2e09fe9 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -90,11 +90,23 @@ int filename__read_build_id(const char *filename, struct build_id *bid) { FILE *fp; int ret = -1; - bool need_swap = false; + bool need_swap = false, elf32; u8 e_ident[EI_NIDENT]; - size_t buf_size; - void *buf; int i; + union { + struct { + Elf32_Ehdr ehdr32; + Elf32_Phdr *phdr32; + }; + struct { + Elf64_Ehdr ehdr64; + Elf64_Phdr *phdr64; + }; + } hdrs; + void *phdr; + size_t phdr_size; + void *buf = NULL; + size_t buf_size = 0; fp = fopen(filename, "r"); if (fp == NULL) @@ -108,117 +120,79 @@ int filename__read_build_id(const char *filename, struct build_id *bid) goto out; need_swap = check_need_swap(e_ident[EI_DATA]); + elf32 = e_ident[EI_CLASS] == ELFCLASS32; - /* for simplicity */ - fseek(fp, 0, SEEK_SET); - - if (e_ident[EI_CLASS] == ELFCLASS32) { - Elf32_Ehdr ehdr; - Elf32_Phdr *phdr; - - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + if (fread(elf32 ? (void *)&hdrs.ehdr32 : (void *)&hdrs.ehdr64, + elf32 ? sizeof(hdrs.ehdr32) : sizeof(hdrs.ehdr64), + 1, fp) != 1) + goto out; - if (need_swap) { - ehdr.e_phoff = bswap_32(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (need_swap) { + if (elf32) { + hdrs.ehdr32.e_phoff = bswap_32(hdrs.ehdr32.e_phoff); + hdrs.ehdr32.e_phentsize = bswap_16(hdrs.ehdr32.e_phentsize); + hdrs.ehdr32.e_phnum = bswap_16(hdrs.ehdr32.e_phnum); + } else { + hdrs.ehdr64.e_phoff = bswap_64(hdrs.ehdr64.e_phoff); + hdrs.ehdr64.e_phentsize = bswap_16(hdrs.ehdr64.e_phentsize); + hdrs.ehdr64.e_phnum = bswap_16(hdrs.ehdr64.e_phnum); } + } + phdr_size = elf32 ? hdrs.ehdr32.e_phentsize * hdrs.ehdr32.e_phnum + : hdrs.ehdr64.e_phentsize * hdrs.ehdr64.e_phnum; + phdr = malloc(phdr_size); + if (phdr == NULL) + goto out; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { - void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_32(phdr->p_offset); - phdr->p_filesz = bswap_32(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - - buf_size = phdr->p_filesz; - offset = phdr->p_offset; - tmp = realloc(buf, buf_size); - if (tmp == NULL) - goto out_free; - - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + fseek(fp, elf32 ? hdrs.ehdr32.e_phoff : hdrs.ehdr64.e_phoff, SEEK_SET); + if (fread(phdr, phdr_size, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } - } - } else { - Elf64_Ehdr ehdr; - Elf64_Phdr *phdr; + if (elf32) + hdrs.phdr32 = phdr; + else + hdrs.phdr64 = phdr; - if (fread(&ehdr, sizeof(ehdr), 1, fp) != 1) - goto out; + for (i = 0; i < elf32 ? hdrs.ehdr32.e_phnum : hdrs.ehdr64.e_phnum; i++) { + size_t p_filesz; if (need_swap) { - ehdr.e_phoff = bswap_64(ehdr.e_phoff); - ehdr.e_phentsize = bswap_16(ehdr.e_phentsize); - ehdr.e_phnum = bswap_16(ehdr.e_phnum); + if (elf32) { + hdrs.phdr32[i].p_type = bswap_32(hdrs.phdr32[i].p_type); + hdrs.phdr32[i].p_offset = bswap_32(hdrs.phdr32[i].p_offset); + hdrs.phdr32[i].p_filesz = bswap_32(hdrs.phdr32[i].p_offset); + } else { + hdrs.phdr64[i].p_type = bswap_32(hdrs.phdr64[i].p_type); + hdrs.phdr64[i].p_offset = bswap_64(hdrs.phdr64[i].p_offset); + hdrs.phdr64[i].p_filesz = bswap_64(hdrs.phdr64[i].p_filesz); + } } + if ((elf32 ? hdrs.phdr32[i].p_type : hdrs.phdr64[i].p_type) != PT_NOTE) + continue; - buf_size = ehdr.e_phentsize * ehdr.e_phnum; - buf = malloc(buf_size); - if (buf == NULL) - goto out; - - fseek(fp, ehdr.e_phoff, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; - - for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) { + p_filesz = elf32 ? hdrs.phdr32[i].p_filesz : hdrs.phdr64[i].p_filesz; + if (p_filesz > buf_size) { void *tmp; - long offset; - - if (need_swap) { - phdr->p_type = bswap_32(phdr->p_type); - phdr->p_offset = bswap_64(phdr->p_offset); - phdr->p_filesz = bswap_64(phdr->p_filesz); - } - - if (phdr->p_type != PT_NOTE) - continue; - buf_size = phdr->p_filesz; - offset = phdr->p_offset; + buf_size = p_filesz; tmp = realloc(buf, buf_size); if (tmp == NULL) goto out_free; - buf = tmp; - fseek(fp, offset, SEEK_SET); - if (fread(buf, buf_size, 1, fp) != 1) - goto out_free; + } + fseek(fp, elf32 ? hdrs.phdr32[i].p_offset : hdrs.phdr64[i].p_offset, SEEK_SET); + if (fread(buf, p_filesz, 1, fp) != 1) + goto out_free; - ret = read_build_id(buf, buf_size, bid, need_swap); - if (ret == 0) { - ret = bid->size; - break; - } + ret = read_build_id(buf, p_filesz, bid, need_swap); + if (ret == 0) { + ret = bid->size; + break; } } out_free: free(buf); + free(phdr); out: fclose(fp); return ret; @@ -381,13 +355,6 @@ void symbol__elf_init(void) { } -char *dso__demangle_sym(struct dso *dso __maybe_unused, - int kmodule __maybe_unused, - const char *elf_name __maybe_unused) -{ - return NULL; -} - bool filename__has_section(const char *filename __maybe_unused, const char *sec __maybe_unused) { return false; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 11540219481b..8b30c6f16a9e 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -19,6 +19,11 @@ #include "build-id.h" #include "cap.h" #include "cpumap.h" +#include "debug.h" +#include "demangle-cxx.h" +#include "demangle-java.h" +#include "demangle-ocaml.h" +#include "demangle-rust-v0.h" #include "dso.h" #include "util.h" // lsdir() #include "debug.h" @@ -36,6 +41,7 @@ #include "header.h" #include "path.h" #include <linux/ctype.h> +#include <linux/log2.h> #include <linux/zalloc.h> #include <elf.h> @@ -98,10 +104,12 @@ static enum dso_binary_type binary_type_symtab[] = { #define DSO_BINARY_TYPE__SYMTAB_CNT ARRAY_SIZE(binary_type_symtab) -static bool symbol_type__filter(char symbol_type) +static bool symbol_type__filter(char __symbol_type) { - symbol_type = toupper(symbol_type); - return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B'; + // Since 'U' == undefined and 'u' == unique global symbol, we can't use toupper there + char symbol_type = toupper(__symbol_type); + return symbol_type == 'T' || symbol_type == 'W' || symbol_type == 'D' || symbol_type == 'B' || + __symbol_type == 'u' || __symbol_type == 'l'; } static int prefix_underscores_count(const char *str) @@ -2646,3 +2654,79 @@ int symbol__validate_sym_arguments(void) } return 0; } + +static bool want_demangle(bool is_kernel_sym) +{ + return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; +} + +/* + * Demangle C++ function signature, typically replaced by demangle-cxx.cpp + * version. + */ +#ifndef HAVE_CXA_DEMANGLE_SUPPORT +char *cxx_demangle_sym(const char *str __maybe_unused, bool params __maybe_unused, + bool modifiers __maybe_unused) +{ +#ifdef HAVE_LIBBFD_SUPPORT + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return bfd_demangle(NULL, str, flags); +#elif defined(HAVE_CPLUS_DEMANGLE_SUPPORT) + int flags = (params ? DMGL_PARAMS : 0) | (modifiers ? DMGL_ANSI : 0); + + return cplus_demangle(str, flags); +#else + return NULL; +#endif +} +#endif /* !HAVE_CXA_DEMANGLE_SUPPORT */ + +char *dso__demangle_sym(struct dso *dso, int kmodule, const char *elf_name) +{ + struct demangle rust_demangle = { + .style = DemangleStyleUnknown, + }; + char *demangled = NULL; + + /* + * We need to figure out if the object was created from C++ sources + * DWARF DW_compile_unit has this, but we don't always have access + * to it... + */ + if (!want_demangle((dso && dso__kernel(dso)) || kmodule)) + return demangled; + + rust_demangle_demangle(elf_name, &rust_demangle); + if (rust_demangle_is_known(&rust_demangle)) { + /* A rust mangled name. */ + if (rust_demangle.mangled_len == 0) + return demangled; + + for (size_t buf_len = roundup_pow_of_two(rust_demangle.mangled_len * 2); + buf_len < 1024 * 1024; buf_len += 32) { + char *tmp = realloc(demangled, buf_len); + + if (!tmp) { + /* Failure to grow output buffer, return what is there. */ + return demangled; + } + demangled = tmp; + if (rust_demangle_display_demangle(&rust_demangle, demangled, buf_len, + /*alternate=*/true) == OverflowOk) + return demangled; + } + /* Buffer exceeded sensible bounds, return what is there. */ + return demangled; + } + + demangled = cxx_demangle_sym(elf_name, verbose > 0, verbose > 0); + if (demangled) + return demangled; + + demangled = ocaml_demangle_sym(elf_name); + if (demangled) + return demangled; + + return java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET); +} diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 89585f53c1d5..ffb48cc2103f 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -410,7 +410,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, bo } void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al) + bool symbols, struct addr_location *al) { size_t i; const u8 cpumodes[] = { @@ -421,7 +421,11 @@ void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, }; for (i = 0; i < ARRAY_SIZE(cpumodes); i++) { - thread__find_symbol(thread, cpumodes[i], addr, al); + if (symbols) + thread__find_symbol(thread, cpumodes[i], addr, al); + else + thread__find_map(thread, cpumodes[i], addr, al); + if (al->map) break; } @@ -471,6 +475,7 @@ uint16_t thread__e_machine(struct thread *thread, struct machine *machine) if (parent) { e_machine = thread__e_machine(parent, machine); + thread__put(parent); thread__set_e_machine(thread, e_machine); return e_machine; } diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index cd574a896418..2b90bbed7a61 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -126,7 +126,7 @@ struct symbol *thread__find_symbol_fb(struct thread *thread, u8 cpumode, u64 addr, struct addr_location *al); void thread__find_cpumode_addr_location(struct thread *thread, u64 addr, - struct addr_location *al); + bool symbols, struct addr_location *al); int thread__memcpy(struct thread *thread, struct machine *machine, void *buf, u64 ip, int len, bool *is64bit); diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 3b7f390f26eb..37bd8ac63b01 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -43,8 +43,15 @@ static int perf_session__process_compressed_event(struct perf_session *session, decomp->size = decomp_last_rem; } - src = (void *)event + sizeof(struct perf_record_compressed); - src_size = event->pack.header.size - sizeof(struct perf_record_compressed); + if (event->header.type == PERF_RECORD_COMPRESSED) { + src = (void *)event + sizeof(struct perf_record_compressed); + src_size = event->pack.header.size - sizeof(struct perf_record_compressed); + } else if (event->header.type == PERF_RECORD_COMPRESSED2) { + src = (void *)event + sizeof(struct perf_record_compressed2); + src_size = event->pack2.data_size; + } else { + return -1; + } decomp_size = zstd_decompress_stream(session->active_decomp->zstd_decomp, src, src_size, &(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem); diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c index 97b327d1ce4a..4630b8cc8e52 100644 --- a/tools/perf/util/tool_pmu.c +++ b/tools/perf/util/tool_pmu.c @@ -486,8 +486,14 @@ int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) delta_start *= 1000000000 / ticks_per_sec; } count->val = delta_start; - count->ena = count->run = delta_start; count->lost = 0; + /* + * The values of enabled and running must make a ratio of 100%. The + * exact values don't matter as long as they are non-zero to avoid + * issues with evsel__count_has_error. + */ + count->ena++; + count->run++; return 0; } @@ -496,19 +502,12 @@ struct perf_pmu *tool_pmu__new(void) struct perf_pmu *tool = zalloc(sizeof(struct perf_pmu)); if (!tool) - goto out; - tool->name = strdup("tool"); - if (!tool->name) { - zfree(&tool); - goto out; - } + return NULL; - tool->type = PERF_PMU_TYPE_TOOL; - INIT_LIST_HEAD(&tool->aliases); - INIT_LIST_HEAD(&tool->caps); - INIT_LIST_HEAD(&tool->format); + if (perf_pmu__init(tool, PERF_PMU_TYPE_TOOL, "tool") != 0) { + perf_pmu__delete(tool); + return NULL; + } tool->events_table = find_core_events_table("common", "common"); - -out: return tool; } diff --git a/tools/perf/util/trace.h b/tools/perf/util/trace.h new file mode 100644 index 000000000000..fa8d480527a2 --- /dev/null +++ b/tools/perf/util/trace.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef UTIL_TRACE_H +#define UTIL_TRACE_H + +#include <stdio.h> /* for FILE */ + +enum trace_summary_mode { + SUMMARY__NONE = 0, + SUMMARY__BY_TOTAL, + SUMMARY__BY_THREAD, + SUMMARY__BY_CGROUP, +}; + +#ifdef HAVE_BPF_SKEL + +int trace_prepare_bpf_summary(enum trace_summary_mode mode); +void trace_start_bpf_summary(void); +void trace_end_bpf_summary(void); +int trace_print_bpf_summary(FILE *fp); +void trace_cleanup_bpf_summary(void); + +#else /* !HAVE_BPF_SKEL */ + +static inline int trace_prepare_bpf_summary(enum trace_summary_mode mode __maybe_unused) +{ + return -1; +} +static inline void trace_start_bpf_summary(void) {} +static inline void trace_end_bpf_summary(void) {} +static inline int trace_print_bpf_summary(FILE *fp __maybe_unused) +{ + return 0; +} +static inline void trace_cleanup_bpf_summary(void) {} + +#endif /* HAVE_BPF_SKEL */ + +#endif /* UTIL_TRACE_H */ |