summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDmitry Vyukov <dvyukov@google.com>2025-02-13 10:08:14 +0100
committerNamhyung Kim <namhyung@kernel.org>2025-02-17 22:00:50 -0800
commitf13bc61b2e37957bf6e693ab5650d93fd21523f4 (patch)
tree4a38f76d74609dc76aa45e4c33bff6421cf04198
parent20600b8aab734877740d5292c0cdd5ccb6c7beb7 (diff)
perf report: Add machine parallelism
Add calculation of the current parallelism level (number of threads actively running on CPUs). The parallelism level can be shown in reports on its own, and to calculate latency overheads. Signed-off-by: Dmitry Vyukov <dvyukov@google.com> Reviewed-by: Andi Kleen <ak@linux.intel.com> Link: https://lore.kernel.org/r/0f8c1b8eb12619029e31b3d5c0346f4616a5aeda.1739437531.git.dvyukov@google.com Signed-off-by: Namhyung Kim <namhyung@kernel.org>
-rw-r--r--tools/perf/builtin-report.c1
-rw-r--r--tools/perf/util/addr_location.c1
-rw-r--r--tools/perf/util/addr_location.h2
-rw-r--r--tools/perf/util/event.c3
-rw-r--r--tools/perf/util/machine.c7
-rw-r--r--tools/perf/util/machine.h6
6 files changed, 20 insertions, 0 deletions
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index f5fbd670d619..0d9bd090eda7 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1568,6 +1568,7 @@ repeat:
report.tool.cgroup = perf_event__process_cgroup;
report.tool.exit = perf_event__process_exit;
report.tool.fork = perf_event__process_fork;
+ report.tool.context_switch = perf_event__process_switch;
report.tool.lost = perf_event__process_lost;
report.tool.read = process_read_event;
report.tool.attr = process_attr;
diff --git a/tools/perf/util/addr_location.c b/tools/perf/util/addr_location.c
index 51825ef8c0ab..007a2f5df9a6 100644
--- a/tools/perf/util/addr_location.c
+++ b/tools/perf/util/addr_location.c
@@ -17,6 +17,7 @@ void addr_location__init(struct addr_location *al)
al->cpumode = 0;
al->cpu = 0;
al->socket = 0;
+ al->parallelism = 1;
}
/*
diff --git a/tools/perf/util/addr_location.h b/tools/perf/util/addr_location.h
index d8ac0428dff2..36aaa45445f2 100644
--- a/tools/perf/util/addr_location.h
+++ b/tools/perf/util/addr_location.h
@@ -21,6 +21,8 @@ struct addr_location {
u8 cpumode;
s32 cpu;
s32 socket;
+ /* Same as machine.parallelism but within [1, nr_cpus]. */
+ int parallelism;
};
void addr_location__init(struct addr_location *al);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index aac96d5d1917..2f10e3115757 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -767,6 +767,9 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
al->socket = env->cpu[al->cpu].socket_id;
}
+ /* Account for possible out-of-order switch events. */
+ al->parallelism = max(1, min(machine->parallelism, machine__nr_cpus_avail(machine)));
+
if (al->map) {
if (symbol_conf.dso_list &&
(!dso || !(strlist__has_entry(symbol_conf.dso_list,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 55d4977b9913..d96cbfd97ad8 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -94,6 +94,8 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
machine->comm_exec = false;
machine->kernel_start = 0;
machine->vmlinux_map = NULL;
+ /* There is no initial context switch in, so we start at 1. */
+ machine->parallelism = 1;
machine->root_dir = strdup(root_dir);
if (machine->root_dir == NULL)
@@ -677,8 +679,11 @@ int machine__process_aux_output_hw_id_event(struct machine *machine __maybe_unus
int machine__process_switch_event(struct machine *machine __maybe_unused,
union perf_event *event)
{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+
if (dump_trace)
perf_event__fprintf_switch(event, stdout);
+ machine->parallelism += out ? -1 : 1;
return 0;
}
@@ -1880,6 +1885,8 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
if (dump_trace)
perf_event__fprintf_task(event, stdout);
+ /* There is no context switch out before exit, so we decrement here. */
+ machine->parallelism--;
if (thread != NULL) {
if (symbol_conf.keep_exited_threads)
thread__set_exited(thread, /*exited=*/true);
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index ae3e5542d57d..b56abec84fed 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -50,6 +50,12 @@ struct machine {
u64 text_start;
u64 text_end;
} sched, lock, traceiter, trace;
+ /*
+ * The current parallelism level (number of threads that run on CPUs).
+ * This value can be less than 1, or larger than the total number
+ * of CPUs, if events are poorly ordered.
+ */
+ int parallelism;
pid_t *current_tid;
size_t current_tid_sz;
union { /* Tool specific area */