summaryrefslogtreecommitdiff
path: root/tools/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-24 05:45:40 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-24 05:45:40 -0800
commit7685b334d1e4927cc73b62c65293ba65748d9c52 (patch)
treef7490fa318bf9c8079d5fb274646ef6a6aa1b86f /tools/lib
parentbc8198dc7ebc492ec3e9fa1617dcdfbe98e73b17 (diff)
parent91b7747dc70d64b5ec56ffe493310f207e7ffc99 (diff)
Merge tag 'perf-tools-for-v6.14-2025-01-21' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools
Pull perf-tools updates from Namhyung Kim: "There are a lot of changes in the perf tools in this cycle. build: - Use generic syscall table to generate syscall numbers on supported archs - This also makes it possible to get rid of libaudit which was used for syscall numbers - Remove python2 support as it has been deprecated for years - Fix issues on static build with libzstd perf record: - Intel-PT supports "aux-action" config term to pause or resume tracing in the aux-buffer. Users can start the intel_pt event as "start-paused" and configure other events to control the Intel-PT tracing: # perf record --kcore -e intel_pt/aux-action=start-paused/ \ -e syscalls:sys_enter_newuname/aux-action=resume/ \ -e syscalls:sys_exit_newuname/aux-action=pause/ -- uname This requires kernel support (which was added in v6.13) perf lock: - 'perf lock contention' command has an ability to symbolize locks in dynamically allocated objects using slab cache name when it runs with BPF. Those dynamic locks would have "&" prefix in the name to distinguish them from ordinary (static) locks # perf lock con -abl -E 5 sleep 1 contended total wait max wait avg wait address symbol 2 1.95 us 1.77 us 975 ns ffff9d5e852d3498 &task_struct (mutex) 1 1.18 us 1.18 us 1.18 us ffff9d5e852d3538 &task_struct (mutex) 4 1.12 us 354 ns 279 ns ffff9d5e841ca800 &kmalloc-cg-512 (mutex) 2 859 ns 617 ns 429 ns ffffffffa41c3620 delayed_uprobe_lock (mutex) 3 691 ns 388 ns 230 ns ffffffffa41c0940 pack_mutex (mutex) This also requires kernel/BPF support (which was added in v6.13) perf ftrace: - 'perf ftrace latency' command gets a couple of options to support linear buckets instead of exponential. 
Also it's possible to specify max and min latency for the linear buckets: # perf ftrace latency -abn -T switch_mm_irqs_off --bucket-range=100 \ --min-latency=200 --max-latency=800 -- sleep 1 # DURATION | COUNT | GRAPH | 0 - 200 ns | 186 | ### | 200 - 300 ns | 256 | ##### | 300 - 400 ns | 364 | ####### | 400 - 500 ns | 223 | #### | 500 - 600 ns | 111 | ## | 600 - 700 ns | 41 | | 700 - 800 ns | 141 | ## | 800 - ... ns | 169 | ### | # statistics (in nsec) total time: 2162212 avg time: 967 max time: 16817 min time: 132 count: 2236 - As you can see in the above example, it now shows the statistics at the end so that users can see the avg/max/min latencies easily - 'perf ftrace profile' command has --graph-opts option like 'perf ftrace trace' so that it can control the tracing behaviors in the same way. For example, it can limit the function call depth or threshold perf script: - Improve physical memory resolution in 'mem-phys-addr' script by parsing /proc/iomem file # perf script mem-phys-addr -- find / ... Event: mem_inst_retired.all_loads:P Memory type count percentage ---------------------------------------- ---------- ---------- 100000000-85f7fffff : System RAM 8929 69.7 547600000-54785d23f : Kernel data 1240 9.7 546a00000-5474bdfff : Kernel rodata 490 3.8 5480ce000-5485fffff : Kernel bss 121 0.9 0-fff : Reserved 3860 30.1 100000-89c01fff : System RAM 18 0.1 8a22c000-8df6efff : System RAM 5 0.0 Others: - 'perf test' gets --runs-per-test option to run the test cases repeatedly. This would be helpful to see if it's flaky - Add 'parse_events' method to Python perf extension module, so that users can use the same event parsing logic in the python code. One more step towards implementing perf tools in Python. :) - Support opening tracepoint events without libtraceevent. 
This will be helpful if it won't use the tracing data like in 'perf stat' - Update ARM Neoverse N2/V2 JSON events and metrics" * tag 'perf-tools-for-v6.14-2025-01-21' of git://git.kernel.org/pub/scm/linux/kernel/git/perf/perf-tools: (176 commits) perf test: Update event_groups test to use instructions perf bench: Fix undefined behavior in cmpworker() perf annotate: Prefer passing evsel to evsel->core.idx perf lock: Rename fields in lock_type_table perf lock: Add percpu-rwsem for type filter perf lock: Fix parse_lock_type which only retrieve one lock flag perf lock: Fix return code for functions in __cmd_contention perf hist: Fix width calculation in hpp__fmt() perf hist: Fix bogus profiles when filters are enabled perf hist: Deduplicate cmp/sort/collapse code perf test: Improve verbose documentation perf test: Add a runs-per-test flag perf test: Fix parallel/sequential option documentation perf test: Send list output to stdout rather than stderr perf test: Rename functions and variables for better clarity perf tools: Expose quiet/verbose variables in Makefile.perf perf config: Add a function to set one variable in .perfconfig perf test perftool_testsuite: Return correct value for skipping perf test perftool_testsuite: Add missing description perf test record+probe_libc_inet_pton: Make test resilient ...
Diffstat (limited to 'tools/lib')
-rw-r--r--tools/lib/api/fs/fs.c6
-rw-r--r--tools/lib/perf/Documentation/libperf.txt1
-rw-r--r--tools/lib/perf/cpumap.c131
-rw-r--r--tools/lib/perf/evlist.c2
-rw-r--r--tools/lib/perf/include/internal/cpumap.h4
-rw-r--r--tools/lib/perf/include/perf/cpumap.h6
-rw-r--r--tools/lib/perf/libperf.map1
7 files changed, 48 insertions, 103 deletions
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 337fde770e45..edec23406dbc 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -296,7 +296,7 @@ int filename__read_int(const char *filename, int *value)
int fd = open(filename, O_RDONLY), err = -1;
if (fd < 0)
- return -1;
+ return -errno;
if (read(fd, line, sizeof(line)) > 0) {
*value = atoi(line);
@@ -314,7 +314,7 @@ static int filename__read_ull_base(const char *filename,
int fd = open(filename, O_RDONLY), err = -1;
if (fd < 0)
- return -1;
+ return -errno;
if (read(fd, line, sizeof(line)) > 0) {
*value = strtoull(line, NULL, base);
@@ -372,7 +372,7 @@ int filename__write_int(const char *filename, int value)
char buf[64];
if (fd < 0)
- return err;
+ return -errno;
sprintf(buf, "%d", value);
if (write(fd, buf, sizeof(buf)) == sizeof(buf))
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index fcfb9499ef9c..59aabdd3cabf 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -39,7 +39,6 @@ SYNOPSIS
struct perf_cpu_map *perf_cpu_map__new_any_cpu(void);
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
- struct perf_cpu_map *perf_cpu_map__read(FILE *file);
struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index cae799ad44e1..fcc47214062a 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
#include <perf/cpumap.h>
#include <stdlib.h>
#include <linux/refcount.h>
@@ -10,6 +11,9 @@
#include <ctype.h>
#include <limits.h>
#include "internal.h"
+#include <api/fs/fs.h>
+
+#define MAX_NR_CPUS 4096
void perf_cpu_map__set_nr(struct perf_cpu_map *map, int nr_cpus)
{
@@ -100,12 +104,12 @@ static struct perf_cpu_map *cpu_map__new_sysconf(void)
static struct perf_cpu_map *cpu_map__new_sysfs_online(void)
{
struct perf_cpu_map *cpus = NULL;
- FILE *onlnf;
+ char *buf = NULL;
+ size_t buf_len;
- onlnf = fopen("/sys/devices/system/cpu/online", "r");
- if (onlnf) {
- cpus = perf_cpu_map__read(onlnf);
- fclose(onlnf);
+ if (sysfs__read_str("devices/system/cpu/online", &buf, &buf_len) >= 0) {
+ cpus = perf_cpu_map__new(buf);
+ free(buf);
}
return cpus;
}
@@ -158,62 +162,6 @@ static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu
return cpus;
}
-struct perf_cpu_map *perf_cpu_map__read(FILE *file)
-{
- struct perf_cpu_map *cpus = NULL;
- int nr_cpus = 0;
- struct perf_cpu *tmp_cpus = NULL, *tmp;
- int max_entries = 0;
- int n, cpu, prev;
- char sep;
-
- sep = 0;
- prev = -1;
- for (;;) {
- n = fscanf(file, "%u%c", &cpu, &sep);
- if (n <= 0)
- break;
- if (prev >= 0) {
- int new_max = nr_cpus + cpu - prev - 1;
-
- WARN_ONCE(new_max >= MAX_NR_CPUS, "Perf can support %d CPUs. "
- "Consider raising MAX_NR_CPUS\n", MAX_NR_CPUS);
-
- if (new_max >= max_entries) {
- max_entries = new_max + MAX_NR_CPUS / 2;
- tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
- if (tmp == NULL)
- goto out_free_tmp;
- tmp_cpus = tmp;
- }
-
- while (++prev < cpu)
- tmp_cpus[nr_cpus++].cpu = prev;
- }
- if (nr_cpus == max_entries) {
- max_entries += MAX_NR_CPUS;
- tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
- if (tmp == NULL)
- goto out_free_tmp;
- tmp_cpus = tmp;
- }
-
- tmp_cpus[nr_cpus++].cpu = cpu;
- if (n == 2 && sep == '-')
- prev = cpu;
- else
- prev = -1;
- if (n == 1 || sep == '\n')
- break;
- }
-
- if (nr_cpus > 0)
- cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
-out_free_tmp:
- free(tmp_cpus);
- return cpus;
-}
-
struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
{
struct perf_cpu_map *cpus = NULL;
@@ -238,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
p = NULL;
start_cpu = strtoul(cpu_list, &p, 0);
if (start_cpu >= INT_MAX
- || (*p != '\0' && *p != ',' && *p != '-'))
+ || (*p != '\0' && *p != ',' && *p != '-' && *p != '\n'))
goto invalid;
if (*p == '-') {
@@ -246,7 +194,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
p = NULL;
end_cpu = strtoul(cpu_list, &p, 0);
- if (end_cpu >= INT_MAX || (*p != '\0' && *p != ','))
+ if (end_cpu >= INT_MAX || (*p != '\0' && *p != ',' && *p != '\n'))
goto invalid;
if (end_cpu < start_cpu)
@@ -265,7 +213,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
goto invalid;
if (nr_cpus == max_entries) {
- max_entries += MAX_NR_CPUS;
+ max_entries += max(end_cpu - start_cpu + 1, 16UL);
tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));
if (tmp == NULL)
goto invalid;
@@ -279,14 +227,15 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)
cpu_list = p;
}
- if (nr_cpus > 0)
+ if (nr_cpus > 0) {
cpus = cpu_map__trim_new(nr_cpus, tmp_cpus);
- else if (*cpu_list != '\0') {
+ } else if (*cpu_list != '\0') {
pr_warning("Unexpected characters at end of cpu list ('%s'), using online CPUs.",
cpu_list);
cpus = perf_cpu_map__new_online_cpus();
- } else
+ } else {
cpus = perf_cpu_map__new_any_cpu();
+ }
invalid:
free(tmp_cpus);
out:
@@ -436,46 +385,49 @@ bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu
}
/*
- * Merge two cpumaps
+ * Merge two cpumaps.
+ *
+ * If 'other' is subset of '*orig', '*orig' keeps itself with no reference count
+ * change (similar to "realloc").
+ *
+ * If '*orig' is subset of 'other', '*orig' reuses 'other' with its reference
+ * count increased.
*
- * orig either gets freed and replaced with a new map, or reused
- * with no reference count change (similar to "realloc")
- * other has its reference count increased.
+ * Otherwise, '*orig' gets freed and replaced with a new map.
*/
-
-struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
- struct perf_cpu_map *other)
+int perf_cpu_map__merge(struct perf_cpu_map **orig, struct perf_cpu_map *other)
{
struct perf_cpu *tmp_cpus;
int tmp_len;
int i, j, k;
struct perf_cpu_map *merged;
- if (perf_cpu_map__is_subset(orig, other))
- return orig;
- if (perf_cpu_map__is_subset(other, orig)) {
- perf_cpu_map__put(orig);
- return perf_cpu_map__get(other);
+ if (perf_cpu_map__is_subset(*orig, other))
+ return 0;
+ if (perf_cpu_map__is_subset(other, *orig)) {
+ perf_cpu_map__put(*orig);
+ *orig = perf_cpu_map__get(other);
+ return 0;
}
- tmp_len = __perf_cpu_map__nr(orig) + __perf_cpu_map__nr(other);
+ tmp_len = __perf_cpu_map__nr(*orig) + __perf_cpu_map__nr(other);
tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));
if (!tmp_cpus)
- return NULL;
+ return -ENOMEM;
/* Standard merge algorithm from wikipedia */
i = j = k = 0;
- while (i < __perf_cpu_map__nr(orig) && j < __perf_cpu_map__nr(other)) {
- if (__perf_cpu_map__cpu(orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
- if (__perf_cpu_map__cpu(orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
+ while (i < __perf_cpu_map__nr(*orig) && j < __perf_cpu_map__nr(other)) {
+ if (__perf_cpu_map__cpu(*orig, i).cpu <= __perf_cpu_map__cpu(other, j).cpu) {
+ if (__perf_cpu_map__cpu(*orig, i).cpu == __perf_cpu_map__cpu(other, j).cpu)
j++;
- tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+ tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++);
} else
tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
}
- while (i < __perf_cpu_map__nr(orig))
- tmp_cpus[k++] = __perf_cpu_map__cpu(orig, i++);
+ while (i < __perf_cpu_map__nr(*orig))
+ tmp_cpus[k++] = __perf_cpu_map__cpu(*orig, i++);
while (j < __perf_cpu_map__nr(other))
tmp_cpus[k++] = __perf_cpu_map__cpu(other, j++);
@@ -483,8 +435,9 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
merged = cpu_map__trim_new(k, tmp_cpus);
free(tmp_cpus);
- perf_cpu_map__put(orig);
- return merged;
+ perf_cpu_map__put(*orig);
+ *orig = merged;
+ return 0;
}
struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 83c43dc13313..b1f4c8176b32 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -89,7 +89,7 @@ static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
evsel->threads = perf_thread_map__get(evlist->threads);
}
- evlist->all_cpus = perf_cpu_map__merge(evlist->all_cpus, evsel->cpus);
+ perf_cpu_map__merge(&evlist->all_cpus, evsel->cpus);
}
static void perf_evlist__propagate_maps(struct perf_evlist *evlist)
diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h
index 49649eb51ce4..e2be2d17c32b 100644
--- a/tools/lib/perf/include/internal/cpumap.h
+++ b/tools/lib/perf/include/internal/cpumap.h
@@ -21,10 +21,6 @@ DECLARE_RC_STRUCT(perf_cpu_map) {
struct perf_cpu map[];
};
-#ifndef MAX_NR_CPUS
-#define MAX_NR_CPUS 2048
-#endif
-
struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus);
int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);
bool perf_cpu_map__is_subset(const struct perf_cpu_map *a, const struct perf_cpu_map *b);
diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 90457d17fb2f..188a667babc6 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -3,7 +3,6 @@
#define __LIBPERF_CPUMAP_H
#include <perf/core.h>
-#include <stdio.h>
#include <stdbool.h>
/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */
@@ -37,10 +36,9 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__new_online_cpus(void);
* perf_cpu_map__new_online_cpus is returned.
*/
LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__read(FILE *file);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);
-LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,
- struct perf_cpu_map *other);
+LIBPERF_API int perf_cpu_map__merge(struct perf_cpu_map **orig,
+ struct perf_cpu_map *other);
LIBPERF_API struct perf_cpu_map *perf_cpu_map__intersect(struct perf_cpu_map *orig,
struct perf_cpu_map *other);
LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map);
diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map
index 2aa79b696032..fdd8304fe9d0 100644
--- a/tools/lib/perf/libperf.map
+++ b/tools/lib/perf/libperf.map
@@ -6,7 +6,6 @@ LIBPERF_0.0.1 {
perf_cpu_map__get;
perf_cpu_map__put;
perf_cpu_map__new;
- perf_cpu_map__read;
perf_cpu_map__nr;
perf_cpu_map__cpu;
perf_cpu_map__has_any_cpu_or_is_empty;