Diffstat (limited to 'tools/perf/builtin-trace.c')
-rw-r--r-- | tools/perf/builtin-trace.c | 769
1 file changed, 551 insertions, 218 deletions
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 6a1a128fe645..2ab1b8e05ad3 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -39,6 +39,7 @@ #include "util/synthetic-events.h" #include "util/evlist.h" #include "util/evswitch.h" +#include "util/hashmap.h" #include "util/mmap.h" #include <subcmd/pager.h> #include <subcmd/exec-cmd.h> @@ -54,6 +55,7 @@ #include "util/thread_map.h" #include "util/stat.h" #include "util/tool.h" +#include "util/trace.h" #include "util/util.h" #include "trace/beauty/beauty.h" #include "trace-event.h" @@ -63,9 +65,9 @@ #include "print_binary.h" #include "string2.h" #include "syscalltbl.h" -#include "rb_resort.h" #include "../perf.h" #include "trace_augment.h" +#include "dwarf-regs.h" #include <errno.h> #include <inttypes.h> @@ -86,6 +88,7 @@ #include <linux/ctype.h> #include <perf/mmap.h> +#include <tools/libc_compat.h> #ifdef HAVE_LIBTRACEEVENT #include <event-parse.h> @@ -141,9 +144,11 @@ struct syscall_fmt { struct trace { struct perf_tool tool; - struct syscalltbl *sctbl; struct { - struct syscall *table; + /** Sorted sycall numbers used by the trace. */ + struct syscall **table; + /** Size of table. */ + size_t table_size; struct { struct evsel *sys_enter, *sys_exit, @@ -177,14 +182,25 @@ struct trace { pid_t *entries; struct bpf_map *map; } filter_pids; + /* + * TODO: The map is from an ID (aka system call number) to struct + * syscall_stats. If there is >1 e_machine, such as i386 and x86-64 + * processes, then the stats here will gather wrong the statistics for + * the non EM_HOST system calls. A fix would be to add the e_machine + * into the key, but this would make the code inconsistent with the + * per-thread version. + */ + struct hashmap *syscall_stats; double duration_filter; double runtime_ms; + unsigned long pfmaj, pfmin; struct { u64 vfs_getname, proc_getname; } stats; unsigned int max_stack; unsigned int min_stack; + enum trace_summary_mode summary_mode; int raw_augmented_syscalls_args_size; bool raw_augmented_syscalls; bool fd_path_disabled; @@ -213,6 +229,7 @@ struct trace { bool force; bool vfs_getname; bool force_btf; + bool summary_bpf; int trace_pgfaults; char *perfconfig_events; struct { @@ -389,7 +406,12 @@ static struct syscall_arg_fmt *evsel__syscall_arg_fmt(struct evsel *evsel) } if (et->fmt == NULL) { - et->fmt = calloc(evsel->tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt)); + const struct tep_event *tp_format = evsel__tp_format(evsel); + + if (tp_format == NULL) + goto out_delete; + + et->fmt = calloc(tp_format->format.nr_fields, sizeof(struct syscall_arg_fmt)); if (et->fmt == NULL) goto out_delete; } @@ -1108,7 +1130,6 @@ static bool syscall_arg__strtoul_btf_type(char *bf __maybe_unused, size_t size _ .strtoul = STUL_STRARRAY_FLAGS, \ .parm = &strarray__##array, } -#include "trace/beauty/arch_errno_names.c" #include "trace/beauty/eventfd.c" #include "trace/beauty/futex_op.c" #include "trace/beauty/futex_val3.c" @@ -1327,7 +1348,7 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_FDAT, /* olddirfd */ }, [2] = { .scnprintf = SCA_FDAT, /* newdirfd */ }, [4] = { .scnprintf = SCA_RENAMEAT2_FLAGS, /* flags */ }, }, }, - { .name = "rseq", .errpid = true, + { .name = "rseq", .arg = { [0] = { .from_user = true /* rseq */, }, }, }, { .name = "rt_sigaction", .arg = { [0] = { .scnprintf = SCA_SIGNUM, /* sig */ }, }, }, @@ -1351,7 +1372,7 @@ static const struct syscall_fmt syscall_fmts[] = { { .name = "sendto", .arg = { [3] = { .scnprintf = 
SCA_MSG_FLAGS, /* flags */ }, [4] = SCA_SOCKADDR_FROM_USER(addr), }, }, - { .name = "set_robust_list", .errpid = true, + { .name = "set_robust_list", .arg = { [0] = { .from_user = true /* head */, }, }, }, { .name = "set_tid_address", .errpid = true, }, { .name = "setitimer", @@ -1441,22 +1462,37 @@ static const struct syscall_fmt *syscall_fmt__find_by_alias(const char *alias) return __syscall_fmt__find_by_alias(syscall_fmts, nmemb, alias); } -/* - * is_exit: is this "exit" or "exit_group"? - * is_open: is this "open" or "openat"? To associate the fd returned in sys_exit with the pathname in sys_enter. - * args_size: sum of the sizes of the syscall arguments, anything after that is augmented stuff: pathname for openat, etc. - * nonexistent: Just a hole in the syscall table, syscall id not allocated +/** + * struct syscall */ struct syscall { + /** @e_machine: The ELF machine associated with the entry. */ + int e_machine; + /** @id: id value from the tracepoint, the system call number. */ + int id; struct tep_event *tp_format; int nr_args; + /** + * @args_size: sum of the sizes of the syscall arguments, anything + * after that is augmented stuff: pathname for openat, etc. + */ + int args_size; struct { struct bpf_program *sys_enter, *sys_exit; } bpf_prog; + /** @is_exit: is this "exit" or "exit_group"? */ bool is_exit; + /** + * @is_open: is this "open" or "openat"? To associate the fd returned in + * sys_exit with the pathname in sys_enter. + */ bool is_open; + /** + * @nonexistent: Name lookup failed. Just a hole in the syscall table, + * syscall id not allocated. + */ bool nonexistent; bool use_btf; struct tep_format_field *args; @@ -1515,16 +1551,48 @@ struct thread_trace { struct file *table; } files; - struct intlist *syscall_stats; + struct hashmap *syscall_stats; }; -static struct thread_trace *thread_trace__new(void) +static size_t syscall_id_hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool syscall_id_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static struct hashmap *alloc_syscall_stats(void) +{ + return hashmap__new(syscall_id_hash, syscall_id_equal, NULL); +} + +static void delete_syscall_stats(struct hashmap *syscall_stats) +{ + struct hashmap_entry *pos; + size_t bkt; + + if (syscall_stats == NULL) + return; + + hashmap__for_each_entry(syscall_stats, pos, bkt) + zfree(&pos->pvalue); + hashmap__free(syscall_stats); +} + +static struct thread_trace *thread_trace__new(struct trace *trace) { struct thread_trace *ttrace = zalloc(sizeof(struct thread_trace)); if (ttrace) { ttrace->files.max = -1; - ttrace->syscall_stats = intlist__new(NULL); + if (trace->summary) { + ttrace->syscall_stats = alloc_syscall_stats(); + if (IS_ERR(ttrace->syscall_stats)) + zfree(&ttrace); + } } return ttrace; @@ -1539,14 +1607,14 @@ static void thread_trace__delete(void *pttrace) if (!ttrace) return; - intlist__delete(ttrace->syscall_stats); + delete_syscall_stats(ttrace->syscall_stats); ttrace->syscall_stats = NULL; thread_trace__free_files(ttrace); zfree(&ttrace->entry_str); free(ttrace); } -static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) +static struct thread_trace *thread__trace(struct thread *thread, struct trace *trace) { struct thread_trace *ttrace; @@ -1554,7 +1622,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) goto fail; if (thread__priv(thread) == NULL) - thread__set_priv(thread, thread_trace__new()); + thread__set_priv(thread, thread_trace__new(trace)); if 
(thread__priv(thread) == NULL) goto fail; @@ -1564,7 +1632,7 @@ static struct thread_trace *thread__trace(struct thread *thread, FILE *fp) return ttrace; fail: - color_fprintf(fp, PERF_COLOR_RED, + color_fprintf(trace->output, PERF_COLOR_RED, "WARNING: not enough memory, dropping samples!\n"); return NULL; } @@ -1585,7 +1653,7 @@ static const size_t trace__entry_str_size = 2048; static void thread_trace__free_files(struct thread_trace *ttrace) { - for (int i = 0; i < ttrace->files.max; ++i) { + for (int i = 0; i <= ttrace->files.max; ++i) { struct file *file = ttrace->files.table + i; zfree(&file->pathname); } @@ -1631,6 +1699,7 @@ static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pat if (file != NULL) { struct stat st; + if (stat(pathname, &st) == 0) file->dev_maj = major(st.st_rdev); file->pathname = strdup(pathname); @@ -2062,41 +2131,21 @@ static int syscall__set_arg_fmts(struct syscall *sc) return 0; } -static int trace__read_syscall_info(struct trace *trace, int id) +static int syscall__read_info(struct syscall *sc, struct trace *trace) { char tp_name[128]; - struct syscall *sc; - const char *name = syscalltbl__name(trace->sctbl, id); + const char *name; int err; -#ifdef HAVE_SYSCALL_TABLE_SUPPORT - if (trace->syscalls.table == NULL) { - trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc)); - if (trace->syscalls.table == NULL) - return -ENOMEM; - } -#else - if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) { - // When using libaudit we don't know beforehand what is the max syscall id - struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc)); - - if (table == NULL) - return -ENOMEM; - - // Need to memset from offset 0 and +1 members if brand new - if (trace->syscalls.table == NULL) - memset(table, 0, (id + 1) * sizeof(*sc)); - else - memset(table + trace->sctbl->syscalls.max_id + 1, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc)); - - trace->syscalls.table = table; - trace->sctbl->syscalls.max_id = id; - } -#endif - sc = trace->syscalls.table + id; if (sc->nonexistent) return -EEXIST; + if (sc->name) { + /* Info already read. */ + return 0; + } + + name = syscalltbl__name(sc->e_machine, sc->id); if (name == NULL) { sc->nonexistent = true; return -EEXIST; @@ -2119,11 +2168,16 @@ static int trace__read_syscall_info(struct trace *trace, int id) */ if (IS_ERR(sc->tp_format)) { sc->nonexistent = true; - return PTR_ERR(sc->tp_format); + err = PTR_ERR(sc->tp_format); + sc->tp_format = NULL; + return err; } - if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? - RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields)) + /* + * The tracepoint format contains __syscall_nr field, so it's one more + * than the actual number of syscall arguments. 
+ */ + if (syscall__alloc_arg_fmts(sc, sc->tp_format->format.nr_fields - 1)) return -ENOMEM; sc->args = sc->tp_format->format.fields; @@ -2154,8 +2208,12 @@ static int evsel__init_tp_arg_scnprintf(struct evsel *evsel, bool *use_btf) struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel); if (fmt != NULL) { - syscall_arg_fmt__init_array(fmt, evsel->tp_format->format.fields, use_btf); - return 0; + const struct tep_event *tp_format = evsel__tp_format(evsel); + + if (tp_format) { + syscall_arg_fmt__init_array(fmt, tp_format->format.fields, use_btf); + return 0; + } } return -ENOMEM; @@ -2187,10 +2245,14 @@ static int trace__validate_ev_qualifier(struct trace *trace) strlist__for_each_entry(pos, trace->ev_qualifier) { const char *sc = pos->s; - int id = syscalltbl__id(trace->sctbl, sc), match_next = -1; + /* + * TODO: Assume more than the validation/warnings are all for + * the same binary type as perf. + */ + int id = syscalltbl__id(EM_HOST, sc), match_next = -1; if (id < 0) { - id = syscalltbl__strglobmatch_first(trace->sctbl, sc, &match_next); + id = syscalltbl__strglobmatch_first(EM_HOST, sc, &match_next); if (id >= 0) goto matches; @@ -2210,7 +2272,7 @@ matches: continue; while (1) { - id = syscalltbl__strglobmatch_next(trace->sctbl, sc, &match_next); + id = syscalltbl__strglobmatch_next(EM_HOST, sc, &match_next); if (id < 0) break; if (nr_allocated == nr_used) { @@ -2408,13 +2470,92 @@ next_arg: return printed; } +static struct syscall *syscall__new(int e_machine, int id) +{ + struct syscall *sc = zalloc(sizeof(*sc)); + + if (!sc) + return NULL; + + sc->e_machine = e_machine; + sc->id = id; + return sc; +} + +static void syscall__delete(struct syscall *sc) +{ + if (!sc) + return; + + free(sc->arg_fmt); + free(sc); +} + +static int syscall__bsearch_cmp(const void *key, const void *entry) +{ + const struct syscall *a = key, *b = *((const struct syscall **)entry); + + if (a->e_machine != b->e_machine) + return a->e_machine - b->e_machine; + + return a->id - b->id; +} + +static int syscall__cmp(const void *va, const void *vb) +{ + const struct syscall *a = *((const struct syscall **)va); + const struct syscall *b = *((const struct syscall **)vb); + + if (a->e_machine != b->e_machine) + return a->e_machine - b->e_machine; + + return a->id - b->id; +} + +static struct syscall *trace__find_syscall(struct trace *trace, int e_machine, int id) +{ + struct syscall key = { + .e_machine = e_machine, + .id = id, + }; + struct syscall *sc, **tmp; + + if (trace->syscalls.table) { + struct syscall **sc_entry = bsearch(&key, trace->syscalls.table, + trace->syscalls.table_size, + sizeof(trace->syscalls.table[0]), + syscall__bsearch_cmp); + + if (sc_entry) + return *sc_entry; + } + + sc = syscall__new(e_machine, id); + if (!sc) + return NULL; + + tmp = reallocarray(trace->syscalls.table, trace->syscalls.table_size + 1, + sizeof(trace->syscalls.table[0])); + if (!tmp) { + syscall__delete(sc); + return NULL; + } + + trace->syscalls.table = tmp; + trace->syscalls.table[trace->syscalls.table_size++] = sc; + qsort(trace->syscalls.table, trace->syscalls.table_size, sizeof(trace->syscalls.table[0]), + syscall__cmp); + return sc; +} + typedef int (*tracepoint_handler)(struct trace *trace, struct evsel *evsel, union perf_event *event, struct perf_sample *sample); -static struct syscall *trace__syscall_info(struct trace *trace, - struct evsel *evsel, int id) +static struct syscall *trace__syscall_info(struct trace *trace, struct evsel *evsel, + int e_machine, int id) { + struct syscall *sc; int err = 0; if (id < 
0) { @@ -2439,39 +2580,20 @@ static struct syscall *trace__syscall_info(struct trace *trace, err = -EINVAL; -#ifdef HAVE_SYSCALL_TABLE_SUPPORT - if (id > trace->sctbl->syscalls.max_id) { -#else - if (id >= trace->sctbl->syscalls.max_id) { - /* - * With libaudit we don't know beforehand what is the max_id, - * so we let trace__read_syscall_info() figure that out as we - * go on reading syscalls. - */ - err = trace__read_syscall_info(trace, id); - if (err) -#endif - goto out_cant_read; - } - - if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) && - (err = trace__read_syscall_info(trace, id)) != 0) - goto out_cant_read; - - if (trace->syscalls.table && trace->syscalls.table[id].nonexistent) - goto out_cant_read; + sc = trace__find_syscall(trace, e_machine, id); + if (sc) + err = syscall__read_info(sc, trace); - return &trace->syscalls.table[id]; - -out_cant_read: - if (verbose > 0) { + if (err && verbose > 0) { char sbuf[STRERR_BUFSIZE]; - fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, str_error_r(-err, sbuf, sizeof(sbuf))); - if (id <= trace->sctbl->syscalls.max_id && trace->syscalls.table[id].name != NULL) - fprintf(trace->output, "(%s)", trace->syscalls.table[id].name); + + fprintf(trace->output, "Problems reading syscall %d: %d (%s)", id, -err, + str_error_r(-err, sbuf, sizeof(sbuf))); + if (sc && sc->name) + fprintf(trace->output, "(%s)", sc->name); fputs(" information\n", trace->output); } - return NULL; + return err ? NULL : sc; } struct syscall_stats { @@ -2482,24 +2604,29 @@ struct syscall_stats { }; static void thread__update_stats(struct thread *thread, struct thread_trace *ttrace, - int id, struct perf_sample *sample, long err, bool errno_summary) + int id, struct perf_sample *sample, long err, + struct trace *trace) { - struct int_node *inode; - struct syscall_stats *stats; + struct hashmap *syscall_stats = ttrace->syscall_stats; + struct syscall_stats *stats = NULL; u64 duration = 0; - inode = intlist__findnew(ttrace->syscall_stats, id); - if (inode == NULL) + if (trace->summary_bpf) return; - stats = inode->priv; - if (stats == NULL) { + if (trace->summary_mode == SUMMARY__BY_TOTAL) + syscall_stats = trace->syscall_stats; + + if (!hashmap__find(syscall_stats, id, &stats)) { stats = zalloc(sizeof(*stats)); if (stats == NULL) return; init_stats(&stats->stats); - inode->priv = stats; + if (hashmap__add(syscall_stats, id, stats) < 0) { + free(stats); + return; + } } if (ttrace->entry_time && sample->time > ttrace->entry_time) @@ -2510,7 +2637,7 @@ static void thread__update_stats(struct thread *thread, struct thread_trace *ttr if (err < 0) { ++stats->nr_failures; - if (!errno_summary) + if (!trace->errno_summary) return; err = -err; @@ -2581,7 +2708,6 @@ static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel, static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sample, int *augmented_args_size, int raw_augmented_args_size) { - void *augmented_args = NULL; /* * For now with BPF raw_augmented we hook into raw_syscalls:sys_enter * and there we get all 6 syscall args plus the tracepoint common fields @@ -2599,18 +2725,24 @@ static void *syscall__augmented_args(struct syscall *sc, struct perf_sample *sam int args_size = raw_augmented_args_size ?: sc->args_size; *augmented_args_size = sample->raw_size - args_size; - if (*augmented_args_size > 0) - augmented_args = sample->raw_data + args_size; + if (*augmented_args_size > 0) { + static uintptr_t argbuf[1024]; /* assuming single-threaded */ - 
return augmented_args; -} + if ((size_t)(*augmented_args_size) > sizeof(argbuf)) + return NULL; -static void syscall__exit(struct syscall *sc) -{ - if (!sc) - return; + /* + * The perf ring-buffer is 8-byte aligned but sample->raw_data + * is not because it's preceded by u32 size. Later, beautifier + * will use the augmented args with stricter alignments like in + * some struct. To make sure it's aligned, let's copy the args + * into a static buffer as it's single-threaded for now. + */ + memcpy(argbuf, sample->raw_data + args_size, *augmented_args_size); - zfree(&sc->arg_fmt); + return argbuf; + } + return NULL; } static int trace__sys_enter(struct trace *trace, struct evsel *evsel, @@ -2622,16 +2754,17 @@ static int trace__sys_enter(struct trace *trace, struct evsel *evsel, int printed = 0; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; - int augmented_args_size = 0; + int augmented_args_size = 0, e_machine; void *augmented_args = NULL; - struct syscall *sc = trace__syscall_info(trace, evsel, id); + struct syscall *sc; struct thread_trace *ttrace; - if (sc == NULL) - return -1; - thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); - ttrace = thread__trace(thread, trace->output); + e_machine = thread__e_machine(thread, trace->host); + sc = trace__syscall_info(trace, evsel, e_machine, id); + if (sc == NULL) + goto out_put; + ttrace = thread__trace(thread, trace); if (ttrace == NULL) goto out_put; @@ -2698,17 +2831,19 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, struct thread_trace *ttrace; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; - struct syscall *sc = trace__syscall_info(trace, evsel, id); + struct syscall *sc; char msg[1024]; void *args, *augmented_args = NULL; - int augmented_args_size; + int augmented_args_size, e_machine; size_t printed = 0; - if (sc == NULL) - return -1; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); - ttrace = thread__trace(thread, trace->output); + e_machine = thread__e_machine(thread, trace->host); + sc = trace__syscall_info(trace, evsel, e_machine, id); + if (sc == NULL) + goto out_put; + ttrace = thread__trace(thread, trace); /* * We need to get ttrace just to make sure it is there when syscall__scnprintf_args() * and the rest of the beautifiers accessing it via struct syscall_arg touches it. 
@@ -2772,15 +2907,16 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel, bool duration_calculated = false; struct thread *thread; int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1, callchain_ret = 0, printed = 0; - int alignment = trace->args_alignment; - struct syscall *sc = trace__syscall_info(trace, evsel, id); + int alignment = trace->args_alignment, e_machine; + struct syscall *sc; struct thread_trace *ttrace; - if (sc == NULL) - return -1; - thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); - ttrace = thread__trace(thread, trace->output); + e_machine = thread__e_machine(thread, trace->host); + sc = trace__syscall_info(trace, evsel, e_machine, id); + if (sc == NULL) + goto out_put; + ttrace = thread__trace(thread, trace); if (ttrace == NULL) goto out_put; @@ -2789,7 +2925,7 @@ static int trace__sys_exit(struct trace *trace, struct evsel *evsel, ret = perf_evsel__sc_tp_uint(evsel, ret, sample); if (trace->summary) - thread__update_stats(thread, ttrace, id, sample, ret, trace->errno_summary); + thread__update_stats(thread, ttrace, id, sample, ret, trace); if (!trace->fd_path_disabled && sc->is_open && ret >= 0 && ttrace->filename.pending_open) { trace__set_fd_pathname(thread, ret, ttrace->filename.name); @@ -2869,8 +3005,8 @@ errno_print: { else if (sc->fmt->errpid) { struct thread *child = machine__find_thread(trace->host, ret, ret); + fprintf(trace->output, "%ld", ret); if (child != NULL) { - fprintf(trace->output, "%ld", ret); if (thread__comm_set(child)) fprintf(trace->output, " (%s)", thread__comm_str(child)); thread__put(child); @@ -2969,7 +3105,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel, struct thread *thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); - struct thread_trace *ttrace = thread__trace(thread, trace->output); + struct thread_trace *ttrace = thread__trace(thread, trace); if (ttrace == NULL) goto out_dump; @@ -3027,7 +3163,8 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, { char bf[2048]; size_t size = sizeof(bf); - struct tep_format_field *field = evsel->tp_format->format.fields; + const struct tep_event *tp_format = evsel__tp_format(evsel); + struct tep_format_field *field = tp_format ? tp_format->format.fields : NULL; struct syscall_arg_fmt *arg = __evsel__syscall_arg_fmt(evsel); size_t printed = 0, btf_printed; unsigned long val; @@ -3088,7 +3225,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val); } - return printed + fprintf(trace->output, "%.*s", (int)printed, bf); + return fprintf(trace->output, "%.*s", (int)printed, bf); } static int trace__event_handler(struct trace *trace, struct evsel *evsel, @@ -3125,7 +3262,8 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, if (evsel == trace->syscalls.events.bpf_output) { int id = perf_evsel__sc_tp_uint(evsel, id, sample); - struct syscall *sc = trace__syscall_info(trace, evsel, id); + int e_machine = thread ? 
thread__e_machine(thread, trace->host) : EM_HOST; + struct syscall *sc = trace__syscall_info(trace, evsel, e_machine, id); if (sc) { fprintf(trace->output, "%s(", sc->name); @@ -3145,11 +3283,13 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, if (evsel__is_bpf_output(evsel)) { bpf_output__fprintf(trace, sample); - } else if (evsel->tp_format) { - if (strncmp(evsel->tp_format->name, "sys_enter_", 10) || - trace__fprintf_sys_enter(trace, evsel, sample)) { + } else { + const struct tep_event *tp_format = evsel__tp_format(evsel); + + if (tp_format && (strncmp(tp_format->name, "sys_enter_", 10) || + trace__fprintf_sys_enter(trace, evsel, sample))) { if (trace->libtraceevent_print) { - event_format__fprintf(evsel->tp_format, sample->cpu, + event_format__fprintf(tp_format, sample->cpu, sample->raw_data, sample->raw_size, trace->output); } else { @@ -3220,14 +3360,17 @@ static int trace__pgfault(struct trace *trace, } } - ttrace = thread__trace(thread, trace->output); + ttrace = thread__trace(thread, trace); if (ttrace == NULL) goto out_put; - if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) + if (evsel->core.attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) { ttrace->pfmaj++; - else + trace->pfmaj++; + } else { ttrace->pfmin++; + trace->pfmin++; + } if (trace->summary_only) goto out; @@ -3386,6 +3529,7 @@ out_free: } static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp); +static size_t trace__fprintf_total_summary(struct trace *trace, FILE *fp); static bool evlist__add_vfs_getname(struct evlist *evlist) { @@ -3626,9 +3770,9 @@ out_unaugmented: return trace->skel->progs.syscall_unaugmented; } -static void trace__init_syscall_bpf_progs(struct trace *trace, int id) +static void trace__init_syscall_bpf_progs(struct trace *trace, int e_machine, int id) { - struct syscall *sc = trace__syscall_info(trace, NULL, id); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id); if (sc == NULL) return; @@ -3637,22 +3781,22 @@ static void trace__init_syscall_bpf_progs(struct trace *trace, int id) sc->bpf_prog.sys_exit = trace__find_syscall_bpf_prog(trace, sc, sc->fmt ? sc->fmt->bpf_prog_name.sys_exit : NULL, "exit"); } -static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int id) +static int trace__bpf_prog_sys_enter_fd(struct trace *trace, int e_machine, int id) { - struct syscall *sc = trace__syscall_info(trace, NULL, id); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id); return sc ? bpf_program__fd(sc->bpf_prog.sys_enter) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); } -static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id) +static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int e_machine, int id) { - struct syscall *sc = trace__syscall_info(trace, NULL, id); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, id); return sc ? 
bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->skel->progs.syscall_unaugmented); } -static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigned int *beauty_array) +static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int e_machine, int key, unsigned int *beauty_array) { struct tep_format_field *field; - struct syscall *sc = trace__syscall_info(trace, NULL, key); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, key); const struct btf_type *bt; char *struct_offset, *tmp, name[32]; bool can_augment = false; @@ -3734,7 +3878,8 @@ static int trace__bpf_sys_enter_beauty_map(struct trace *trace, int key, unsigne return -1; } -static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc) +static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, + struct syscall *sc) { struct tep_format_field *field, *candidate_field; /* @@ -3748,13 +3893,13 @@ static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace return NULL; try_to_find_pair: - for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { - int id = syscalltbl__id_at_idx(trace->sctbl, i); - struct syscall *pair = trace__syscall_info(trace, NULL, id); + for (int i = 0, num_idx = syscalltbl__num_idx(sc->e_machine); i < num_idx; ++i) { + int id = syscalltbl__id_at_idx(sc->e_machine, i); + struct syscall *pair = trace__syscall_info(trace, NULL, sc->e_machine, id); struct bpf_program *pair_prog; bool is_candidate = false; - if (pair == NULL || pair == sc || + if (pair == NULL || pair->id == sc->id || pair->bpf_prog.sys_enter == trace->skel->progs.syscall_unaugmented) continue; @@ -3825,7 +3970,8 @@ try_to_find_pair: goto next_candidate; } - pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, sc->name); + pr_debug("Reusing \"%s\" BPF sys_enter augmenter for \"%s\"\n", pair->name, + sc->name); return pair_prog; next_candidate: continue; @@ -3834,7 +3980,7 @@ try_to_find_pair: return NULL; } -static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) +static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace, int e_machine) { int map_enter_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_enter); int map_exit_fd = bpf_map__fd(trace->skel->maps.syscalls_sys_exit); @@ -3842,27 +3988,27 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) int err = 0; unsigned int beauty_array[6]; - for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { - int prog_fd, key = syscalltbl__id_at_idx(trace->sctbl, i); + for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) { + int prog_fd, key = syscalltbl__id_at_idx(e_machine, i); if (!trace__syscall_enabled(trace, key)) continue; - trace__init_syscall_bpf_progs(trace, key); + trace__init_syscall_bpf_progs(trace, e_machine, key); // It'll get at least the "!raw_syscalls:unaugmented" - prog_fd = trace__bpf_prog_sys_enter_fd(trace, key); + prog_fd = trace__bpf_prog_sys_enter_fd(trace, e_machine, key); err = bpf_map_update_elem(map_enter_fd, &key, &prog_fd, BPF_ANY); if (err) break; - prog_fd = trace__bpf_prog_sys_exit_fd(trace, key); + prog_fd = trace__bpf_prog_sys_exit_fd(trace, e_machine, key); err = bpf_map_update_elem(map_exit_fd, &key, &prog_fd, BPF_ANY); if (err) break; /* use beauty_map to tell BPF how many bytes to collect, set beauty_map's value here */ memset(beauty_array, 0, sizeof(beauty_array)); - err = trace__bpf_sys_enter_beauty_map(trace, key, (unsigned int 
*)beauty_array); + err = trace__bpf_sys_enter_beauty_map(trace, e_machine, key, (unsigned int *)beauty_array); if (err) continue; err = bpf_map_update_elem(beauty_map_fd, &key, beauty_array, BPF_ANY); @@ -3898,9 +4044,9 @@ static int trace__init_syscalls_bpf_prog_array_maps(struct trace *trace) * first and second arg (this one on the raw_syscalls:sys_exit prog * array tail call, then that one will be used. */ - for (int i = 0; i < trace->sctbl->syscalls.nr_entries; ++i) { - int key = syscalltbl__id_at_idx(trace->sctbl, i); - struct syscall *sc = trace__syscall_info(trace, NULL, key); + for (int i = 0, num_idx = syscalltbl__num_idx(e_machine); i < num_idx; ++i) { + int key = syscalltbl__id_at_idx(e_machine, i); + struct syscall *sc = trace__syscall_info(trace, NULL, e_machine, key); struct bpf_program *pair_prog; int prog_fd; @@ -3982,10 +4128,13 @@ static int trace__set_filter_loop_pids(struct trace *trace) if (!strcmp(thread__comm_str(parent), "sshd") || strstarts(thread__comm_str(parent), "gnome-terminal")) { pids[nr++] = thread__tid(parent); + thread__put(parent); break; } + thread__put(thread); thread = parent; } + thread__put(thread); err = evlist__append_tp_filter_pids(trace->evlist, nr, pids); if (!err && trace->filter_pids.map) @@ -4021,13 +4170,16 @@ static int __trace__deliver_event(struct trace *trace, union perf_event *event) { struct evlist *evlist = trace->evlist; struct perf_sample sample; - int err = evlist__parse_sample(evlist, event, &sample); + int err; + perf_sample__init(&sample, /*all=*/false); + err = evlist__parse_sample(evlist, event, &sample); if (err) fprintf(trace->output, "Can't parse sample, err = %d, skipping...\n", err); else trace__handle_event(trace, event, &sample); + perf_sample__exit(&sample); return 0; } @@ -4077,17 +4229,23 @@ static int ordered_events__deliver_event(struct ordered_events *oe, static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg, char **type) { - struct tep_format_field *field; struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel); + const struct tep_event *tp_format; + + if (!fmt) + return NULL; - if (evsel->tp_format == NULL || fmt == NULL) + tp_format = evsel__tp_format(evsel); + if (!tp_format) return NULL; - for (field = evsel->tp_format->format.fields; field; field = field->next, ++fmt) + for (const struct tep_format_field *field = tp_format->format.fields; field; + field = field->next, ++fmt) { if (strcmp(field->name, arg) == 0) { *type = field->type; return fmt; } + } return NULL; } @@ -4222,6 +4380,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->live = true; + if (trace->summary_bpf) { + if (trace_prepare_bpf_summary(trace->summary_mode) < 0) + goto out_delete_evlist; + + if (trace->summary_only) + goto create_maps; + } + if (!trace->raw_augmented_syscalls) { if (trace->trace_syscalls && trace__add_syscall_newtp(trace)) goto out_error_raw_syscalls; @@ -4280,6 +4446,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (trace->cgroup) evlist__set_default_cgroup(trace->evlist, trace->cgroup); +create_maps: err = evlist__create_maps(evlist, &trace->opts.target); if (err < 0) { fprintf(trace->output, "Problems parsing the target to trace, check your options!\n"); @@ -4292,6 +4459,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } + if (trace->summary_mode == SUMMARY__BY_TOTAL && !trace->summary_bpf) { + trace->syscall_stats = alloc_syscall_stats(); + if 
(trace->syscall_stats == NULL) + goto out_delete_evlist; + } + evlist__config(evlist, &trace->opts, &callchain_param); if (forks) { @@ -4315,10 +4488,12 @@ static int trace__run(struct trace *trace, int argc, const char **argv) * CPU the bpf-output event's file descriptor. */ perf_cpu_map__for_each_cpu(cpu, i, trace->syscalls.events.bpf_output->core.cpus) { + int mycpu = cpu.cpu; + bpf_map__update_elem(trace->skel->maps.__augmented_syscalls__, - &cpu.cpu, sizeof(int), + &mycpu, sizeof(mycpu), xyarray__entry(trace->syscalls.events.bpf_output->core.fd, - cpu.cpu, 0), + mycpu, 0), sizeof(__u32), BPF_ANY); } } @@ -4331,8 +4506,13 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_error_mem; #ifdef HAVE_BPF_SKEL - if (trace->skel && trace->skel->progs.sys_enter) - trace__init_syscalls_bpf_prog_array_maps(trace); + if (trace->skel && trace->skel->progs.sys_enter) { + /* + * TODO: Initialize for all host binary machine types, not just + * those matching the perf binary. + */ + trace__init_syscalls_bpf_prog_array_maps(trace, EM_HOST); + } #endif if (trace->ev_qualifier_ids.nr > 0) { @@ -4357,7 +4537,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) * So just disable this beautifier (SCA_FD, SCA_FDAT) when 'close' is * not in use. */ - trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(trace->sctbl, "close")); + /* TODO: support for more than just perf binary machine type close. */ + trace->fd_path_disabled = !trace__syscall_enabled(trace, syscalltbl__id(EM_HOST, "close")); err = trace__expand_filters(trace, &evsel); if (err) @@ -4366,9 +4547,11 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_apply_filters; - err = evlist__mmap(evlist, trace->opts.mmap_pages); - if (err < 0) - goto out_error_mmap; + if (!trace->summary_only || !trace->summary_bpf) { + err = evlist__mmap(evlist, trace->opts.mmap_pages); + if (err < 0) + goto out_error_mmap; + } if (!target__none(&trace->opts.target) && !trace->opts.target.initial_delay) evlist__enable(evlist); @@ -4381,6 +4564,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__enable(evlist); } + if (trace->summary_bpf) + trace_start_bpf_summary(); + trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 || perf_thread_map__nr(evlist->core.threads) > 1 || evlist__first(evlist)->core.attr.inherit; @@ -4448,12 +4634,21 @@ out_disable: evlist__disable(evlist); + if (trace->summary_bpf) + trace_end_bpf_summary(); + if (trace->sort_events) ordered_events__flush(&trace->oe.data, OE_FLUSH__FINAL); if (!err) { - if (trace->summary) - trace__fprintf_thread_summary(trace, trace->output); + if (trace->summary) { + if (trace->summary_bpf) + trace_print_bpf_summary(trace->output); + else if (trace->summary_mode == SUMMARY__BY_TOTAL) + trace__fprintf_total_summary(trace, trace->output); + else + trace__fprintf_thread_summary(trace, trace->output); + } if (trace->show_tool_stats) { fprintf(trace->output, "Stats:\n " @@ -4465,6 +4660,8 @@ out_disable: } out_delete_evlist: + trace_cleanup_bpf_summary(); + delete_syscall_stats(trace->syscall_stats); trace__symbols__exit(trace); evlist__free_syscall_tp_fields(evlist); evlist__delete(evlist); @@ -4524,6 +4721,7 @@ static int trace__replay(struct trace *trace) struct evsel *evsel; int err = -1; + perf_tool__init(&trace->tool, /*ordered_events=*/true); trace->tool.sample = trace__process_sample; trace->tool.mmap = perf_event__process_mmap; 
trace->tool.mmap2 = perf_event__process_mmap2; @@ -4592,6 +4790,12 @@ static int trace__replay(struct trace *trace) evsel->handler = trace__pgfault; } + if (trace->summary_mode == SUMMARY__BY_TOTAL) { + trace->syscall_stats = alloc_syscall_stats(); + if (trace->syscall_stats == NULL) + goto out; + } + setup_pager(); err = perf_session__process_events(session); @@ -4602,12 +4806,13 @@ static int trace__replay(struct trace *trace) trace__fprintf_thread_summary(trace, trace->output); out: + delete_syscall_stats(trace->syscall_stats); perf_session__delete(session); return err; } -static size_t trace__fprintf_threads_header(FILE *fp) +static size_t trace__fprintf_summary_header(FILE *fp) { size_t printed; @@ -4616,29 +4821,56 @@ static size_t trace__fprintf_threads_header(FILE *fp) return printed; } -DEFINE_RESORT_RB(syscall_stats, a->msecs > b->msecs, +struct syscall_entry { struct syscall_stats *stats; double msecs; int syscall; -) +}; + +static int entry_cmp(const void *e1, const void *e2) { - struct int_node *source = rb_entry(nd, struct int_node, rb_node); - struct syscall_stats *stats = source->priv; + const struct syscall_entry *entry1 = e1; + const struct syscall_entry *entry2 = e2; - entry->syscall = source->i; - entry->stats = stats; - entry->msecs = stats ? (u64)stats->stats.n * (avg_stats(&stats->stats) / NSEC_PER_MSEC) : 0; + return entry1->msecs > entry2->msecs ? -1 : 1; } -static size_t thread__dump_stats(struct thread_trace *ttrace, - struct trace *trace, FILE *fp) +static struct syscall_entry *syscall__sort_stats(struct hashmap *syscall_stats) +{ + struct syscall_entry *entry; + struct hashmap_entry *pos; + unsigned bkt, i, nr; + + nr = syscall_stats->sz; + entry = malloc(nr * sizeof(*entry)); + if (entry == NULL) + return NULL; + + i = 0; + hashmap__for_each_entry(syscall_stats, pos, bkt) { + struct syscall_stats *ss = pos->pvalue; + struct stats *st = &ss->stats; + + entry[i].stats = ss; + entry[i].msecs = (u64)st->n * (avg_stats(st) / NSEC_PER_MSEC); + entry[i].syscall = pos->key; + i++; + } + assert(i == nr); + + qsort(entry, nr, sizeof(*entry), entry_cmp); + return entry; +} + +static size_t syscall__dump_stats(struct trace *trace, int e_machine, FILE *fp, + struct hashmap *syscall_stats) { size_t printed = 0; struct syscall *sc; - struct rb_node *nd; - DECLARE_RESORT_RB_INTLIST(syscall_stats, ttrace->syscall_stats); + struct syscall_entry *entries; - if (syscall_stats == NULL) + entries = syscall__sort_stats(syscall_stats); + if (entries == NULL) return 0; printed += fprintf(fp, "\n"); @@ -4647,8 +4879,10 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, printed += fprintf(fp, " (msec) (msec) (msec) (msec) (%%)\n"); printed += fprintf(fp, " --------------- -------- ------ -------- --------- --------- --------- ------\n"); - resort_rb__for_each_entry(nd, syscall_stats) { - struct syscall_stats *stats = syscall_stats_entry->stats; + for (size_t i = 0; i < syscall_stats->sz; i++) { + struct syscall_entry *entry = &entries[i]; + struct syscall_stats *stats = entry->stats; + if (stats) { double min = (double)(stats->stats.min) / NSEC_PER_MSEC; double max = (double)(stats->stats.max) / NSEC_PER_MSEC; @@ -4659,10 +4893,13 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, pct = avg ? 
100.0 * stddev_stats(&stats->stats) / avg : 0.0; avg /= NSEC_PER_MSEC; - sc = &trace->syscalls.table[syscall_stats_entry->syscall]; + sc = trace__syscall_info(trace, /*evsel=*/NULL, e_machine, entry->syscall); + if (!sc) + continue; + printed += fprintf(fp, " %-15s", sc->name); printed += fprintf(fp, " %8" PRIu64 " %6" PRIu64 " %9.3f %9.3f %9.3f", - n, stats->nr_failures, syscall_stats_entry->msecs, min, avg); + n, stats->nr_failures, entry->msecs, min, avg); printed += fprintf(fp, " %9.3f %9.2f%%\n", max, pct); if (trace->errno_summary && stats->nr_failures) { @@ -4676,16 +4913,28 @@ static size_t thread__dump_stats(struct thread_trace *ttrace, } } - resort_rb__delete(syscall_stats); + free(entries); printed += fprintf(fp, "\n\n"); return printed; } +static size_t thread__dump_stats(struct thread_trace *ttrace, + struct trace *trace, int e_machine, FILE *fp) +{ + return syscall__dump_stats(trace, e_machine, fp, ttrace->syscall_stats); +} + +static size_t system__dump_stats(struct trace *trace, int e_machine, FILE *fp) +{ + return syscall__dump_stats(trace, e_machine, fp, trace->syscall_stats); +} + static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trace *trace) { size_t printed = 0; struct thread_trace *ttrace = thread__priv(thread); + int e_machine = thread__e_machine(thread, trace->host); double ratio; if (ttrace == NULL) @@ -4705,7 +4954,7 @@ static size_t trace__fprintf_thread(FILE *fp, struct thread *thread, struct trac else if (fputc('\n', fp) != EOF) ++printed; - printed += thread__dump_stats(ttrace, trace, fp); + printed += thread__dump_stats(ttrace, trace, e_machine, fp); return printed; } @@ -4735,7 +4984,7 @@ static int trace_nr_events_cmp(void *priv __maybe_unused, static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) { - size_t printed = trace__fprintf_threads_header(fp); + size_t printed = trace__fprintf_summary_header(fp); LIST_HEAD(threads); if (machine__thread_list(trace->host, &threads) == 0) { @@ -4750,6 +4999,28 @@ static size_t trace__fprintf_thread_summary(struct trace *trace, FILE *fp) return printed; } +static size_t trace__fprintf_total_summary(struct trace *trace, FILE *fp) +{ + size_t printed = trace__fprintf_summary_header(fp); + + printed += fprintf(fp, " total, "); + printed += fprintf(fp, "%lu events", trace->nr_events); + + if (trace->pfmaj) + printed += fprintf(fp, ", %lu majfaults", trace->pfmaj); + if (trace->pfmin) + printed += fprintf(fp, ", %lu minfaults", trace->pfmin); + if (trace->sched) + printed += fprintf(fp, ", %.3f msec\n", trace->runtime_ms); + else if (fputc('\n', fp) != EOF) + ++printed; + + /* TODO: get all system e_machines. 
*/ + printed += system__dump_stats(trace, EM_HOST, fp); + + return printed; +} + static int trace__set_duration(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -4843,13 +5114,18 @@ static void evsel__set_syscall_arg_fmt(struct evsel *evsel, const char *name) const struct syscall_fmt *scfmt = syscall_fmt__find(name); if (scfmt) { - int skip = 0; + const struct tep_event *tp_format = evsel__tp_format(evsel); + + if (tp_format) { + int skip = 0; - if (strcmp(evsel->tp_format->format.fields->name, "__syscall_nr") == 0 || - strcmp(evsel->tp_format->format.fields->name, "nr") == 0) - ++skip; + if (strcmp(tp_format->format.fields->name, "__syscall_nr") == 0 || + strcmp(tp_format->format.fields->name, "nr") == 0) + ++skip; - memcpy(fmt + skip, scfmt->arg, (evsel->tp_format->format.nr_fields - skip) * sizeof(*fmt)); + memcpy(fmt + skip, scfmt->arg, + (tp_format->format.nr_fields - skip) * sizeof(*fmt)); + } } } } @@ -4859,10 +5135,16 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf) struct evsel *evsel; evlist__for_each_entry(evlist, evsel) { - if (evsel->priv || !evsel->tp_format) + const struct tep_event *tp_format; + + if (evsel->priv) + continue; + + tp_format = evsel__tp_format(evsel); + if (!tp_format) continue; - if (strcmp(evsel->tp_format->system, "syscalls")) { + if (strcmp(tp_format->system, "syscalls")) { evsel__init_tp_arg_scnprintf(evsel, use_btf); continue; } @@ -4870,20 +5152,24 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist, bool *use_btf) if (evsel__init_syscall_tp(evsel)) return -1; - if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) { + if (!strncmp(tp_format->name, "sys_enter_", 10)) { struct syscall_tp *sc = __evsel__syscall_tp(evsel); if (__tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64))) return -1; - evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_enter_") - 1); - } else if (!strncmp(evsel->tp_format->name, "sys_exit_", 9)) { + evsel__set_syscall_arg_fmt(evsel, + tp_format->name + sizeof("sys_enter_") - 1); + } else if (!strncmp(tp_format->name, "sys_exit_", 9)) { struct syscall_tp *sc = __evsel__syscall_tp(evsel); - if (__tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap)) + if (__tp_field__init_uint(&sc->ret, sizeof(u64), + sc->id.offset + sizeof(u64), + evsel->needs_swap)) return -1; - evsel__set_syscall_arg_fmt(evsel, evsel->tp_format->name + sizeof("sys_exit_") - 1); + evsel__set_syscall_arg_fmt(evsel, + tp_format->name + sizeof("sys_exit_") - 1); } } @@ -4922,8 +5208,9 @@ static int trace__parse_events_option(const struct option *opt, const char *str, *sep = '\0'; list = 0; - if (syscalltbl__id(trace->sctbl, s) >= 0 || - syscalltbl__strglobmatch_first(trace->sctbl, s, &idx) >= 0) { + /* TODO: support for more than just perf binary machine type syscalls. 
*/ + if (syscalltbl__id(EM_HOST, s) >= 0 || + syscalltbl__strglobmatch_first(EM_HOST, s, &idx) >= 0) { list = 1; goto do_concat; } @@ -5006,6 +5293,25 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u return 0; } +static int trace__parse_summary_mode(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct trace *trace = opt->value; + + if (!strcmp(str, "thread")) { + trace->summary_mode = SUMMARY__BY_THREAD; + } else if (!strcmp(str, "total")) { + trace->summary_mode = SUMMARY__BY_TOTAL; + } else if (!strcmp(str, "cgroup")) { + trace->summary_mode = SUMMARY__BY_CGROUP; + } else { + pr_err("Unknown summary mode: %s\n", str); + return -1; + } + + return 0; +} + static int trace__config(const char *var, const char *value, void *arg) { struct trace *trace = arg; @@ -5052,17 +5358,20 @@ out: static void trace__exit(struct trace *trace) { - int i; - strlist__delete(trace->ev_qualifier); zfree(&trace->ev_qualifier_ids.entries); if (trace->syscalls.table) { - for (i = 0; i <= trace->sctbl->syscalls.max_id; i++) - syscall__exit(&trace->syscalls.table[i]); + for (size_t i = 0; i < trace->syscalls.table_size; i++) + syscall__delete(trace->syscalls.table[i]); zfree(&trace->syscalls.table); } - syscalltbl__delete(trace->sctbl); zfree(&trace->perfconfig_events); + evlist__delete(trace->evlist); + trace->evlist = NULL; +#ifdef HAVE_LIBBPF_SUPPORT + btf__free(trace->btf); + trace->btf = NULL; +#endif } #ifdef HAVE_BPF_SKEL @@ -5153,6 +5462,9 @@ int cmd_trace(int argc, const char **argv) "Show all syscalls and summary with statistics"), OPT_BOOLEAN(0, "errno-summary", &trace.errno_summary, "Show errno stats per syscall, use with -s or -S"), + OPT_CALLBACK(0, "summary-mode", &trace, "mode", + "How to show summary: select thread (default), total or cgroup", + trace__parse_summary_mode), OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", "Trace pagefaults", parse_pagefaults, "maj"), OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), @@ -5186,6 +5498,7 @@ int cmd_trace(int argc, const char **argv) "start"), OPT_BOOLEAN(0, "force-btf", &trace.force_btf, "Prefer btf_dump general pretty printer" "to customized ones"), + OPT_BOOLEAN(0, "bpf-summary", &trace.summary_bpf, "Summary syscall stats in BPF"), OPTS_EVSWITCH(&trace.evswitch), OPT_END() }; @@ -5207,9 +5520,8 @@ int cmd_trace(int argc, const char **argv) sigaction(SIGCHLD, &sigchld_act, NULL); trace.evlist = evlist__new(); - trace.sctbl = syscalltbl__new(); - if (trace.evlist == NULL || trace.sctbl == NULL) { + if (trace.evlist == NULL) { pr_err("Not enough memory to run!\n"); err = -ENOMEM; goto out; @@ -5278,6 +5590,16 @@ int cmd_trace(int argc, const char **argv) goto skip_augmentation; } + if (trace.summary_bpf) { + if (!trace.opts.target.system_wide) { + /* TODO: Add filters in the BPF to support other targets. 
*/ + pr_err("Error: --bpf-summary only works for system-wide mode.\n"); + goto out; + } + if (trace.summary_only) + goto skip_augmentation; + } + trace.skel = augmented_raw_syscalls_bpf__open(); if (!trace.skel) { pr_debug("Failed to open augmented syscalls BPF skeleton"); @@ -5437,8 +5759,10 @@ init_augmented_syscall_tp: } } - if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) - return trace__record(&trace, argc-1, &argv[1]); + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) { + err = trace__record(&trace, argc-1, &argv[1]); + goto out; + } /* Using just --errno-summary will trigger --summary */ if (trace.errno_summary && !trace.summary && !trace.summary_only) @@ -5449,8 +5773,17 @@ init_augmented_syscall_tp: trace.summary = trace.summary_only; /* Keep exited threads, otherwise information might be lost for summary */ - if (trace.summary) + if (trace.summary) { symbol_conf.keep_exited_threads = true; + if (trace.summary_mode == SUMMARY__NONE) + trace.summary_mode = SUMMARY__BY_THREAD; + + if (!trace.summary_bpf && trace.summary_mode == SUMMARY__BY_CGROUP) { + pr_err("Error: --summary-mode=cgroup only works with --bpf-summary\n"); + err = -EINVAL; + goto out; + } + } if (output_name != NULL) { err = trace__open_output(&trace, output_name); |