From 864709302a80f26fa9da3be5b47304f0b8bae192 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Jun 2009 20:33:43 +0200 Subject: perf_counter tools: Move from Documentation/perf_counter/ to tools/perf/ Several people have suggested that 'perf' has become a full-fledged tool that should be moved out of Documentation/. Move it to the (new) tools/ directory. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 1291 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1291 insertions(+) create mode 100644 tools/perf/builtin-report.c (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c new file mode 100644 index 000000000000..242e09ff3658 --- /dev/null +++ b/tools/perf/builtin-report.c @@ -0,0 +1,1291 @@ +/* + * builtin-report.c + * + * Builtin report command: Analyze the perf.data input file, + * look up and read DSOs and symbol information and display + * a histogram of results, along various sorting keys. + */ +#include "builtin.h" + +#include "util/util.h" + +#include "util/color.h" +#include "util/list.h" +#include "util/cache.h" +#include "util/rbtree.h" +#include "util/symbol.h" +#include "util/string.h" + +#include "perf.h" + +#include "util/parse-options.h" +#include "util/parse-events.h" + +#define SHOW_KERNEL 1 +#define SHOW_USER 2 +#define SHOW_HV 4 + +static char const *input_name = "perf.data"; +static char *vmlinux = NULL; + +static char default_sort_order[] = "comm,dso"; +static char *sort_order = default_sort_order; + +static int input; +static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; + +static int dump_trace = 0; +#define dprintf(x...) 
do { if (dump_trace) printf(x); } while (0) + +static int verbose; +static int full_paths; + +static unsigned long page_size; +static unsigned long mmap_window = 32; + +struct ip_event { + struct perf_event_header header; + __u64 ip; + __u32 pid, tid; +}; + +struct mmap_event { + struct perf_event_header header; + __u32 pid, tid; + __u64 start; + __u64 len; + __u64 pgoff; + char filename[PATH_MAX]; +}; + +struct comm_event { + struct perf_event_header header; + __u32 pid, tid; + char comm[16]; +}; + +struct fork_event { + struct perf_event_header header; + __u32 pid, ppid; +}; + +struct period_event { + struct perf_event_header header; + __u64 time; + __u64 id; + __u64 sample_period; +}; + +typedef union event_union { + struct perf_event_header header; + struct ip_event ip; + struct mmap_event mmap; + struct comm_event comm; + struct fork_event fork; + struct period_event period; +} event_t; + +static LIST_HEAD(dsos); +static struct dso *kernel_dso; +static struct dso *vdso; + +static void dsos__add(struct dso *dso) +{ + list_add_tail(&dso->node, &dsos); +} + +static struct dso *dsos__find(const char *name) +{ + struct dso *pos; + + list_for_each_entry(pos, &dsos, node) + if (strcmp(pos->name, name) == 0) + return pos; + return NULL; +} + +static struct dso *dsos__findnew(const char *name) +{ + struct dso *dso = dsos__find(name); + int nr; + + if (dso) + return dso; + + dso = dso__new(name, 0); + if (!dso) + goto out_delete_dso; + + nr = dso__load(dso, NULL, verbose); + if (nr < 0) { + if (verbose) + fprintf(stderr, "Failed to open: %s\n", name); + goto out_delete_dso; + } + if (!nr && verbose) { + fprintf(stderr, + "No symbols found in: %s, maybe install a debug package?\n", + name); + } + + dsos__add(dso); + + return dso; + +out_delete_dso: + dso__delete(dso); + return NULL; +} + +static void dsos__fprintf(FILE *fp) +{ + struct dso *pos; + + list_for_each_entry(pos, &dsos, node) + dso__fprintf(pos, fp); +} + +static struct symbol *vdso__find_symbol(struct dso 
*dso, uint64_t ip) +{ + return dso__find_symbol(kernel_dso, ip); +} + +static int load_kernel(void) +{ + int err; + + kernel_dso = dso__new("[kernel]", 0); + if (!kernel_dso) + return -1; + + err = dso__load_kernel(kernel_dso, vmlinux, NULL, verbose); + if (err) { + dso__delete(kernel_dso); + kernel_dso = NULL; + } else + dsos__add(kernel_dso); + + vdso = dso__new("[vdso]", 0); + if (!vdso) + return -1; + + vdso->find_symbol = vdso__find_symbol; + + dsos__add(vdso); + + return err; +} + +static char __cwd[PATH_MAX]; +static char *cwd = __cwd; +static int cwdlen; + +static int strcommon(const char *pathname) +{ + int n = 0; + + while (pathname[n] == cwd[n] && n < cwdlen) + ++n; + + return n; +} + +struct map { + struct list_head node; + uint64_t start; + uint64_t end; + uint64_t pgoff; + uint64_t (*map_ip)(struct map *, uint64_t); + struct dso *dso; +}; + +static uint64_t map__map_ip(struct map *map, uint64_t ip) +{ + return ip - map->start + map->pgoff; +} + +static uint64_t vdso__map_ip(struct map *map, uint64_t ip) +{ + return ip; +} + +static struct map *map__new(struct mmap_event *event) +{ + struct map *self = malloc(sizeof(*self)); + + if (self != NULL) { + const char *filename = event->filename; + char newfilename[PATH_MAX]; + + if (cwd) { + int n = strcommon(filename); + + if (n == cwdlen) { + snprintf(newfilename, sizeof(newfilename), + ".%s", filename + n); + filename = newfilename; + } + } + + self->start = event->start; + self->end = event->start + event->len; + self->pgoff = event->pgoff; + + self->dso = dsos__findnew(filename); + if (self->dso == NULL) + goto out_delete; + + if (self->dso == vdso) + self->map_ip = vdso__map_ip; + else + self->map_ip = map__map_ip; + } + return self; +out_delete: + free(self); + return NULL; +} + +static struct map *map__clone(struct map *self) +{ + struct map *map = malloc(sizeof(*self)); + + if (!map) + return NULL; + + memcpy(map, self, sizeof(*self)); + + return map; +} + +static int map__overlap(struct map *l, 
struct map *r) +{ + if (l->start > r->start) { + struct map *t = l; + l = r; + r = t; + } + + if (l->end > r->start) + return 1; + + return 0; +} + +static size_t map__fprintf(struct map *self, FILE *fp) +{ + return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n", + self->start, self->end, self->pgoff, self->dso->name); +} + + +struct thread { + struct rb_node rb_node; + struct list_head maps; + pid_t pid; + char *comm; +}; + +static struct thread *thread__new(pid_t pid) +{ + struct thread *self = malloc(sizeof(*self)); + + if (self != NULL) { + self->pid = pid; + self->comm = malloc(32); + if (self->comm) + snprintf(self->comm, 32, ":%d", self->pid); + INIT_LIST_HEAD(&self->maps); + } + + return self; +} + +static int thread__set_comm(struct thread *self, const char *comm) +{ + if (self->comm) + free(self->comm); + self->comm = strdup(comm); + return self->comm ? 0 : -ENOMEM; +} + +static size_t thread__fprintf(struct thread *self, FILE *fp) +{ + struct map *pos; + size_t ret = fprintf(fp, "Thread %d %s\n", self->pid, self->comm); + + list_for_each_entry(pos, &self->maps, node) + ret += map__fprintf(pos, fp); + + return ret; +} + + +static struct rb_root threads; +static struct thread *last_match; + +static struct thread *threads__findnew(pid_t pid) +{ + struct rb_node **p = &threads.rb_node; + struct rb_node *parent = NULL; + struct thread *th; + + /* + * Font-end cache - PID lookups come in blocks, + * so most of the time we dont have to look up + * the full rbtree: + */ + if (last_match && last_match->pid == pid) + return last_match; + + while (*p != NULL) { + parent = *p; + th = rb_entry(parent, struct thread, rb_node); + + if (th->pid == pid) { + last_match = th; + return th; + } + + if (pid < th->pid) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + th = thread__new(pid); + if (th != NULL) { + rb_link_node(&th->rb_node, parent, p); + rb_insert_color(&th->rb_node, &threads); + last_match = th; + } + + return th; +} + +static void 
thread__insert_map(struct thread *self, struct map *map) +{ + struct map *pos, *tmp; + + list_for_each_entry_safe(pos, tmp, &self->maps, node) { + if (map__overlap(pos, map)) { + list_del_init(&pos->node); + /* XXX leaks dsos */ + free(pos); + } + } + + list_add_tail(&map->node, &self->maps); +} + +static int thread__fork(struct thread *self, struct thread *parent) +{ + struct map *map; + + if (self->comm) + free(self->comm); + self->comm = strdup(parent->comm); + if (!self->comm) + return -ENOMEM; + + list_for_each_entry(map, &parent->maps, node) { + struct map *new = map__clone(map); + if (!new) + return -ENOMEM; + thread__insert_map(self, new); + } + + return 0; +} + +static struct map *thread__find_map(struct thread *self, uint64_t ip) +{ + struct map *pos; + + if (self == NULL) + return NULL; + + list_for_each_entry(pos, &self->maps, node) + if (ip >= pos->start && ip <= pos->end) + return pos; + + return NULL; +} + +static size_t threads__fprintf(FILE *fp) +{ + size_t ret = 0; + struct rb_node *nd; + + for (nd = rb_first(&threads); nd; nd = rb_next(nd)) { + struct thread *pos = rb_entry(nd, struct thread, rb_node); + + ret += thread__fprintf(pos, fp); + } + + return ret; +} + +/* + * histogram, sorted on item, collects counts + */ + +static struct rb_root hist; + +struct hist_entry { + struct rb_node rb_node; + + struct thread *thread; + struct map *map; + struct dso *dso; + struct symbol *sym; + uint64_t ip; + char level; + + uint32_t count; +}; + +/* + * configurable sorting bits + */ + +struct sort_entry { + struct list_head list; + + char *header; + + int64_t (*cmp)(struct hist_entry *, struct hist_entry *); + int64_t (*collapse)(struct hist_entry *, struct hist_entry *); + size_t (*print)(FILE *fp, struct hist_entry *); +}; + +/* --sort pid */ + +static int64_t +sort__thread_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->thread->pid - left->thread->pid; +} + +static size_t +sort__thread_print(FILE *fp, struct hist_entry *self) 
+{ + return fprintf(fp, "%16s:%5d", self->thread->comm ?: "", self->thread->pid); +} + +static struct sort_entry sort_thread = { + .header = " Command: Pid", + .cmp = sort__thread_cmp, + .print = sort__thread_print, +}; + +/* --sort comm */ + +static int64_t +sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->thread->pid - left->thread->pid; +} + +static int64_t +sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) +{ + char *comm_l = left->thread->comm; + char *comm_r = right->thread->comm; + + if (!comm_l || !comm_r) { + if (!comm_l && !comm_r) + return 0; + else if (!comm_l) + return -1; + else + return 1; + } + + return strcmp(comm_l, comm_r); +} + +static size_t +sort__comm_print(FILE *fp, struct hist_entry *self) +{ + return fprintf(fp, "%16s", self->thread->comm); +} + +static struct sort_entry sort_comm = { + .header = " Command", + .cmp = sort__comm_cmp, + .collapse = sort__comm_collapse, + .print = sort__comm_print, +}; + +/* --sort dso */ + +static int64_t +sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct dso *dso_l = left->dso; + struct dso *dso_r = right->dso; + + if (!dso_l || !dso_r) { + if (!dso_l && !dso_r) + return 0; + else if (!dso_l) + return -1; + else + return 1; + } + + return strcmp(dso_l->name, dso_r->name); +} + +static size_t +sort__dso_print(FILE *fp, struct hist_entry *self) +{ + if (self->dso) + return fprintf(fp, "%-25s", self->dso->name); + + return fprintf(fp, "%016llx ", (__u64)self->ip); +} + +static struct sort_entry sort_dso = { + .header = "Shared Object ", + .cmp = sort__dso_cmp, + .print = sort__dso_print, +}; + +/* --sort symbol */ + +static int64_t +sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) +{ + uint64_t ip_l, ip_r; + + if (left->sym == right->sym) + return 0; + + ip_l = left->sym ? left->sym->start : left->ip; + ip_r = right->sym ? 
right->sym->start : right->ip; + + return (int64_t)(ip_r - ip_l); +} + +static size_t +sort__sym_print(FILE *fp, struct hist_entry *self) +{ + size_t ret = 0; + + if (verbose) + ret += fprintf(fp, "%#018llx ", (__u64)self->ip); + + if (self->sym) { + ret += fprintf(fp, "[%c] %s", + self->dso == kernel_dso ? 'k' : '.', self->sym->name); + } else { + ret += fprintf(fp, "%#016llx", (__u64)self->ip); + } + + return ret; +} + +static struct sort_entry sort_sym = { + .header = "Symbol", + .cmp = sort__sym_cmp, + .print = sort__sym_print, +}; + +static int sort__need_collapse = 0; + +struct sort_dimension { + char *name; + struct sort_entry *entry; + int taken; +}; + +static struct sort_dimension sort_dimensions[] = { + { .name = "pid", .entry = &sort_thread, }, + { .name = "comm", .entry = &sort_comm, }, + { .name = "dso", .entry = &sort_dso, }, + { .name = "symbol", .entry = &sort_sym, }, +}; + +static LIST_HEAD(hist_entry__sort_list); + +static int sort_dimension__add(char *tok) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sort_dimensions); i++) { + struct sort_dimension *sd = &sort_dimensions[i]; + + if (sd->taken) + continue; + + if (strncasecmp(tok, sd->name, strlen(tok))) + continue; + + if (sd->entry->collapse) + sort__need_collapse = 1; + + list_add_tail(&sd->entry->list, &hist_entry__sort_list); + sd->taken = 1; + + return 0; + } + + return -ESRCH; +} + +static int64_t +hist_entry__cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct sort_entry *se; + int64_t cmp = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + cmp = se->cmp(left, right); + if (cmp) + break; + } + + return cmp; +} + +static int64_t +hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) +{ + struct sort_entry *se; + int64_t cmp = 0; + + list_for_each_entry(se, &hist_entry__sort_list, list) { + int64_t (*f)(struct hist_entry *, struct hist_entry *); + + f = se->collapse ?: se->cmp; + + cmp = f(left, right); + if (cmp) + break; + } + + return cmp; +} 
+ +static size_t +hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples) +{ + struct sort_entry *se; + size_t ret; + + if (total_samples) { + double percent = self->count * 100.0 / total_samples; + char *color = PERF_COLOR_NORMAL; + + /* + * We color high-overhead entries in red, low-overhead + * entries in green - and keep the middle ground normal: + */ + if (percent >= 5.0) + color = PERF_COLOR_RED; + if (percent < 0.5) + color = PERF_COLOR_GREEN; + + ret = color_fprintf(fp, color, " %6.2f%%", + (self->count * 100.0) / total_samples); + } else + ret = fprintf(fp, "%12d ", self->count); + + list_for_each_entry(se, &hist_entry__sort_list, list) { + fprintf(fp, " "); + ret += se->print(fp, self); + } + + ret += fprintf(fp, "\n"); + + return ret; +} + +/* + * collect histogram counts + */ + +static int +hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, + struct symbol *sym, uint64_t ip, char level) +{ + struct rb_node **p = &hist.rb_node; + struct rb_node *parent = NULL; + struct hist_entry *he; + struct hist_entry entry = { + .thread = thread, + .map = map, + .dso = dso, + .sym = sym, + .ip = ip, + .level = level, + .count = 1, + }; + int cmp; + + while (*p != NULL) { + parent = *p; + he = rb_entry(parent, struct hist_entry, rb_node); + + cmp = hist_entry__cmp(&entry, he); + + if (!cmp) { + he->count++; + return 0; + } + + if (cmp < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + he = malloc(sizeof(*he)); + if (!he) + return -ENOMEM; + *he = entry; + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, &hist); + + return 0; +} + +static void hist_entry__free(struct hist_entry *he) +{ + free(he); +} + +/* + * collapse the histogram + */ + +static struct rb_root collapse_hists; + +static void collapse__insert_entry(struct hist_entry *he) +{ + struct rb_node **p = &collapse_hists.rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + int64_t cmp; + + while (*p != NULL) { + 
parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + cmp = hist_entry__collapse(iter, he); + + if (!cmp) { + iter->count += he->count; + hist_entry__free(he); + return; + } + + if (cmp < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, &collapse_hists); +} + +static void collapse__resort(void) +{ + struct rb_node *next; + struct hist_entry *n; + + if (!sort__need_collapse) + return; + + next = rb_first(&hist); + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + next = rb_next(&n->rb_node); + + rb_erase(&n->rb_node, &hist); + collapse__insert_entry(n); + } +} + +/* + * reverse the map, sort on count. + */ + +static struct rb_root output_hists; + +static void output__insert_entry(struct hist_entry *he) +{ + struct rb_node **p = &output_hists.rb_node; + struct rb_node *parent = NULL; + struct hist_entry *iter; + + while (*p != NULL) { + parent = *p; + iter = rb_entry(parent, struct hist_entry, rb_node); + + if (he->count > iter->count) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&he->rb_node, parent, p); + rb_insert_color(&he->rb_node, &output_hists); +} + +static void output__resort(void) +{ + struct rb_node *next; + struct hist_entry *n; + struct rb_root *tree = &hist; + + if (sort__need_collapse) + tree = &collapse_hists; + + next = rb_first(tree); + + while (next) { + n = rb_entry(next, struct hist_entry, rb_node); + next = rb_next(&n->rb_node); + + rb_erase(&n->rb_node, tree); + output__insert_entry(n); + } +} + +static size_t output__fprintf(FILE *fp, uint64_t total_samples) +{ + struct hist_entry *pos; + struct sort_entry *se; + struct rb_node *nd; + size_t ret = 0; + + fprintf(fp, "\n"); + fprintf(fp, "#\n"); + fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples); + fprintf(fp, "#\n"); + + fprintf(fp, "# Overhead"); + list_for_each_entry(se, &hist_entry__sort_list, list) + fprintf(fp, " %s", se->header); + 
fprintf(fp, "\n"); + + fprintf(fp, "# ........"); + list_for_each_entry(se, &hist_entry__sort_list, list) { + int i; + + fprintf(fp, " "); + for (i = 0; i < strlen(se->header); i++) + fprintf(fp, "."); + } + fprintf(fp, "\n"); + + fprintf(fp, "#\n"); + + for (nd = rb_first(&output_hists); nd; nd = rb_next(nd)) { + pos = rb_entry(nd, struct hist_entry, rb_node); + ret += hist_entry__fprintf(fp, pos, total_samples); + } + + if (!strcmp(sort_order, default_sort_order)) { + fprintf(fp, "#\n"); + fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); + fprintf(fp, "#\n"); + } + fprintf(fp, "\n"); + + return ret; +} + +static void register_idle_thread(void) +{ + struct thread *thread = threads__findnew(0); + + if (thread == NULL || + thread__set_comm(thread, "[idle]")) { + fprintf(stderr, "problem inserting idle task.\n"); + exit(-1); + } +} + +static unsigned long total = 0, + total_mmap = 0, + total_comm = 0, + total_fork = 0, + total_unknown = 0; + +static int +process_overflow_event(event_t *event, unsigned long offset, unsigned long head) +{ + char level; + int show = 0; + struct dso *dso = NULL; + struct thread *thread = threads__findnew(event->ip.pid); + uint64_t ip = event->ip.ip; + struct map *map = NULL; + + dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->header.misc, + event->ip.pid, + (void *)(long)ip); + + dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); + + if (thread == NULL) { + fprintf(stderr, "problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + if (event->header.misc & PERF_EVENT_MISC_KERNEL) { + show = SHOW_KERNEL; + level = 'k'; + + dso = kernel_dso; + + dprintf(" ...... 
dso: %s\n", dso->name); + + } else if (event->header.misc & PERF_EVENT_MISC_USER) { + + show = SHOW_USER; + level = '.'; + + map = thread__find_map(thread, ip); + if (map != NULL) { + ip = map->map_ip(map, ip); + dso = map->dso; + } else { + /* + * If this is outside of all known maps, + * and is a negative address, try to look it + * up in the kernel dso, as it might be a + * vsyscall (which executes in user-mode): + */ + if ((long long)ip < 0) + dso = kernel_dso; + } + dprintf(" ...... dso: %s\n", dso ? dso->name : ""); + + } else { + show = SHOW_HV; + level = 'H'; + dprintf(" ...... dso: [hypervisor]\n"); + } + + if (show & show_mask) { + struct symbol *sym = NULL; + + if (dso) + sym = dso->find_symbol(dso, ip); + + if (hist_entry__add(thread, map, dso, sym, ip, level)) { + fprintf(stderr, + "problem incrementing symbol count, skipping event\n"); + return -1; + } + } + total++; + + return 0; +} + +static int +process_mmap_event(event_t *event, unsigned long offset, unsigned long head) +{ + struct thread *thread = threads__findnew(event->mmap.pid); + struct map *map = map__new(&event->mmap); + + dprintf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->mmap.pid, + (void *)(long)event->mmap.start, + (void *)(long)event->mmap.len, + (void *)(long)event->mmap.pgoff, + event->mmap.filename); + + if (thread == NULL || map == NULL) { + dprintf("problem processing PERF_EVENT_MMAP, skipping event.\n"); + return 0; + } + + thread__insert_map(thread, map); + total_mmap++; + + return 0; +} + +static int +process_comm_event(event_t *event, unsigned long offset, unsigned long head) +{ + struct thread *thread = threads__findnew(event->comm.pid); + + dprintf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->comm.comm, event->comm.pid); + + if (thread == NULL || + thread__set_comm(thread, event->comm.comm)) { + dprintf("problem processing 
PERF_EVENT_COMM, skipping event.\n"); + return -1; + } + total_comm++; + + return 0; +} + +static int +process_fork_event(event_t *event, unsigned long offset, unsigned long head) +{ + struct thread *thread = threads__findnew(event->fork.pid); + struct thread *parent = threads__findnew(event->fork.ppid); + + dprintf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->fork.pid, event->fork.ppid); + + if (!thread || !parent || thread__fork(thread, parent)) { + dprintf("problem processing PERF_EVENT_FORK, skipping event.\n"); + return -1; + } + total_fork++; + + return 0; +} + +static int +process_period_event(event_t *event, unsigned long offset, unsigned long head) +{ + dprintf("%p [%p]: PERF_EVENT_PERIOD: time:%Ld, id:%Ld: period:%Ld\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->period.time, + event->period.id, + event->period.sample_period); + + return 0; +} + +static int +process_event(event_t *event, unsigned long offset, unsigned long head) +{ + if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) + return process_overflow_event(event, offset, head); + + switch (event->header.type) { + case PERF_EVENT_MMAP: + return process_mmap_event(event, offset, head); + + case PERF_EVENT_COMM: + return process_comm_event(event, offset, head); + + case PERF_EVENT_FORK: + return process_fork_event(event, offset, head); + + case PERF_EVENT_PERIOD: + return process_period_event(event, offset, head); + /* + * We dont process them right now but they are fine: + */ + + case PERF_EVENT_THROTTLE: + case PERF_EVENT_UNTHROTTLE: + return 0; + + default: + return -1; + } + + return 0; +} + +static int __cmd_report(void) +{ + int ret, rc = EXIT_FAILURE; + unsigned long offset = 0; + unsigned long head = 0; + struct stat stat; + event_t *event; + uint32_t size; + char *buf; + + register_idle_thread(); + + input = open(input_name, O_RDONLY); + if (input < 0) { + perror("failed to open file"); + 
exit(-1); + } + + ret = fstat(input, &stat); + if (ret < 0) { + perror("failed to stat file"); + exit(-1); + } + + if (!stat.st_size) { + fprintf(stderr, "zero-sized file, nothing to do!\n"); + exit(0); + } + + if (load_kernel() < 0) { + perror("failed to load kernel symbols"); + return EXIT_FAILURE; + } + + if (!full_paths) { + if (getcwd(__cwd, sizeof(__cwd)) == NULL) { + perror("failed to get the current directory"); + return EXIT_FAILURE; + } + cwdlen = strlen(cwd); + } else { + cwd = NULL; + cwdlen = 0; + } +remap: + buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, + MAP_SHARED, input, offset); + if (buf == MAP_FAILED) { + perror("failed to mmap file"); + exit(-1); + } + +more: + event = (event_t *)(buf + head); + + size = event->header.size; + if (!size) + size = 8; + + if (head + event->header.size >= page_size * mmap_window) { + unsigned long shift = page_size * (head / page_size); + int ret; + + ret = munmap(buf, page_size * mmap_window); + assert(ret == 0); + + offset += shift; + head -= shift; + goto remap; + } + + size = event->header.size; + + dprintf("%p [%p]: event: %d\n", + (void *)(offset + head), + (void *)(long)event->header.size, + event->header.type); + + if (!size || process_event(event, offset, head) < 0) { + + dprintf("%p [%p]: skipping unknown header type: %d\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->header.type); + + total_unknown++; + + /* + * assume we lost track of the stream, check alignment, and + * increment a single u64 in the hope to catch on again 'soon'. 
+ */ + + if (unlikely(head & 7)) + head &= ~7ULL; + + size = 8; + } + + head += size; + + if (offset + head < stat.st_size) + goto more; + + rc = EXIT_SUCCESS; + close(input); + + dprintf(" IP events: %10ld\n", total); + dprintf(" mmap events: %10ld\n", total_mmap); + dprintf(" comm events: %10ld\n", total_comm); + dprintf(" fork events: %10ld\n", total_fork); + dprintf(" unknown events: %10ld\n", total_unknown); + + if (dump_trace) + return 0; + + if (verbose >= 3) + threads__fprintf(stdout); + + if (verbose >= 2) + dsos__fprintf(stdout); + + collapse__resort(); + output__resort(); + output__fprintf(stdout, total); + + return rc; +} + +static const char * const report_usage[] = { + "perf report [] ", + NULL +}; + +static const struct option options[] = { + OPT_STRING('i', "input", &input_name, "file", + "input file name"), + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), + OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, + "dump raw trace in ASCII"), + OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), + OPT_STRING('s', "sort", &sort_order, "key[,key2...]", + "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), + OPT_BOOLEAN('P', "full-paths", &full_paths, + "Don't shorten the pathnames taking into account the cwd"), + OPT_END() +}; + +static void setup_sorting(void) +{ + char *tmp, *tok, *str = strdup(sort_order); + + for (tok = strtok_r(str, ", ", &tmp); + tok; tok = strtok_r(NULL, ", ", &tmp)) { + if (sort_dimension__add(tok) < 0) { + error("Unknown --sort key: `%s'", tok); + usage_with_options(report_usage, options); + } + } + + free(str); +} + +int cmd_report(int argc, const char **argv, const char *prefix) +{ + symbol__init(); + + page_size = getpagesize(); + + argc = parse_options(argc, argv, options, report_usage, 0); + + setup_sorting(); + + /* + * Any (unrecognized) arguments left? 
+ */ + if (argc) + usage_with_options(report_usage, options); + + setup_pager(); + + return __cmd_report(); +} -- cgit From a14832ff977e78d1982cdf78cdabb1f2320d9ac8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 7 Jun 2009 17:58:23 +0200 Subject: perf report: Print more expressive message in case of file open error Before: $ perf report failed to open file: No such file or directory After: $ perf report failed to open file: perf.data (try 'perf record' first) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 242e09ff3658..f053a7463dcf 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1120,7 +1120,10 @@ static int __cmd_report(void) input = open(input_name, O_RDONLY); if (input < 0) { - perror("failed to open file"); + fprintf(stderr, " failed to open file: %s", input_name); + if (!strcmp(input_name, "perf.data")) + fprintf(stderr, " (try 'perf record' first)"); + fprintf(stderr, "\n"); exit(-1); } -- cgit From 80d496be89ed7dede5abee5c057634e80a31c82d Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 8 Jun 2009 21:12:48 +0300 Subject: perf report: Add support for profiling JIT generated code This patch adds support for profiling JIT generated code to 'perf report'. A JIT compiler is required to generate a "/tmp/perf-$PID.map" symbols map that is parsed when looking and displaying symbols. Thanks to Peter Zijlstra for his help with this patch! Example "perf report" output with the Jato JIT: # # (40311 samples) # # Overhead Command Shared Object Symbol # ........ ................ ......................... ...... # 97.80% jato /tmp/perf-11915.map [.] Fibonacci.fib(I)I 0.56% jato 00000000b7fa023b 0x000000b7fa023b 0.45% jato /tmp/perf-11915.map [.] 
Fibonacci.main([Ljava/lang/String;)V 0.38% jato [kernel] [k] get_page_from_freelist 0.06% jato [kernel] [k] kunmap_atomic 0.05% jato ./jato [.] utf8Hash 0.04% jato ./jato [.] executeJava 0.04% jato ./jato [.] defineClass Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Signed-off-by: Pekka Enberg Cc: a.p.zijlstra@chello.nl Cc: acme@redhat.com LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f053a7463dcf..61d871849b44 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -209,6 +209,11 @@ static uint64_t vdso__map_ip(struct map *map, uint64_t ip) return ip; } +static inline int is_anon_memory(const char *filename) +{ + return strcmp(filename, "//anon") == 0; +} + static struct map *map__new(struct mmap_event *event) { struct map *self = malloc(sizeof(*self)); @@ -216,6 +221,7 @@ static struct map *map__new(struct mmap_event *event) if (self != NULL) { const char *filename = event->filename; char newfilename[PATH_MAX]; + int anon; if (cwd) { int n = strcommon(filename); @@ -227,6 +233,13 @@ static struct map *map__new(struct mmap_event *event) } } + anon = is_anon_memory(filename); + + if (anon) { + snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", event->pid); + filename = newfilename; + } + self->start = event->start; self->end = event->start + event->len; self->pgoff = event->pgoff; @@ -235,7 +248,7 @@ static struct map *map__new(struct mmap_event *event) if (self->dso == NULL) goto out_delete; - if (self->dso == vdso) + if (self->dso == vdso || anon) self->map_ip = vdso__map_ip; else self->map_ip = map__map_ip; -- cgit From aefcf37b82886260d8540c9fb815e613c8977e06 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jun 2009 23:15:28 +0200 Subject: perf_counter tools: Standardize color printing The rule is: - high 
overhead: red - mid overhead: green - low overhead: normal (white/black) Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 61d871849b44..0b18cb99a858 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -693,13 +693,16 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples) char *color = PERF_COLOR_NORMAL; /* - * We color high-overhead entries in red, low-overhead - * entries in green - and keep the middle ground normal: + * We color high-overhead entries in red, mid-overhead + * entries in green - and keep the low overhead places + * normal: */ - if (percent >= 5.0) + if (percent >= 5.0) { color = PERF_COLOR_RED; - if (percent < 0.5) - color = PERF_COLOR_GREEN; + } else { + if (percent >= 0.5) + color = PERF_COLOR_GREEN; + } ret = color_fprintf(fp, color, " %6.2f%%", (self->count * 100.0) / total_samples); -- cgit From 4502d77c1d8f15f20c04b92cb96c12d4e465de29 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 15:03:06 +0200 Subject: perf_counter tools: Small frequency related fixes Create the counter in a disabled state and only enable it after we mmap() the buffer, this allows us to see the first few samples (and observe the frequency ramp). Furthermore, print the period in the verbose report. 
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0b18cb99a858..9a0e31e79e9d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -47,6 +47,7 @@ struct ip_event { struct perf_event_header header; __u64 ip; __u32 pid, tid; + __u64 period; }; struct mmap_event { @@ -943,12 +944,13 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) uint64_t ip = event->ip.ip; struct map *map = NULL; - dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p\n", + dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, event->ip.pid, - (void *)(long)ip); + (void *)(long)ip, + (long long)event->ip.period); dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); -- cgit From ea1900e571d40a3ce60c835c2f21e1fd8c5cb663 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 21:45:22 +0200 Subject: perf_counter tools: Normalize data using per sample period data When we use variable period sampling, add the period to the sample data and use that to normalize the samples. 
Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9a0e31e79e9d..f57fd5c5531a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -456,7 +456,7 @@ struct hist_entry { uint64_t ip; char level; - uint32_t count; + uint64_t count; }; /* @@ -726,7 +726,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, uint64_t ip, char level) + struct symbol *sym, uint64_t ip, char level, uint64_t count) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -738,7 +738,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, .sym = sym, .ip = ip, .level = level, - .count = 1, + .count = count, }; int cmp; @@ -749,7 +749,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, cmp = hist_entry__cmp(&entry, he); if (!cmp) { - he->count++; + he->count += count; return 0; } @@ -942,15 +942,19 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) struct dso *dso = NULL; struct thread *thread = threads__findnew(event->ip.pid); uint64_t ip = event->ip.ip; + uint64_t period = 1; struct map *map = NULL; + if (event->header.type & PERF_SAMPLE_PERIOD) + period = event->ip.period; + dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, event->ip.pid, (void *)(long)ip, - (long long)event->ip.period); + (long long)period); dprintf(" ... 
thread: %s:%d\n", thread->comm, thread->pid); @@ -1001,13 +1005,13 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) if (dso) sym = dso->find_symbol(dso, ip); - if (hist_entry__add(thread, map, dso, sym, ip, level)) { + if (hist_entry__add(thread, map, dso, sym, ip, level, period)) { fprintf(stderr, "problem incrementing symbol count, skipping event\n"); return -1; } } - total++; + total += period; return 0; } -- cgit From 729ff5e2aaf181f5d3ab849337fce406cd19b1d9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 11 Jun 2009 14:16:15 +0200 Subject: perf_counter tools: Clean up u64 usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A build error slipped in: builtin-report.c: In function ‘hist_entry__fprintf’: builtin-report.c:711: error: format ‘%12d’ expects type ‘int’, but argument 3 has type ‘uint64_t’ Because we got a bit sloppy with those types. uint64_t really sucks, because there's no printf format for it. So standardize on __u64 instead - for all types that go to or come from the ABI (which is __u64), or for values that need to be large enough even on 32-bit. 
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f57fd5c5531a..82fa93b4db99 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -146,7 +146,7 @@ static void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } -static struct symbol *vdso__find_symbol(struct dso *dso, uint64_t ip) +static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) { return dso__find_symbol(kernel_dso, ip); } @@ -193,19 +193,19 @@ static int strcommon(const char *pathname) struct map { struct list_head node; - uint64_t start; - uint64_t end; - uint64_t pgoff; - uint64_t (*map_ip)(struct map *, uint64_t); + __u64 start; + __u64 end; + __u64 pgoff; + __u64 (*map_ip)(struct map *, __u64); struct dso *dso; }; -static uint64_t map__map_ip(struct map *map, uint64_t ip) +static __u64 map__map_ip(struct map *map, __u64 ip) { return ip - map->start + map->pgoff; } -static uint64_t vdso__map_ip(struct map *map, uint64_t ip) +static __u64 vdso__map_ip(struct map *map, __u64 ip) { return ip; } @@ -288,7 +288,7 @@ static int map__overlap(struct map *l, struct map *r) static size_t map__fprintf(struct map *self, FILE *fp) { - return fprintf(fp, " %"PRIx64"-%"PRIx64" %"PRIx64" %s\n", + return fprintf(fp, " %Lx-%Lx %Lx %s\n", self->start, self->end, self->pgoff, self->dso->name); } @@ -412,7 +412,7 @@ static int thread__fork(struct thread *self, struct thread *parent) return 0; } -static struct map *thread__find_map(struct thread *self, uint64_t ip) +static struct map *thread__find_map(struct thread *self, __u64 ip) { struct map *pos; @@ -453,10 +453,10 @@ struct hist_entry { struct map *map; struct dso *dso; struct symbol *sym; - uint64_t ip; + __u64 ip; char 
level; - uint64_t count; + __u64 count; }; /* @@ -572,7 +572,7 @@ static struct sort_entry sort_dso = { static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { - uint64_t ip_l, ip_r; + __u64 ip_l, ip_r; if (left->sym == right->sym) return 0; @@ -684,7 +684,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) } static size_t -hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples) +hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) { struct sort_entry *se; size_t ret; @@ -708,7 +708,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples) ret = color_fprintf(fp, color, " %6.2f%%", (self->count * 100.0) / total_samples); } else - ret = fprintf(fp, "%12d ", self->count); + ret = fprintf(fp, "%12Ld ", self->count); list_for_each_entry(se, &hist_entry__sort_list, list) { fprintf(fp, " "); @@ -726,7 +726,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, uint64_t total_samples) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, uint64_t ip, char level, uint64_t count) + struct symbol *sym, __u64 ip, char level, __u64 count) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -873,7 +873,7 @@ static void output__resort(void) } } -static size_t output__fprintf(FILE *fp, uint64_t total_samples) +static size_t output__fprintf(FILE *fp, __u64 total_samples) { struct hist_entry *pos; struct sort_entry *se; @@ -941,8 +941,8 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) int show = 0; struct dso *dso = NULL; struct thread *thread = threads__findnew(event->ip.pid); - uint64_t ip = event->ip.ip; - uint64_t period = 1; + __u64 ip = event->ip.ip; + __u64 period = 1; struct map *map = NULL; if (event->header.type & PERF_SAMPLE_PERIOD) -- cgit From 8465b05046652cfde3d47692cab2e8ba962f140f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 
14 Jun 2009 14:44:07 +0200 Subject: perf report: Print out raw events in hexa Print out events in hexa dump format, when -D is specified: 0x4868 [0x48]: event: 1 . . ... raw event: size 72 bytes . 0000: 01 00 00 00 00 00 48 00 d4 72 00 00 d4 72 00 00 ......H..r...r. . 0010: 00 00 40 f2 3e 00 00 00 00 30 01 00 00 00 00 00 ..@.>....0..... . 0020: 00 00 00 00 00 00 00 00 2f 75 73 72 2f 6c 69 62 ......../usr/li . 0030: 36 34 2f 6c 69 62 65 6c 66 2d 30 2e 31 34 31 2e 64/libelf-0.141 . 0040: 73 6f 00 00 00 00 00 00 f-0.141 . 0x4868 [0x48]: PERF_EVENT_MMAP 29396: [0x3ef2400000(0x13000) @ (nil)]: /usr/lib64/libelf-0.141.so This helps the debugging of mis-parsing of data files, and helps the addition of new sample/trace formats. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 82fa93b4db99..37515da637f7 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1095,9 +1095,43 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static void trace_event(event_t *event) +{ + unsigned char *raw_event = (void *)event; + int i, j; + + if (!dump_trace) + return; + + dprintf(".\n. ... raw event: size %d bytes\n", event->header.size); + + for (i = 0; i < event->header.size; i++) { + if ((i & 15) == 0) + dprintf(". 
%04x: ", i); + + dprintf(" %02x", raw_event[i]); + + if (((i & 15) == 15) || i == event->header.size-1) { + dprintf(" "); + for (j = 0; j < 15-(i & 15); j++) + dprintf(" "); + for (j = 0; j < (i & 15); j++) { + if (isprint(raw_event[i-15+j])) + dprintf("%c", raw_event[i-15+j]); + else + dprintf("."); + } + dprintf("\n"); + } + } + dprintf(".\n"); +} + static int process_event(event_t *event, unsigned long offset, unsigned long head) { + trace_event(event); + if (event->header.misc & PERF_EVENT_MISC_OVERFLOW) return process_overflow_event(event, offset, head); @@ -1204,7 +1238,7 @@ more: size = event->header.size; - dprintf("%p [%p]: event: %d\n", + dprintf("\n%p [%p]: event: %d\n", (void *)(offset + head), (void *)(long)event->header.size, event->header.type); -- cgit From 3efa1cc99ec51bc7a7ae0011a16619fd20dbe6ea Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 14 Jun 2009 15:04:15 +0200 Subject: perf record/report: Add call graph / call chain profiling Add the first steps of call-graph profiling: - add the -c (--call-graph) option to perf record - parse the call-graph record and print it out under -D (--dump-trace) The call-graph data is not put into the histogram yet, but it can be seen that it's being processed correctly: 0x3ce0 [0x38]: event: 35 . . ... raw event: size 56 bytes . 0000: 23 00 00 00 05 00 38 00 d4 df 0e 81 ff ff ff ff #.....8........ . 0010: 60 0b 00 00 60 0b 00 00 03 00 00 00 01 00 02 00 `...`.......... . 0020: d4 df 0e 81 ff ff ff ff a0 61 ed 41 36 00 00 00 .........a.A6.. . 0030: 04 92 e6 41 36 00 00 00 .a.A6.. . 0x3ce0 [0x38]: PERF_EVENT (IP, 5): 2912: 0xffffffff810edfd4 period: 1 ... chain: u:2, k:1, nr:3 ..... 0: 0xffffffff810edfd4 ..... 1: 0x3641ed61a0 ..... 2: 0x3641e69204 ... thread: perf:2912 ...... 
dso: [kernel] This shows a 3-entry call-graph: with 1 kernel-space and two user-space entries Cc: Frederic Weisbecker Cc: Pekka Enberg Cc: Arjan van de Ven Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 57 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 12 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 37515da637f7..aebba5659345 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -36,6 +36,7 @@ static int show_mask = SHOW_KERNEL | SHOW_USER | SHOW_HV; static int dump_trace = 0; #define dprintf(x...) do { if (dump_trace) printf(x); } while (0) +#define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) static int verbose; static int full_paths; @@ -43,11 +44,19 @@ static int full_paths; static unsigned long page_size; static unsigned long mmap_window = 32; +struct ip_chain_event { + __u16 nr; + __u16 hv; + __u16 kernel; + __u16 user; + __u64 ips[]; +}; + struct ip_event { struct perf_event_header header; __u64 ip; __u32 pid, tid; - __u64 period; + unsigned char __more_data[]; }; struct mmap_event { @@ -944,9 +953,13 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) __u64 ip = event->ip.ip; __u64 period = 1; struct map *map = NULL; + void *more_data = event->ip.__more_data; + struct ip_chain_event *chain; - if (event->header.type & PERF_SAMPLE_PERIOD) - period = event->ip.period; + if (event->header.type & PERF_SAMPLE_PERIOD) { + period = *(__u64 *)more_data; + more_data += sizeof(__u64); + } dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", (void *)(offset + head), @@ -956,6 +969,22 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) (void *)(long)ip, (long long)period); + if (event->header.type & 
PERF_SAMPLE_CALLCHAIN) { + int i; + + chain = (void *)more_data; + + if (dump_trace) { + dprintf("... chain: u:%d, k:%d, nr:%d\n", + chain->user, + chain->kernel, + chain->nr); + + for (i = 0; i < chain->nr; i++) + dprintf("..... %2d: %p\n", i, (void *)chain->ips[i]); + } + } + dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); if (thread == NULL) { @@ -1098,30 +1127,34 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) static void trace_event(event_t *event) { unsigned char *raw_event = (void *)event; + char *color = PERF_COLOR_BLUE; int i, j; if (!dump_trace) return; - dprintf(".\n. ... raw event: size %d bytes\n", event->header.size); + dprintf("."); + cdprintf("\n. ... raw event: size %d bytes\n", event->header.size); for (i = 0; i < event->header.size; i++) { - if ((i & 15) == 0) - dprintf(". %04x: ", i); + if ((i & 15) == 0) { + dprintf("."); + cdprintf(" %04x: ", i); + } - dprintf(" %02x", raw_event[i]); + cdprintf(" %02x", raw_event[i]); if (((i & 15) == 15) || i == event->header.size-1) { - dprintf(" "); + cdprintf(" "); for (j = 0; j < 15-(i & 15); j++) - dprintf(" "); + cdprintf(" "); for (j = 0; j < (i & 15); j++) { if (isprint(raw_event[i-15+j])) - dprintf("%c", raw_event[i-15+j]); + cdprintf("%c", raw_event[i-15+j]); else - dprintf("."); + cdprintf("."); } - dprintf("\n"); + cdprintf("\n"); } } dprintf(".\n"); -- cgit From 3dfabc74c65904c9e6cf952391312d16ea772ef5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Jun 2009 11:24:38 +0200 Subject: perf report: Add per system call overhead histogram Take advantage of call-graph percounter sampling/recording to display a non-trivial histogram: the true, collapsed/summarized cost measurement, on a per system call total overhead basis: aldebaran:~/linux/linux/tools/perf> ./perf record -g -a -f ~/hackbench 10 aldebaran:~/linux/linux/tools/perf> ./perf report -s symbol --syscalls | head -10 # # (3536 samples) # # Overhead Symbol # ........ ...... 
# 40.75% [k] sys_write 40.21% [k] sys_read 4.44% [k] do_nmi ... This is done by accounting each (reliable) call-chain that chains back to a given system call to that system call function. [ So in the above example we can see that hackbench spends about 40% of its total time somewhere in sys_write() and 40% somewhere in sys_read(), the rest of the time is spent in user-space. The time is not spent in sys_write() _itself_ but in one of its many child functions. ] Or, a recording of a (source files are already in the page-cache) kernel build: $ perf record -g -m 512 -f -- make -j32 kernel $ perf report -s s --syscalls | grep '\[k\]' | grep -v nmi 4.14% [k] do_page_fault 1.20% [k] sys_write 1.10% [k] sys_open 0.63% [k] sys_exit_group 0.48% [k] smp_apic_timer_interrupt 0.37% [k] sys_read 0.37% [k] sys_execve 0.20% [k] sys_mmap 0.18% [k] sys_close 0.14% [k] sys_munmap 0.13% [k] sys_poll 0.09% [k] sys_newstat 0.07% [k] sys_clone 0.06% [k] sys_newfstat 0.05% [k] sys_access 0.05% [k] schedule Shows the true total cost of each syscall variant that gets used during a kernel build. This profile reveals that pagefaults are the costliest, followed by read()/write(). An interesting detail: timer interrupts cost 0.5% - or 0.5 seconds per 100 seconds of kernel build-time. (this was done with HZ=1000) The summary is done in 'perf report', i.e. in the post-processing stage - so once we have a good call-graph recording, this type of non-trivial high-level analysis becomes possible. 
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Frederic Weisbecker Cc: Pekka Enberg LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index aebba5659345..1e2f5dde312c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -40,6 +40,7 @@ static int dump_trace = 0; static int verbose; static int full_paths; +static int collapse_syscalls; static unsigned long page_size; static unsigned long mmap_window = 32; @@ -983,6 +984,15 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) for (i = 0; i < chain->nr; i++) dprintf("..... %2d: %p\n", i, (void *)chain->ips[i]); } + if (collapse_syscalls) { + /* + * Find the all-but-last kernel entry + * amongst the call-chains - to get + * to the level of system calls: + */ + if (chain->kernel >= 2) + ip = chain->ips[chain->kernel-2]; + } } dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); @@ -1343,6 +1353,8 @@ static const struct option options[] = { "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), OPT_BOOLEAN('P', "full-paths", &full_paths, "Don't shorten the pathnames taking into account the cwd"), + OPT_BOOLEAN('S', "syscalls", &collapse_syscalls, + "show per syscall summary overhead, using call graph"), OPT_END() }; -- cgit From e2eae0f5605b90a0838608043c21050b08b6dd95 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 15 Jun 2009 16:15:19 +0200 Subject: perf report: Fix 32-bit printf format Yong Wang reported the following compiler warning: builtin-report.c: In function 'process_overflow_event': builtin-report.c:984: error: cast to pointer from integer of different size Which happens because we try to print ->ips[] out with a limited format, losing the high 32 bits. Print it out using %016Lx instead. 
Reported-by: Yong Wang Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1e2f5dde312c..f86bb07c0e84 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -982,7 +982,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) chain->nr); for (i = 0; i < chain->nr; i++) - dprintf("..... %2d: %p\n", i, (void *)chain->ips[i]); + dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); } if (collapse_syscalls) { /* -- cgit From 6e7d6fdcbeefa9434653b5e5da12909636ea1d52 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 17 Jun 2009 15:51:44 +0200 Subject: perf report: Add --sort call --call <$regex> Implement sorting by callchain symbols, --sort call. It will create a new column which will show a match to --call $regex or "[unmatched]". 
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 209 +++++++++++++++++++++++++++++++++----------- 1 file changed, 158 insertions(+), 51 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index f86bb07c0e84..cd74b2e58adb 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -40,11 +40,13 @@ static int dump_trace = 0; static int verbose; static int full_paths; -static int collapse_syscalls; static unsigned long page_size; static unsigned long mmap_window = 32; +static char *call = "^sys_"; +static regex_t call_regex; + struct ip_chain_event { __u16 nr; __u16 hv; @@ -463,6 +465,7 @@ struct hist_entry { struct map *map; struct dso *dso; struct symbol *sym; + struct symbol *call; __u64 ip; char level; @@ -483,6 +486,16 @@ struct sort_entry { size_t (*print)(FILE *fp, struct hist_entry *); }; +static int64_t cmp_null(void *l, void *r) +{ + if (!l && !r) + return 0; + else if (!l) + return -1; + else + return 1; +} + /* --sort pid */ static int64_t @@ -517,14 +530,8 @@ sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) char *comm_l = left->thread->comm; char *comm_r = right->thread->comm; - if (!comm_l || !comm_r) { - if (!comm_l && !comm_r) - return 0; - else if (!comm_l) - return -1; - else - return 1; - } + if (!comm_l || !comm_r) + return cmp_null(comm_l, comm_r); return strcmp(comm_l, comm_r); } @@ -550,14 +557,8 @@ sort__dso_cmp(struct hist_entry *left, struct hist_entry *right) struct dso *dso_l = left->dso; struct dso *dso_r = right->dso; - if (!dso_l || !dso_r) { - if (!dso_l && !dso_r) - return 0; - else if (!dso_l) - return -1; - else - return 1; - } + if (!dso_l || !dso_r) + return cmp_null(dso_l, dso_r); return strcmp(dso_l->name, dso_r->name); } @@ -617,7 +618,38 @@ static struct sort_entry sort_sym = { .print = 
sort__sym_print, }; +/* --sort call */ + +static int64_t +sort__call_cmp(struct hist_entry *left, struct hist_entry *right) +{ + struct symbol *sym_l = left->call; + struct symbol *sym_r = right->call; + + if (!sym_l || !sym_r) + return cmp_null(sym_l, sym_r); + + return strcmp(sym_l->name, sym_r->name); +} + +static size_t +sort__call_print(FILE *fp, struct hist_entry *self) +{ + size_t ret = 0; + + ret += fprintf(fp, "%-20s", self->call ? self->call->name : "[unmatched]"); + + return ret; +} + +static struct sort_entry sort_call = { + .header = "Callchain symbol ", + .cmp = sort__call_cmp, + .print = sort__call_print, +}; + static int sort__need_collapse = 0; +static int sort__has_call = 0; struct sort_dimension { char *name; @@ -630,6 +662,7 @@ static struct sort_dimension sort_dimensions[] = { { .name = "comm", .entry = &sort_comm, }, { .name = "dso", .entry = &sort_dso, }, { .name = "symbol", .entry = &sort_sym, }, + { .name = "call", .entry = &sort_call, }, }; static LIST_HEAD(hist_entry__sort_list); @@ -650,6 +683,18 @@ static int sort_dimension__add(char *tok) if (sd->entry->collapse) sort__need_collapse = 1; + if (sd->entry == &sort_call) { + int ret = regcomp(&call_regex, call, REG_EXTENDED); + if (ret) { + char err[BUFSIZ]; + + regerror(ret, &call_regex, err, sizeof(err)); + fprintf(stderr, "Invalid regex: %s\n%s", call, err); + exit(-1); + } + sort__has_call = 1; + } + list_add_tail(&sd->entry->list, &hist_entry__sort_list); sd->taken = 1; @@ -730,13 +775,76 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) return ret; } +/* + * + */ + +static struct symbol * +resolve_symbol(struct thread *thread, struct map **mapp, + struct dso **dsop, __u64 *ipp) +{ + struct dso *dso = dsop ? *dsop : NULL; + struct map *map = mapp ? 
*mapp : NULL; + uint64_t ip = *ipp; + + if (!thread) + return NULL; + + if (dso) + goto got_dso; + + if (map) + goto got_map; + + map = thread__find_map(thread, ip); + if (map != NULL) { + if (mapp) + *mapp = map; +got_map: + ip = map->map_ip(map, ip); + *ipp = ip; + + dso = map->dso; + } else { + /* + * If this is outside of all known maps, + * and is a negative address, try to look it + * up in the kernel dso, as it might be a + * vsyscall (which executes in user-mode): + */ + if ((long long)ip < 0) + dso = kernel_dso; + } + dprintf(" ...... dso: %s\n", dso ? dso->name : ""); + + if (dsop) + *dsop = dso; + + if (!dso) + return NULL; +got_dso: + return dso->find_symbol(dso, ip); +} + +static struct symbol *call__match(struct symbol *sym) +{ + if (!sym) + return NULL; + + if (sym->name && !regexec(&call_regex, sym->name, 0, NULL, 0)) + return sym; + + return NULL; +} + /* * collect histogram counts */ static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, char level, __u64 count) + struct symbol *sym, __u64 ip, struct ip_chain_event *chain, + char level, __u64 count) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -752,6 +860,33 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; + if (sort__has_call && chain) { + int i, nr = chain->hv; + struct symbol *sym; + struct dso *dso; + __u64 ip; + + for (i = 0; i < chain->kernel; i++) { + ip = chain->ips[nr + i]; + dso = kernel_dso; + sym = resolve_symbol(thread, NULL, &dso, &ip); + entry.call = call__match(sym); + if (entry.call) + goto got_call; + } + nr += i; + + for (i = 0; i < chain->user; i++) { + ip = chain->ips[nr + i]; + sym = resolve_symbol(thread, NULL, NULL, &ip); + entry.call = call__match(sym); + if (entry.call) + goto got_call; + } + nr += i; + } +got_call: + while (*p != NULL) { parent = *p; he = rb_entry(parent, struct hist_entry, rb_node); @@ -955,7 +1090,7 @@ 
process_overflow_event(event_t *event, unsigned long offset, unsigned long head) __u64 period = 1; struct map *map = NULL; void *more_data = event->ip.__more_data; - struct ip_chain_event *chain; + struct ip_chain_event *chain = NULL; if (event->header.type & PERF_SAMPLE_PERIOD) { period = *(__u64 *)more_data; @@ -984,15 +1119,6 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) for (i = 0; i < chain->nr; i++) dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); } - if (collapse_syscalls) { - /* - * Find the all-but-last kernel entry - * amongst the call-chains - to get - * to the level of system calls: - */ - if (chain->kernel >= 2) - ip = chain->ips[chain->kernel-2]; - } } dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); @@ -1016,22 +1142,6 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) show = SHOW_USER; level = '.'; - map = thread__find_map(thread, ip); - if (map != NULL) { - ip = map->map_ip(map, ip); - dso = map->dso; - } else { - /* - * If this is outside of all known maps, - * and is a negative address, try to look it - * up in the kernel dso, as it might be a - * vsyscall (which executes in user-mode): - */ - if ((long long)ip < 0) - dso = kernel_dso; - } - dprintf(" ...... dso: %s\n", dso ? dso->name : ""); - } else { show = SHOW_HV; level = 'H'; @@ -1039,12 +1149,9 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) } if (show & show_mask) { - struct symbol *sym = NULL; - - if (dso) - sym = dso->find_symbol(dso, ip); + struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); - if (hist_entry__add(thread, map, dso, sym, ip, level, period)) { + if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { fprintf(stderr, "problem incrementing symbol count, skipping event\n"); return -1; @@ -1353,8 +1460,8 @@ static const struct option options[] = { "sort by key(s): pid, comm, dso, symbol. 
Default: pid,symbol"), OPT_BOOLEAN('P', "full-paths", &full_paths, "Don't shorten the pathnames taking into account the cwd"), - OPT_BOOLEAN('S', "syscalls", &collapse_syscalls, - "show per syscall summary overhead, using call graph"), + OPT_STRING('c', "call", &call, "regex", + "regex to use for --sort call"), OPT_END() }; -- cgit From 5aa75a0fd4bc6402899e06fdb853cab024d65055 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 15 Jun 2009 20:11:41 +0200 Subject: perf_counter tools: Replace isprint() with issane() The Git utils came with a ctype replacement that doesn't provide isprint(). Add a replacement. Solves a build bug on certain distros. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cd74b2e58adb..707f60ce32fd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1266,7 +1266,7 @@ static void trace_event(event_t *event) for (j = 0; j < 15-(i & 15); j++) cdprintf(" "); for (j = 0; j < (i & 15); j++) { - if (isprint(raw_event[i-15+j])) + if (issane(raw_event[i-15+j])) cdprintf("%c", raw_event[i-15+j]); else cdprintf("."); -- cgit From b25bcf2f133b1e6216c3d40be394756107d3880f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 18 Jun 2009 07:01:03 +0200 Subject: perf report: Tidy up the "--parent <regex>" and "--sort parent" call-chain features Instead of the ambiguous 'call' naming use the much more specific 'parent' naming: - rename --call to --parent - rename --sort call to --sort parent - rename [unmatched] to [other] - to signal that this is not an error but the inverse set Also add pagefaults to the default parent-symbol pattern, as it's a 'syscall overhead category' in a sense. 
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 67 +++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 33 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 707f60ce32fd..986834623b43 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -44,8 +44,8 @@ static int full_paths; static unsigned long page_size; static unsigned long mmap_window = 32; -static char *call = "^sys_"; -static regex_t call_regex; +static char *parent_pattern = "^sys_|^do_page_fault"; +static regex_t parent_regex; struct ip_chain_event { __u16 nr; @@ -465,7 +465,7 @@ struct hist_entry { struct map *map; struct dso *dso; struct symbol *sym; - struct symbol *call; + struct symbol *parent; __u64 ip; char level; @@ -618,13 +618,13 @@ static struct sort_entry sort_sym = { .print = sort__sym_print, }; -/* --sort call */ +/* --sort parent */ static int64_t -sort__call_cmp(struct hist_entry *left, struct hist_entry *right) +sort__parent_cmp(struct hist_entry *left, struct hist_entry *right) { - struct symbol *sym_l = left->call; - struct symbol *sym_r = right->call; + struct symbol *sym_l = left->parent; + struct symbol *sym_r = right->parent; if (!sym_l || !sym_r) return cmp_null(sym_l, sym_r); @@ -633,23 +633,23 @@ sort__call_cmp(struct hist_entry *left, struct hist_entry *right) } static size_t -sort__call_print(FILE *fp, struct hist_entry *self) +sort__parent_print(FILE *fp, struct hist_entry *self) { size_t ret = 0; - ret += fprintf(fp, "%-20s", self->call ? self->call->name : "[unmatched]"); + ret += fprintf(fp, "%-20s", self->parent ? 
self->parent->name : "[other]"); return ret; } -static struct sort_entry sort_call = { - .header = "Callchain symbol ", - .cmp = sort__call_cmp, - .print = sort__call_print, +static struct sort_entry sort_parent = { + .header = "Parent symbol ", + .cmp = sort__parent_cmp, + .print = sort__parent_print, }; static int sort__need_collapse = 0; -static int sort__has_call = 0; +static int sort__has_parent = 0; struct sort_dimension { char *name; @@ -662,7 +662,7 @@ static struct sort_dimension sort_dimensions[] = { { .name = "comm", .entry = &sort_comm, }, { .name = "dso", .entry = &sort_dso, }, { .name = "symbol", .entry = &sort_sym, }, - { .name = "call", .entry = &sort_call, }, + { .name = "parent", .entry = &sort_parent, }, }; static LIST_HEAD(hist_entry__sort_list); @@ -683,16 +683,17 @@ static int sort_dimension__add(char *tok) if (sd->entry->collapse) sort__need_collapse = 1; - if (sd->entry == &sort_call) { - int ret = regcomp(&call_regex, call, REG_EXTENDED); + if (sd->entry == &sort_parent) { + int ret = regcomp(&parent_regex, parent_pattern, REG_EXTENDED); if (ret) { char err[BUFSIZ]; - regerror(ret, &call_regex, err, sizeof(err)); - fprintf(stderr, "Invalid regex: %s\n%s", call, err); + regerror(ret, &parent_regex, err, sizeof(err)); + fprintf(stderr, "Invalid regex: %s\n%s", + parent_pattern, err); exit(-1); } - sort__has_call = 1; + sort__has_parent = 1; } list_add_tail(&sd->entry->list, &hist_entry__sort_list); @@ -831,7 +832,7 @@ static struct symbol *call__match(struct symbol *sym) if (!sym) return NULL; - if (sym->name && !regexec(&call_regex, sym->name, 0, NULL, 0)) + if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) return sym; return NULL; @@ -844,7 +845,7 @@ static struct symbol *call__match(struct symbol *sym) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, struct symbol *sym, __u64 ip, struct ip_chain_event *chain, - char level, __u64 count) + char level, __u64 count) { struct rb_node **p = 
&hist.rb_node; struct rb_node *parent = NULL; @@ -860,7 +861,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, }; int cmp; - if (sort__has_call && chain) { + if (sort__has_parent && chain) { int i, nr = chain->hv; struct symbol *sym; struct dso *dso; @@ -870,22 +871,22 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, ip = chain->ips[nr + i]; dso = kernel_dso; sym = resolve_symbol(thread, NULL, &dso, &ip); - entry.call = call__match(sym); - if (entry.call) - goto got_call; + entry.parent = call__match(sym); + if (entry.parent) + goto got_parent; } nr += i; for (i = 0; i < chain->user; i++) { ip = chain->ips[nr + i]; sym = resolve_symbol(thread, NULL, NULL, &ip); - entry.call = call__match(sym); - if (entry.call) - goto got_call; + entry.parent = call__match(sym); + if (entry.parent) + goto got_parent; } nr += i; } -got_call: +got_parent: while (*p != NULL) { parent = *p; @@ -1457,11 +1458,11 @@ static const struct option options[] = { "dump raw trace in ASCII"), OPT_STRING('k', "vmlinux", &vmlinux, "file", "vmlinux pathname"), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol. Default: pid,symbol"), + "sort by key(s): pid, comm, dso, symbol, parent"), OPT_BOOLEAN('P', "full-paths", &full_paths, "Don't shorten the pathnames taking into account the cwd"), - OPT_STRING('c', "call", &call, "regex", - "regex to use for --sort call"), + OPT_STRING('p', "parent", &parent_pattern, "regex", + "regex filter to identify parent, see: '--sort parent'"), OPT_END() }; -- cgit From 7522060c95395f479ee4a6af3bbf9e097e92e48f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 18 Jun 2009 08:00:17 +0200 Subject: perf report: Add validation of call-chain entries Add boundary checks for call-chain events. In case of corrupted entries we could crash otherwise. 
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 74 ++++++++++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 28 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 986834623b43..e14e98676171 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -39,6 +39,8 @@ static int dump_trace = 0; #define cdprintf(x...) do { if (dump_trace) color_fprintf(stdout, color, x); } while (0) static int verbose; +#define eprintf(x...) do { if (verbose) fprintf(stderr, x); } while (0) + static int full_paths; static unsigned long page_size; @@ -47,14 +49,6 @@ static unsigned long mmap_window = 32; static char *parent_pattern = "^sys_|^do_page_fault"; static regex_t parent_regex; -struct ip_chain_event { - __u16 nr; - __u16 hv; - __u16 kernel; - __u16 user; - __u64 ips[]; -}; - struct ip_event { struct perf_event_header header; __u64 ip; @@ -131,15 +125,11 @@ static struct dso *dsos__findnew(const char *name) nr = dso__load(dso, NULL, verbose); if (nr < 0) { - if (verbose) - fprintf(stderr, "Failed to open: %s\n", name); + eprintf("Failed to open: %s\n", name); goto out_delete_dso; } - if (!nr && verbose) { - fprintf(stderr, - "No symbols found in: %s, maybe install a debug package?\n", - name); - } + if (!nr) + eprintf("No symbols found in: %s, maybe install a debug package?\n", name); dsos__add(dso); @@ -844,7 +834,7 @@ static struct symbol *call__match(struct symbol *sym) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, struct ip_chain_event *chain, + struct symbol *sym, __u64 ip, struct perf_callchain_entry *chain, char level, __u64 count) { struct rb_node **p = &hist.rb_node; @@ -868,7 +858,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, __u64 ip; 
for (i = 0; i < chain->kernel; i++) { - ip = chain->ips[nr + i]; + ip = chain->ip[nr + i]; dso = kernel_dso; sym = resolve_symbol(thread, NULL, &dso, &ip); entry.parent = call__match(sym); @@ -878,7 +868,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, nr += i; for (i = 0; i < chain->user; i++) { - ip = chain->ips[nr + i]; + ip = chain->ip[nr + i]; sym = resolve_symbol(thread, NULL, NULL, &ip); entry.parent = call__match(sym); if (entry.parent) @@ -1080,6 +1070,30 @@ static unsigned long total = 0, total_fork = 0, total_unknown = 0; +static int validate_chain(struct perf_callchain_entry *chain, event_t *event) +{ + unsigned int chain_size; + + if (chain->nr > MAX_STACK_DEPTH) + return -1; + if (chain->hv > MAX_STACK_DEPTH) + return -1; + if (chain->kernel > MAX_STACK_DEPTH) + return -1; + if (chain->user > MAX_STACK_DEPTH) + return -1; + if (chain->hv + chain->kernel + chain->user != chain->nr) + return -1; + + chain_size = event->header.size; + chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; + + if (chain->nr*sizeof(__u64) > chain_size) + return -1; + + return 0; +} + static int process_overflow_event(event_t *event, unsigned long offset, unsigned long head) { @@ -1091,7 +1105,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) __u64 period = 1; struct map *map = NULL; void *more_data = event->ip.__more_data; - struct ip_chain_event *chain = NULL; + struct perf_callchain_entry *chain = NULL; if (event->header.type & PERF_SAMPLE_PERIOD) { period = *(__u64 *)more_data; @@ -1111,21 +1125,26 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) chain = (void *)more_data; - if (dump_trace) { - dprintf("... chain: u:%d, k:%d, nr:%d\n", - chain->user, - chain->kernel, - chain->nr); + dprintf("... 
chain: u:%d, k:%d, nr:%d\n", + chain->user, + chain->kernel, + chain->nr); + if (validate_chain(chain, event) < 0) { + eprintf("call-chain problem with event, skipping it.\n"); + return 0; + } + + if (dump_trace) { for (i = 0; i < chain->nr; i++) - dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); + dprintf("..... %2d: %016Lx\n", i, chain->ip[i]); } } dprintf(" ... thread: %s:%d\n", thread->comm, thread->pid); if (thread == NULL) { - fprintf(stderr, "problem processing %d event, skipping it.\n", + eprintf("problem processing %d event, skipping it.\n", event->header.type); return -1; } @@ -1153,8 +1172,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) struct symbol *sym = resolve_symbol(thread, &map, &dso, &ip); if (hist_entry__add(thread, map, dso, sym, ip, chain, level, period)) { - fprintf(stderr, - "problem incrementing symbol count, skipping event\n"); + eprintf("problem incrementing symbol count, skipping event\n"); return -1; } } -- cgit From a73c7d84a1975b44c0ebd03c2dec288af1426349 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 09:44:20 +0200 Subject: perf_counter tools: Add and use isprint() Introduce isprint() to print out raw event dumps to ASCII, etc. (This is an extension to upstream Git's ctype.c.) 
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: [ removed openssl.h inclusion from util.h - it leaked ctype.h ] Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index e14e98676171..9a3805f0c9f2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1285,7 +1285,7 @@ static void trace_event(event_t *event) for (j = 0; j < 15-(i & 15); j++) cdprintf(" "); for (j = 0; j < (i & 15); j++) { - if (issane(raw_event[i-15+j])) + if (isprint(raw_event[i-15+j])) cdprintf("%c", raw_event[i-15+j]); else cdprintf("."); -- cgit From 9d91a6f7a489eb914c16b82d927f9d81d629c259 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 11:40:28 +0200 Subject: perf_counter tools: Handle lost events Make use of the new ->data_tail mechanism to tell kernel-space about user-space draining the data stream. Emit lost events (and display them) if they happen. 
Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 9a3805f0c9f2..fe66895111b1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -83,6 +83,12 @@ struct period_event { __u64 sample_period; }; +struct lost_event { + struct perf_event_header header; + __u64 id; + __u64 lost; +}; + typedef union event_union { struct perf_event_header header; struct ip_event ip; @@ -90,6 +96,7 @@ typedef union event_union { struct comm_event comm; struct fork_event fork; struct period_event period; + struct lost_event lost; } event_t; static LIST_HEAD(dsos); @@ -1068,7 +1075,8 @@ static unsigned long total = 0, total_mmap = 0, total_comm = 0, total_fork = 0, - total_unknown = 0; + total_unknown = 0, + total_lost = 0; static int validate_chain(struct perf_callchain_entry *chain, event_t *event) { @@ -1260,6 +1268,20 @@ process_period_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static int +process_lost_event(event_t *event, unsigned long offset, unsigned long head) +{ + dprintf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", + (void *)(offset + head), + (void *)(long)(event->header.size), + event->lost.id, + event->lost.lost); + + total_lost += event->lost.lost; + + return 0; +} + static void trace_event(event_t *event) { unsigned char *raw_event = (void *)event; @@ -1316,6 +1338,10 @@ process_event(event_t *event, unsigned long offset, unsigned long head) case PERF_EVENT_PERIOD: return process_period_event(event, offset, head); + + case PERF_EVENT_LOST: + return process_lost_event(event, offset, head); + /* * We dont process them right now but they are fine: */ @@ -1444,6 +1470,7 @@ more: dprintf(" mmap events: 
%10ld\n", total_mmap); dprintf(" comm events: %10ld\n", total_comm); dprintf(" fork events: %10ld\n", total_fork); + dprintf(" lost events: %10ld\n", total_lost); dprintf(" unknown events: %10ld\n", total_unknown); if (dump_trace) -- cgit From b8e6d829729d1a5991a9f628205b671cac2ec06f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 18 Jun 2009 14:32:19 +0200 Subject: perf report: Filter to parent set by default Make it easier to use parent filtering - default to a filtered output. Also add the parent column so that we get collapsing but don't display it by default. Add --no-exclude-other to override this. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index fe66895111b1..86981bd08f65 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -46,9 +46,12 @@ static int full_paths; static unsigned long page_size; static unsigned long mmap_window = 32; -static char *parent_pattern = "^sys_|^do_page_fault"; +static char default_parent_pattern[] = "^sys_|^do_page_fault"; +static char *parent_pattern = default_parent_pattern; static regex_t parent_regex; +static int exclude_other = 1; + struct ip_event { struct perf_event_header header; __u64 ip; @@ -742,6 +745,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) struct sort_entry *se; size_t ret; + if (exclude_other && !self->parent) + return 0; + if (total_samples) { double percent = self->count * 100.0 / total_samples; char *color = PERF_COLOR_NORMAL; @@ -764,6 +770,9 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) ret = fprintf(fp, "%12Ld ", self->count); list_for_each_entry(se, &hist_entry__sort_list, list) { + if (exclude_other
&& (se == &sort_parent)) + continue; + fprintf(fp, " "); ret += se->print(fp, self); } @@ -855,6 +864,7 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, .ip = ip, .level = level, .count = count, + .parent = NULL, }; int cmp; @@ -1029,14 +1039,20 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) fprintf(fp, "#\n"); fprintf(fp, "# Overhead"); - list_for_each_entry(se, &hist_entry__sort_list, list) + list_for_each_entry(se, &hist_entry__sort_list, list) { + if (exclude_other && (se == &sort_parent)) + continue; fprintf(fp, " %s", se->header); + } fprintf(fp, "\n"); fprintf(fp, "# ........"); list_for_each_entry(se, &hist_entry__sort_list, list) { int i; + if (exclude_other && (se == &sort_parent)) + continue; + fprintf(fp, " "); for (i = 0; i < strlen(se->header); i++) fprintf(fp, "."); @@ -1050,7 +1066,8 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) ret += hist_entry__fprintf(fp, pos, total_samples); } - if (!strcmp(sort_order, default_sort_order)) { + if (sort_order == default_sort_order && + parent_pattern == default_parent_pattern) { fprintf(fp, "#\n"); fprintf(fp, "# (For more details, try: perf report --sort comm,dso,symbol)\n"); fprintf(fp, "#\n"); @@ -1508,6 +1525,8 @@ static const struct option options[] = { "Don't shorten the pathnames taking into account the cwd"), OPT_STRING('p', "parent", &parent_pattern, "regex", "regex filter to identify parent, see: '--sort parent'"), + OPT_BOOLEAN('x', "exclude-other", &exclude_other, + "Only display entries with parent-match"), OPT_END() }; @@ -1536,6 +1555,11 @@ int cmd_report(int argc, const char **argv, const char *prefix) setup_sorting(); + if (parent_pattern != default_parent_pattern) + sort_dimension__add("parent"); + else + exclude_other = 0; + /* * Any (unrecognized) arguments left? 
*/ -- cgit From 2a0a50fe9def21835d65035cc8109c0b6dd6099d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 22:20:45 +0200 Subject: perf_counter: Update userspace callchain sampling uses Update the tools to reflect the new callchain sampling format. LKML-Reference: Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 86 ++++++++++++++++++++------------------------- 1 file changed, 39 insertions(+), 47 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 86981bd08f65..7a6577bf9a41 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -59,6 +59,11 @@ struct ip_event { unsigned char __more_data[]; }; +struct ip_callchain { + __u64 nr; + __u64 ips[0]; +}; + struct mmap_event { struct perf_event_header header; __u32 pid, tid; @@ -833,15 +838,12 @@ got_dso: return dso->find_symbol(dso, ip); } -static struct symbol *call__match(struct symbol *sym) +static int call__match(struct symbol *sym) { - if (!sym) - return NULL; - if (sym->name && !regexec(&parent_regex, sym->name, 0, NULL, 0)) - return sym; + return 1; - return NULL; + return 0; } /* @@ -850,7 +852,7 @@ static struct symbol *call__match(struct symbol *sym) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, struct perf_callchain_entry *chain, + struct symbol *sym, __u64 ip, struct ip_callchain *chain, char level, __u64 count) { struct rb_node **p = &hist.rb_node; @@ -869,31 +871,35 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, int cmp; if (sort__has_parent && chain) { - int i, nr = chain->hv; - struct symbol *sym; - struct dso *dso; - __u64 ip; - - for (i = 0; i < chain->kernel; i++) { - ip = chain->ip[nr + i]; - dso = kernel_dso; + __u64 context = PERF_CONTEXT_MAX; + int i; + + for (i = 0; i < chain->nr; i++) { + __u64 ip = chain->ips[i]; + struct dso *dso = NULL; 
+ struct symbol *sym; + + if (ip >= PERF_CONTEXT_MAX) { + context = ip; + continue; + } + + switch (context) { + case PERF_CONTEXT_KERNEL: + dso = kernel_dso; + break; + default: + break; + } + sym = resolve_symbol(thread, NULL, &dso, &ip); - entry.parent = call__match(sym); - if (entry.parent) - goto got_parent; - } - nr += i; - - for (i = 0; i < chain->user; i++) { - ip = chain->ip[nr + i]; - sym = resolve_symbol(thread, NULL, NULL, &ip); - entry.parent = call__match(sym); - if (entry.parent) - goto got_parent; + + if (sym && call__match(sym)) { + entry.parent = sym; + break; + } } - nr += i; } -got_parent: while (*p != NULL) { parent = *p; @@ -1095,21 +1101,10 @@ static unsigned long total = 0, total_unknown = 0, total_lost = 0; -static int validate_chain(struct perf_callchain_entry *chain, event_t *event) +static int validate_chain(struct ip_callchain *chain, event_t *event) { unsigned int chain_size; - if (chain->nr > MAX_STACK_DEPTH) - return -1; - if (chain->hv > MAX_STACK_DEPTH) - return -1; - if (chain->kernel > MAX_STACK_DEPTH) - return -1; - if (chain->user > MAX_STACK_DEPTH) - return -1; - if (chain->hv + chain->kernel + chain->user != chain->nr) - return -1; - chain_size = event->header.size; chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; @@ -1130,7 +1125,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) __u64 period = 1; struct map *map = NULL; void *more_data = event->ip.__more_data; - struct perf_callchain_entry *chain = NULL; + struct ip_callchain *chain = NULL; if (event->header.type & PERF_SAMPLE_PERIOD) { period = *(__u64 *)more_data; @@ -1150,10 +1145,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) chain = (void *)more_data; - dprintf("... chain: u:%d, k:%d, nr:%d\n", - chain->user, - chain->kernel, - chain->nr); + dprintf("... 
chain: nr:%Lu\n", chain->nr); if (validate_chain(chain, event) < 0) { eprintf("call-chain problem with event, skipping it.\n"); @@ -1162,7 +1154,7 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) if (dump_trace) { for (i = 0; i < chain->nr; i++) - dprintf("..... %2d: %016Lx\n", i, chain->ip[i]); + dprintf("..... %2d: %016Lx\n", i, chain->ips[i]); } } -- cgit From f5970550d5ccf90453cbd7d260370ea99d1f6513 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2009 23:22:55 +0200 Subject: perf_counter tools: Add a data file header Add a data file header so we can transfer data between record and report. LKML-Reference: Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7a6577bf9a41..37b26ecb0d0b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1366,11 +1366,13 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return 0; } +static struct perf_file_header file_header; + static int __cmd_report(void) { int ret, rc = EXIT_FAILURE; unsigned long offset = 0; - unsigned long head = 0; + unsigned long head = sizeof(file_header); struct stat stat; event_t *event; uint32_t size; @@ -1398,6 +1400,14 @@ static int __cmd_report(void) exit(0); } + read(input, &file_header, sizeof(file_header)); + + if (sort__has_parent && + !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { + fprintf(stderr, "selected --sort parent, but no callchain data\n"); + exit(-1); + } + if (load_kernel() < 0) { perror("failed to load kernel symbols"); return EXIT_FAILURE; @@ -1469,9 +1479,13 @@ more: head += size; + if (offset + head >= sizeof(file_header) + file_header.data_size) + goto done; + if (offset + head < stat.st_size) goto more; +done: rc = EXIT_SUCCESS; close(input); -- 
cgit From 9cffa8d53335d891cc0ecb3824a67118b3ee4b2f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 19 Jun 2009 22:21:42 +1000 Subject: perf_counter tools: Define and use our own u64, s64 etc. definitions On 64-bit powerpc, __u64 is defined to be unsigned long rather than unsigned long long. This causes compiler warnings every time we print a __u64 value with %Lx. Rather than changing __u64, we define our own u64 to be unsigned long long on all architectures, and similarly s64 as signed long long. For consistency we also define u32, s32, u16, s16, u8 and s8. These definitions are put in a new header, types.h, because these definitions are needed in util/string.h and util/symbol.h. The main change here is the mechanical change of __[us]{64,32,16,8} to remove the "__". The other changes are: * Create types.h * Include types.h in perf.h, util/string.h and util/symbol.h * Add types.h to the LIB_H definition in Makefile * Added (u64) casts in process_overflow_event() and print_sym_table() to kill two remaining warnings. 
Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: benh@kernel.crashing.org LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 84 ++++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 37b26ecb0d0b..de1b97845e9e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -54,47 +54,47 @@ static int exclude_other = 1; struct ip_event { struct perf_event_header header; - __u64 ip; - __u32 pid, tid; + u64 ip; + u32 pid, tid; unsigned char __more_data[]; }; struct ip_callchain { - __u64 nr; - __u64 ips[0]; + u64 nr; + u64 ips[0]; }; struct mmap_event { struct perf_event_header header; - __u32 pid, tid; - __u64 start; - __u64 len; - __u64 pgoff; + u32 pid, tid; + u64 start; + u64 len; + u64 pgoff; char filename[PATH_MAX]; }; struct comm_event { struct perf_event_header header; - __u32 pid, tid; + u32 pid, tid; char comm[16]; }; struct fork_event { struct perf_event_header header; - __u32 pid, ppid; + u32 pid, ppid; }; struct period_event { struct perf_event_header header; - __u64 time; - __u64 id; - __u64 sample_period; + u64 time; + u64 id; + u64 sample_period; }; struct lost_event { struct perf_event_header header; - __u64 id; - __u64 lost; + u64 id; + u64 lost; }; typedef union event_union { @@ -163,7 +163,7 @@ static void dsos__fprintf(FILE *fp) dso__fprintf(pos, fp); } -static struct symbol *vdso__find_symbol(struct dso *dso, __u64 ip) +static struct symbol *vdso__find_symbol(struct dso *dso, u64 ip) { return dso__find_symbol(kernel_dso, ip); } @@ -210,19 +210,19 @@ static int strcommon(const char *pathname) struct map { struct list_head node; - __u64 start; - __u64 end; - __u64 pgoff; - __u64 (*map_ip)(struct map *, __u64); + u64 start; + u64 end; + u64 pgoff; + u64 (*map_ip)(struct map *, u64); struct dso 
*dso; }; -static __u64 map__map_ip(struct map *map, __u64 ip) +static u64 map__map_ip(struct map *map, u64 ip) { return ip - map->start + map->pgoff; } -static __u64 vdso__map_ip(struct map *map, __u64 ip) +static u64 vdso__map_ip(struct map *map, u64 ip) { return ip; } @@ -429,7 +429,7 @@ static int thread__fork(struct thread *self, struct thread *parent) return 0; } -static struct map *thread__find_map(struct thread *self, __u64 ip) +static struct map *thread__find_map(struct thread *self, u64 ip) { struct map *pos; @@ -471,10 +471,10 @@ struct hist_entry { struct dso *dso; struct symbol *sym; struct symbol *parent; - __u64 ip; + u64 ip; char level; - __u64 count; + u64 count; }; /* @@ -574,7 +574,7 @@ sort__dso_print(FILE *fp, struct hist_entry *self) if (self->dso) return fprintf(fp, "%-25s", self->dso->name); - return fprintf(fp, "%016llx ", (__u64)self->ip); + return fprintf(fp, "%016llx ", (u64)self->ip); } static struct sort_entry sort_dso = { @@ -588,7 +588,7 @@ static struct sort_entry sort_dso = { static int64_t sort__sym_cmp(struct hist_entry *left, struct hist_entry *right) { - __u64 ip_l, ip_r; + u64 ip_l, ip_r; if (left->sym == right->sym) return 0; @@ -605,13 +605,13 @@ sort__sym_print(FILE *fp, struct hist_entry *self) size_t ret = 0; if (verbose) - ret += fprintf(fp, "%#018llx ", (__u64)self->ip); + ret += fprintf(fp, "%#018llx ", (u64)self->ip); if (self->sym) { ret += fprintf(fp, "[%c] %s", self->dso == kernel_dso ? 
'k' : '.', self->sym->name); } else { - ret += fprintf(fp, "%#016llx", (__u64)self->ip); + ret += fprintf(fp, "%#016llx", (u64)self->ip); } return ret; @@ -745,7 +745,7 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) } static size_t -hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) +hist_entry__fprintf(FILE *fp, struct hist_entry *self, u64 total_samples) { struct sort_entry *se; size_t ret; @@ -793,7 +793,7 @@ hist_entry__fprintf(FILE *fp, struct hist_entry *self, __u64 total_samples) static struct symbol * resolve_symbol(struct thread *thread, struct map **mapp, - struct dso **dsop, __u64 *ipp) + struct dso **dsop, u64 *ipp) { struct dso *dso = dsop ? *dsop : NULL; struct map *map = mapp ? *mapp : NULL; @@ -852,8 +852,8 @@ static int call__match(struct symbol *sym) static int hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, - struct symbol *sym, __u64 ip, struct ip_callchain *chain, - char level, __u64 count) + struct symbol *sym, u64 ip, struct ip_callchain *chain, + char level, u64 count) { struct rb_node **p = &hist.rb_node; struct rb_node *parent = NULL; @@ -871,11 +871,11 @@ hist_entry__add(struct thread *thread, struct map *map, struct dso *dso, int cmp; if (sort__has_parent && chain) { - __u64 context = PERF_CONTEXT_MAX; + u64 context = PERF_CONTEXT_MAX; int i; for (i = 0; i < chain->nr; i++) { - __u64 ip = chain->ips[i]; + u64 ip = chain->ips[i]; struct dso *dso = NULL; struct symbol *sym; @@ -1032,7 +1032,7 @@ static void output__resort(void) } } -static size_t output__fprintf(FILE *fp, __u64 total_samples) +static size_t output__fprintf(FILE *fp, u64 total_samples) { struct hist_entry *pos; struct sort_entry *se; @@ -1041,7 +1041,7 @@ static size_t output__fprintf(FILE *fp, __u64 total_samples) fprintf(fp, "\n"); fprintf(fp, "#\n"); - fprintf(fp, "# (%Ld samples)\n", (__u64)total_samples); + fprintf(fp, "# (%Ld samples)\n", (u64)total_samples); fprintf(fp, "#\n"); fprintf(fp, 
"# Overhead"); @@ -1108,7 +1108,7 @@ static int validate_chain(struct ip_callchain *chain, event_t *event) chain_size = event->header.size; chain_size -= (unsigned long)&event->ip.__more_data - (unsigned long)event; - if (chain->nr*sizeof(__u64) > chain_size) + if (chain->nr*sizeof(u64) > chain_size) return -1; return 0; @@ -1121,15 +1121,15 @@ process_overflow_event(event_t *event, unsigned long offset, unsigned long head) int show = 0; struct dso *dso = NULL; struct thread *thread = threads__findnew(event->ip.pid); - __u64 ip = event->ip.ip; - __u64 period = 1; + u64 ip = event->ip.ip; + u64 period = 1; struct map *map = NULL; void *more_data = event->ip.__more_data; struct ip_callchain *chain = NULL; if (event->header.type & PERF_SAMPLE_PERIOD) { - period = *(__u64 *)more_data; - more_data += sizeof(__u64); + period = *(u64 *)more_data; + more_data += sizeof(u64); } dprintf("%p [%p]: PERF_EVENT (IP, %d): %d: %p period: %Ld\n", -- cgit From eadc84cc01e04f9f74ec2de0c9355be035c7b396 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 20 Jun 2009 02:01:40 +0200 Subject: perfcounter: Handle some IO return values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Building perfcounter tools raises the following warnings: builtin-record.c: In function ‘atexit_header’: builtin-record.c:464: erreur: ignoring return value of ‘pwrite’, declared with attribute warn_unused_result builtin-record.c: In function ‘__cmd_record’: builtin-record.c:503: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result builtin-report.c: In function ‘__cmd_report’: builtin-report.c:1403: erreur: ignoring return value of ‘read’, declared with attribute warn_unused_result This patch handles these IO return values. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <1245456100-5477-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-report.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index de1b97845e9e..5eb5566f0c95 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1400,7 +1400,10 @@ static int __cmd_report(void) exit(0); } - read(input, &file_header, sizeof(file_header)); + if (read(input, &file_header, sizeof(file_header)) == -1) { + perror("failed to read file headers"); + exit(-1); + } if (sort__has_parent && !(file_header.sample_type & PERF_SAMPLE_CALLCHAIN)) { -- cgit