From 864709302a80f26fa9da3be5b47304f0b8bae192 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Jun 2009 20:33:43 +0200 Subject: perf_counter tools: Move from Documentation/perf_counter/ to tools/perf/ Several people have suggested that 'perf' has become a full-fledged tool that should be moved out of Documentation/. Move it to the (new) tools/ directory. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 339 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 339 insertions(+) create mode 100644 tools/perf/builtin-stat.c (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c new file mode 100644 index 000000000000..2cbf5a189589 --- /dev/null +++ b/tools/perf/builtin-stat.c @@ -0,0 +1,339 @@ +/* + * builtin-stat.c + * + * Builtin stat command: Give a precise performance counters summary + * overview about any workload, CPU or specific PID. + * + * Sample output: + + $ perf stat ~/hackbench 10 + Time: 0.104 + + Performance counter stats for '/home/mingo/hackbench': + + 1255.538611 task clock ticks # 10.143 CPU utilization factor + 54011 context switches # 0.043 M/sec + 385 CPU migrations # 0.000 M/sec + 17755 pagefaults # 0.014 M/sec + 3808323185 CPU cycles # 3033.219 M/sec + 1575111190 instructions # 1254.530 M/sec + 17367895 cache references # 13.833 M/sec + 7674421 cache misses # 6.112 M/sec + + Wall-clock time elapsed: 123.786620 msecs + + * + * Copyright (C) 2008, Red Hat Inc, Ingo Molnar + * + * Improvements and fixes by: + * + * Arjan van de Ven + * Yanmin Zhang + * Wu Fengguang + * Mike Galbraith + * Paul Mackerras + * + * Released under the GPL v2. (and only v2, not any later version) + */ + +#include "perf.h" +#include "builtin.h" +#include "util/util.h" +#include "util/parse-options.h" +#include "util/parse-events.h" + +#include + +static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { + + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS }, + + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES }, +}; + +static int system_wide = 0; +static int inherit = 1; + +static int fd[MAX_NR_CPUS][MAX_COUNTERS]; + +static int target_pid = -1; +static int nr_cpus = 0; +static unsigned int page_size; + +static int scale = 1; + +static const unsigned int default_count[] = { + 1000000, + 1000000, + 10000, + 10000, + 1000000, + 10000, +}; + +static __u64 event_res[MAX_COUNTERS][3]; +static __u64 event_scaled[MAX_COUNTERS]; + +static __u64 runtime_nsecs; +static __u64 walltime_nsecs; + +static void create_perfstat_counter(int counter) +{ + struct perf_counter_attr *attr = attrs + counter; + + if (scale) + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + + if (system_wide) { + int cpu; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); + if (fd[cpu][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[cpu][counter], strerror(errno)); + exit(-1); + } + } 
+ } else { + attr->inherit = inherit; + attr->disabled = 1; + + fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); + if (fd[0][counter] < 0) { + printf("perfstat error: syscall returned with %d (%s)\n", + fd[0][counter], strerror(errno)); + exit(-1); + } + } +} + +/* + * Does the counter have nsecs as a unit? + */ +static inline int nsec_counter(int counter) +{ + if (attrs[counter].type != PERF_TYPE_SOFTWARE) + return 0; + + if (attrs[counter].config == PERF_COUNT_CPU_CLOCK) + return 1; + + if (attrs[counter].config == PERF_COUNT_TASK_CLOCK) + return 1; + + return 0; +} + +/* + * Read out the results of a single counter: + */ +static void read_counter(int counter) +{ + __u64 *count, single_count[3]; + ssize_t res; + int cpu, nv; + int scaled; + + count = event_res[counter]; + + count[0] = count[1] = count[2] = 0; + + nv = scale ? 3 : 1; + for (cpu = 0; cpu < nr_cpus; cpu ++) { + res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); + assert(res == nv * sizeof(__u64)); + + count[0] += single_count[0]; + if (scale) { + count[1] += single_count[1]; + count[2] += single_count[2]; + } + } + + scaled = 0; + if (scale) { + if (count[2] == 0) { + event_scaled[counter] = -1; + count[0] = 0; + return; + } + + if (count[2] < count[1]) { + event_scaled[counter] = 1; + count[0] = (unsigned long long) + ((double)count[0] * count[1] / count[2] + 0.5); + } + } + /* + * Save the full runtime - to allow normalization during printout: + */ + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_TASK_CLOCK) + runtime_nsecs = count[0]; +} + +/* + * Print out the results of a single counter: + */ +static void print_counter(int counter) +{ + __u64 *count; + int scaled; + + count = event_res[counter]; + scaled = event_scaled[counter]; + + if (scaled == -1) { + fprintf(stderr, " %14s %-20s\n", + "", event_name(counter)); + return; + } + + if (nsec_counter(counter)) { + double msecs = (double)count[0] / 1000000; + + fprintf(stderr, " %14.6f %-20s", + msecs, event_name(counter)); + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_TASK_CLOCK) { + + fprintf(stderr, " # %11.3f CPU utilization factor", + (double)count[0] / (double)walltime_nsecs); + } + } else { + fprintf(stderr, " %14Ld %-20s", + count[0], event_name(counter)); + if (runtime_nsecs) + fprintf(stderr, " # %11.3f M/sec", + (double)count[0]/runtime_nsecs*1000.0); + } + if (scaled) + fprintf(stderr, " (scaled from %.2f%%)", + (double) count[2] / count[1] * 100); + fprintf(stderr, "\n"); +} + +static int do_perfstat(int argc, const char **argv) +{ + unsigned long long t0, t1; + int counter; + int status; + int pid; + int i; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perfstat_counter(counter); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], (char **)argv)) { + perror(argv[0]); + exit(-1); + } + } + + while (wait(&status) >= 0) + ; + + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + walltime_nsecs = t1 - t0; + + fflush(stdout); + + fprintf(stderr, "\n"); + fprintf(stderr, " Performance counter stats for \'%s", argv[0]); + + for (i = 1; i < argc; i++) + fprintf(stderr, " %s", argv[i]); + + fprintf(stderr, "\':\n"); + fprintf(stderr, "\n"); + + for (counter = 0; counter < nr_counters; counter++) + read_counter(counter); + + for (counter = 0; counter 
< nr_counters; counter++) + print_counter(counter); + + + fprintf(stderr, "\n"); + fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", + (double)(t1-t0)/1e6); + fprintf(stderr, "\n"); + + return 0; +} + +static void skip_signal(int signo) +{ +} + +static const char * const stat_usage[] = { + "perf stat [] ", + NULL +}; + +static const struct option options[] = { + OPT_CALLBACK('e', "event", NULL, "event", + "event selector. use 'perf list' to list available events", + parse_events), + OPT_BOOLEAN('i', "inherit", &inherit, + "child tasks inherit counters"), + OPT_INTEGER('p', "pid", &target_pid, + "stat events on existing pid"), + OPT_BOOLEAN('a', "all-cpus", &system_wide, + "system-wide collection from all CPUs"), + OPT_BOOLEAN('S', "scale", &scale, + "scale/normalize counters"), + OPT_END() +}; + +int cmd_stat(int argc, const char **argv, const char *prefix) +{ + page_size = sysconf(_SC_PAGE_SIZE); + + memcpy(attrs, default_attrs, sizeof(attrs)); + + argc = parse_options(argc, argv, options, stat_usage, 0); + if (!argc) + usage_with_options(stat_usage, options); + + if (!nr_counters) + nr_counters = 8; + + nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); + assert(nr_cpus <= MAX_NR_CPUS); + assert(nr_cpus >= 0); + + /* + * We dont want to block the signals - that would cause + * child tasks to inherit that and Ctrl-C would not work. + * What we want is for Ctrl-C to work in the exec()-ed + * task, but being ignored by perf stat itself: + */ + signal(SIGINT, skip_signal); + signal(SIGALRM, skip_signal); + signal(SIGABRT, skip_signal); + + return do_perfstat(argc, argv); +} -- cgit From 743ee1f80434138495bbb95ffb897acf46b51d54 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 7 Jun 2009 17:06:46 +0200 Subject: perf stat: Continue even on counter creation error Before: $ perf stat ~/hackbench 5 error: syscall returned with -1 (No such device) After: $ perf stat ~/hackbench 5 Time: 1.640 Performance counter stats for '/home/mingo/hackbench 5': 6524.570382 task-clock-ticks # 3.838 CPU utilization factor 35704 context-switches # 0.005 M/sec 191 CPU-migrations # 0.000 M/sec 8958 page-faults # 0.001 M/sec cycles instructions cache-references cache-misses Wall-clock time elapsed: 1699.999995 msecs Also add -v (--verbose) option to allow the printing of failed counter opens. Plus dont print 'inf' if wall-time is zero (due to jiffies granularity), instead skip the printing of the CPU utilization factor. 
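The behaviour described above reduces to three small patterns: treat a failed counter open as a soft error (remember the fd as -1 and keep going), skip such fds when reading results, and guard any division by a possibly-zero wall-time so 'inf' is never printed. Below is a minimal, self-contained sketch of that pattern; open_counter(), the counter struct and the sample values are made-up placeholders for illustration, not code from builtin-stat.c:

#include <stdio.h>

struct counter { int fd; long long value; };

/* placeholder for sys_perf_counter_open(): may legitimately fail */
static int open_counter(int idx)
{
	return (idx == 2) ? -1 : idx + 3;	/* pretend counter 2 is unsupported */
}

int main(void)
{
	struct counter counters[4];
	long long walltime_nsecs = 0;		/* may be 0 with coarse clocks */
	int i, verbose = 0;

	for (i = 0; i < 4; i++) {
		counters[i].fd = open_counter(i);
		if (counters[i].fd < 0 && verbose)
			fprintf(stderr, "counter %d could not be opened\n", i);
	}

	for (i = 0; i < 4; i++) {
		if (counters[i].fd < 0)
			continue;		/* soft-skip unsupported counters */
		counters[i].value = 1000 * (i + 1);	/* stand-in for read() */
		if (walltime_nsecs)		/* never divide by zero / print 'inf' */
			printf("%lld (%.3f per nsec)\n", counters[i].value,
			       (double)counters[i].value / walltime_nsecs);
		else
			printf("%lld\n", counters[i].value);
	}
	return 0;
}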
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2cbf5a189589..184ff95ef4f5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -59,6 +59,7 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { static int system_wide = 0; static int inherit = 1; +static int verbose = 0; static int fd[MAX_NR_CPUS][MAX_COUNTERS]; @@ -83,7 +84,7 @@ static __u64 event_scaled[MAX_COUNTERS]; static __u64 runtime_nsecs; static __u64 walltime_nsecs; -static void create_perfstat_counter(int counter) +static void create_perf_stat_counter(int counter) { struct perf_counter_attr *attr = attrs + counter; @@ -95,10 +96,8 @@ static void create_perfstat_counter(int counter) int cpu; for (cpu = 0; cpu < nr_cpus; cpu ++) { fd[cpu][counter] = sys_perf_counter_open(attr, -1, cpu, -1, 0); - if (fd[cpu][counter] < 0) { - printf("perfstat error: syscall returned with %d (%s)\n", - fd[cpu][counter], strerror(errno)); - exit(-1); + if (fd[cpu][counter] < 0 && verbose) { + printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[cpu][counter], strerror(errno)); } } } else { @@ -106,10 +105,8 @@ static void create_perfstat_counter(int counter) attr->disabled = 1; fd[0][counter] = sys_perf_counter_open(attr, 0, -1, -1, 0); - if (fd[0][counter] < 0) { - printf("perfstat error: syscall returned with %d (%s)\n", - fd[0][counter], strerror(errno)); - exit(-1); + if (fd[0][counter] < 0 && verbose) { + printf("Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n", counter, fd[0][counter], strerror(errno)); } } } @@ -147,6 +144,9 @@ static void read_counter(int counter) nv = scale ? 
3 : 1; for (cpu = 0; cpu < nr_cpus; cpu ++) { + if (fd[cpu][counter] < 0) + continue; + res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); assert(res == nv * sizeof(__u64)); @@ -204,8 +204,9 @@ static void print_counter(int counter) if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_TASK_CLOCK) { - fprintf(stderr, " # %11.3f CPU utilization factor", - (double)count[0] / (double)walltime_nsecs); + if (walltime_nsecs) + fprintf(stderr, " # %11.3f CPU utilization factor", + (double)count[0] / (double)walltime_nsecs); } } else { fprintf(stderr, " %14Ld %-20s", @@ -220,7 +221,7 @@ static void print_counter(int counter) fprintf(stderr, "\n"); } -static int do_perfstat(int argc, const char **argv) +static int do_perf_stat(int argc, const char **argv) { unsigned long long t0, t1; int counter; @@ -232,7 +233,7 @@ static int do_perfstat(int argc, const char **argv) nr_cpus = 1; for (counter = 0; counter < nr_counters; counter++) - create_perfstat_counter(counter); + create_perf_stat_counter(counter); /* * Enable counters and exec the command: @@ -305,6 +306,8 @@ static const struct option options[] = { "system-wide collection from all CPUs"), OPT_BOOLEAN('S', "scale", &scale, "scale/normalize counters"), + OPT_BOOLEAN('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), OPT_END() }; @@ -335,5 +338,5 @@ int cmd_stat(int argc, const char **argv, const char *prefix) signal(SIGALRM, skip_signal); signal(SIGABRT, skip_signal); - return do_perfstat(argc, argv); + return do_perf_stat(argc, argv); } -- cgit From e779898aa74cd2e97216368b3f3689ceffe8aeed Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 7 Jun 2009 18:14:46 +0200 Subject: perf stat: Print out instructins/cycle metric Before: 7549326754 cycles # 3201.811 M/sec 10007594937 instructions # 4244.408 M/sec After: 7542051194 cycles # 3201.996 M/sec 10007743852 instructions # 4248.811 M/sec # 1.327 per cycle Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 184ff95ef4f5..80855090fd9f 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -83,6 +83,7 @@ static __u64 event_scaled[MAX_COUNTERS]; static __u64 runtime_nsecs; static __u64 walltime_nsecs; +static __u64 runtime_cycles; static void create_perf_stat_counter(int counter) { @@ -177,6 +178,9 @@ static void read_counter(int counter) if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_TASK_CLOCK) runtime_nsecs = count[0]; + if (attrs[counter].type == PERF_TYPE_HARDWARE && + attrs[counter].config == PERF_COUNT_CPU_CYCLES) + runtime_cycles = count[0]; } /* @@ -214,6 +218,13 @@ static void print_counter(int counter) if (runtime_nsecs) fprintf(stderr, " # %11.3f M/sec", (double)count[0]/runtime_nsecs*1000.0); + if (runtime_cycles && + attrs[counter].type == PERF_TYPE_HARDWARE && + attrs[counter].config == PERF_COUNT_INSTRUCTIONS) { + + fprintf(stderr, " # %1.3f per cycle", + (double)count[0] / (double)runtime_cycles); + } } if (scaled) fprintf(stderr, " (scaled from %.2f%%)", -- cgit From f7b7c26e01e51fe46097e11f179dc71ce7950084 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Jun 2009 15:55:59 +0200 Subject: perf_counter tools: Propagate signals properly Currently report and stat catch SIGINT 
(and others) without altering their exit state. This means that things like: while :; do perf stat ./foo ; done Loops become hard-to-interrupt, because bash never sees perf terminate due to interruption. Fix this. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 80855090fd9f..6404906924fa 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -296,8 +296,20 @@ static int do_perf_stat(int argc, const char **argv) return 0; } +static volatile int signr = -1; + static void skip_signal(int signo) { + signr = signo; +} + +static void sig_atexit(void) +{ + if (signr == -1) + return; + + signal(signr, SIG_DFL); + kill(getpid(), signr); } static const char * const stat_usage[] = { @@ -345,6 +357,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix) * What we want is for Ctrl-C to work in the exec()-ed * task, but being ignored by perf stat itself: */ + atexit(sig_atexit); signal(SIGINT, skip_signal); signal(SIGALRM, skip_signal); signal(SIGABRT, skip_signal); -- cgit From f4dbfa8f3131a84257223393905f7efad0ca5996 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Jun 2009 14:06:28 +0200 Subject: perf_counter: Standardize event names Pure renames only, to PERF_COUNT_HW_* and PERF_COUNT_SW_*. Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6404906924fa..c43e4a97dc42 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,15 +46,16 @@ static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CONTEXT_SWITCHES }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CPU_CYCLES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_REFERENCES }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_CACHE_MISSES }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES}, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, + { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, + + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_REFERENCES}, + { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CACHE_MISSES }, + }; static int system_wide = 0; @@ -120,10 +121,10 @@ static inline int nsec_counter(int counter) if (attrs[counter].type != PERF_TYPE_SOFTWARE) return 0; - if (attrs[counter].config == PERF_COUNT_CPU_CLOCK) + if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK) return 1; - if (attrs[counter].config == PERF_COUNT_TASK_CLOCK) + 
if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) return 1; return 0; @@ -176,10 +177,10 @@ static void read_counter(int counter) * Save the full runtime - to allow normalization during printout: */ if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_TASK_CLOCK) + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) runtime_nsecs = count[0]; if (attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_CPU_CYCLES) + attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) runtime_cycles = count[0]; } @@ -206,7 +207,7 @@ static void print_counter(int counter) fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_TASK_CLOCK) { + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { if (walltime_nsecs) fprintf(stderr, " # %11.3f CPU utilization factor", @@ -220,7 +221,7 @@ static void print_counter(int counter) (double)count[0]/runtime_nsecs*1000.0); if (runtime_cycles && attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_INSTRUCTIONS) { + attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { fprintf(stderr, " # %1.3f per cycle", (double)count[0] / (double)runtime_cycles); -- cgit From 44175b6f397a6724121eeaf0f072e2c912a46614 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 13 Jun 2009 13:35:00 +0200 Subject: perf stat: Reorganize output - use IPC for the instruction normalization output - CPUs for the CPU utilization factor value. - print out time elapsed like the other rows - tidy up the task-clocks/cpu-clocks printout Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 67 ++++++++++++++++++++++++++++------------------- 1 file changed, 40 insertions(+), 27 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c43e4a97dc42..c12804853eab 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -184,6 +184,40 @@ static void read_counter(int counter) runtime_cycles = count[0]; } +static void nsec_printout(int counter, __u64 *count) +{ + double msecs = (double)count[0] / 1000000; + + fprintf(stderr, " %14.6f %-20s", msecs, event_name(counter)); + + if (attrs[counter].type == PERF_TYPE_SOFTWARE && + attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { + + if (walltime_nsecs) + fprintf(stderr, " # %10.3f CPUs", + (double)count[0] / (double)walltime_nsecs); + } +} + +static void abs_printout(int counter, __u64 *count) +{ + fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); + + if (runtime_cycles && + attrs[counter].type == PERF_TYPE_HARDWARE && + attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { + + fprintf(stderr, " # %10.3f IPC", + (double)count[0] / (double)runtime_cycles); + + return; + } + + if (runtime_nsecs) + fprintf(stderr, " # %10.3f M/sec", + (double)count[0]/runtime_nsecs*1000.0); +} + /* * Print out the results of a single counter: */ @@ -201,35 +235,15 @@ static void print_counter(int counter) return; } - if (nsec_counter(counter)) { - double msecs = (double)count[0] / 1000000; - - fprintf(stderr, " %14.6f %-20s", - msecs, event_name(counter)); - if (attrs[counter].type == PERF_TYPE_SOFTWARE && - attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { + if (nsec_counter(counter)) + nsec_printout(counter, count); + else + abs_printout(counter, count); - if (walltime_nsecs) - fprintf(stderr, 
" # %11.3f CPU utilization factor", - (double)count[0] / (double)walltime_nsecs); - } - } else { - fprintf(stderr, " %14Ld %-20s", - count[0], event_name(counter)); - if (runtime_nsecs) - fprintf(stderr, " # %11.3f M/sec", - (double)count[0]/runtime_nsecs*1000.0); - if (runtime_cycles && - attrs[counter].type == PERF_TYPE_HARDWARE && - attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - - fprintf(stderr, " # %1.3f per cycle", - (double)count[0] / (double)runtime_cycles); - } - } if (scaled) fprintf(stderr, " (scaled from %.2f%%)", (double) count[2] / count[1] * 100); + fprintf(stderr, "\n"); } @@ -290,8 +304,7 @@ static int do_perf_stat(int argc, const char **argv) fprintf(stderr, "\n"); - fprintf(stderr, " Wall-clock time elapsed: %12.6f msecs\n", - (double)(t1-t0)/1e6); + fprintf(stderr, " %14.9f seconds time elapsed.\n", (double)(t1-t0)/1e9); fprintf(stderr, "\n"); return 0; -- cgit From 42202dd56c717f173cd0bf2390249e1bf5cf210b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 13 Jun 2009 14:57:28 +0200 Subject: perf stat: Add feature to run and measure a command multiple times Add the --repeat feature to perf stat, which repeats a given command up to a 100 times, collects the stats and calculates an average and a stddev. For example, the following oneliner 'perf stat' command runs hackbench 5 times and prints a tabulated result of all metrics, with averages and noise levels (in percentage) printed: aldebaran:~/linux/linux/tools/perf> ./perf stat --repeat 5 ~/hackbench 10 Time: 0.117 Time: 0.108 Time: 0.089 Time: 0.088 Time: 0.100 Performance counter stats for '/home/mingo/hackbench 10' (5 runs): 1243.989586 task-clock-msecs # 10.460 CPUs ( +- 4.720% ) 47706 context-switches # 0.038 M/sec ( +- 19.706% ) 387 CPU-migrations # 0.000 M/sec ( +- 3.608% ) 17793 page-faults # 0.014 M/sec ( +- 0.354% ) 3770941606 cycles # 3031.329 M/sec ( +- 4.621% ) 1566372416 instructions # 0.415 IPC ( +- 2.703% ) 16783421 cache-references # 13.492 M/sec ( +- 5.202% ) 7128590 cache-misses # 5.730 M/sec ( +- 7.420% ) 0.118924455 seconds time elapsed. The goal of this feature is to allow the reliance on these accurate statistics and to know how many times a command has to be repeated for the noise to go down to an acceptable level. (The -v option can be used to see a line printed out as each run progresses.) 
Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 259 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 194 insertions(+), 65 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index c12804853eab..9eb42b1ae784 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -43,6 +43,7 @@ #include "util/parse-events.h" #include +#include static struct perf_counter_attr default_attrs[MAX_COUNTERS] = { @@ -79,12 +80,34 @@ static const unsigned int default_count[] = { 10000, }; -static __u64 event_res[MAX_COUNTERS][3]; -static __u64 event_scaled[MAX_COUNTERS]; +#define MAX_RUN 100 -static __u64 runtime_nsecs; -static __u64 walltime_nsecs; -static __u64 runtime_cycles; +static int run_count = 1; +static int run_idx = 0; + +static __u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static __u64 event_scaled[MAX_RUN][MAX_COUNTERS]; + +//static __u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; + + +static __u64 runtime_nsecs[MAX_RUN]; +static __u64 walltime_nsecs[MAX_RUN]; +static __u64 runtime_cycles[MAX_RUN]; + +static __u64 event_res_avg[MAX_COUNTERS][3]; +static __u64 event_res_noise[MAX_COUNTERS][3]; + +static __u64 event_scaled_avg[MAX_COUNTERS]; + +static __u64 runtime_nsecs_avg; +static __u64 runtime_nsecs_noise; + +static __u64 walltime_nsecs_avg; +static __u64 walltime_nsecs_noise; + +static __u64 runtime_cycles_avg; +static __u64 runtime_cycles_noise; static void create_perf_stat_counter(int counter) { @@ -140,7 +163,7 @@ static void read_counter(int counter) int cpu, nv; int scaled; - count = event_res[counter]; + count = event_res[run_idx][counter]; count[0] = count[1] = count[2] = 0; @@ -151,6 +174,8 @@ static void read_counter(int counter) res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); assert(res == nv * sizeof(__u64)); + close(fd[cpu][counter]); + fd[cpu][counter] = -1; count[0] += single_count[0]; if (scale) { @@ -162,13 +187,13 @@ static void read_counter(int counter) scaled = 0; if (scale) { if (count[2] == 0) { - event_scaled[counter] = -1; + event_scaled[run_idx][counter] = -1; count[0] = 0; return; } if (count[2] < count[1]) { - event_scaled[counter] = 1; + event_scaled[run_idx][counter] = 1; count[0] = (unsigned long long) ((double)count[0] * count[1] / count[2] + 0.5); } @@ -178,13 +203,62 @@ static void read_counter(int counter) */ if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) - runtime_nsecs = count[0]; + runtime_nsecs[run_idx] = count[0]; if (attrs[counter].type == PERF_TYPE_HARDWARE && attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES) - runtime_cycles = count[0]; + runtime_cycles[run_idx] = count[0]; } -static void nsec_printout(int counter, __u64 *count) +static int run_perf_stat(int argc, const char **argv) +{ + unsigned long long t0, t1; + int status = 0; + int counter; + int pid; + + if (!system_wide) + nr_cpus = 1; + + for (counter = 0; counter < nr_counters; counter++) + create_perf_stat_counter(counter); + + /* + * Enable counters and exec the command: + */ + t0 = rdclock(); + prctl(PR_TASK_PERF_COUNTERS_ENABLE); + + if ((pid = fork()) < 0) + perror("failed to fork"); + + if (!pid) { + if (execvp(argv[0], (char **)argv)) { + perror(argv[0]); + exit(-1); + } + } + + wait(&status); + + prctl(PR_TASK_PERF_COUNTERS_DISABLE); + t1 = rdclock(); + + walltime_nsecs[run_idx] = t1 - t0; + + for (counter = 0; counter 
< nr_counters; counter++) + read_counter(counter); + + return WEXITSTATUS(status); +} + +static void print_noise(__u64 *count, __u64 *noise) +{ + if (run_count > 1) + fprintf(stderr, " ( +- %7.3f%% )", + (double)noise[0]/(count[0]+1)*100.0); +} + +static void nsec_printout(int counter, __u64 *count, __u64 *noise) { double msecs = (double)count[0] / 1000000; @@ -193,29 +267,30 @@ static void nsec_printout(int counter, __u64 *count) if (attrs[counter].type == PERF_TYPE_SOFTWARE && attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) { - if (walltime_nsecs) - fprintf(stderr, " # %10.3f CPUs", - (double)count[0] / (double)walltime_nsecs); + if (walltime_nsecs_avg) + fprintf(stderr, " # %10.3f CPUs ", + (double)count[0] / (double)walltime_nsecs_avg); } + print_noise(count, noise); } -static void abs_printout(int counter, __u64 *count) +static void abs_printout(int counter, __u64 *count, __u64 *noise) { fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); - if (runtime_cycles && + if (runtime_cycles_avg && attrs[counter].type == PERF_TYPE_HARDWARE && attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) { - fprintf(stderr, " # %10.3f IPC", - (double)count[0] / (double)runtime_cycles); - - return; + fprintf(stderr, " # %10.3f IPC ", + (double)count[0] / (double)runtime_cycles_avg); + } else { + if (runtime_nsecs_avg) { + fprintf(stderr, " # %10.3f M/sec", + (double)count[0]/runtime_nsecs_avg*1000.0); + } } - - if (runtime_nsecs) - fprintf(stderr, " # %10.3f M/sec", - (double)count[0]/runtime_nsecs*1000.0); + print_noise(count, noise); } /* @@ -223,11 +298,12 @@ static void abs_printout(int counter, __u64 *count) */ static void print_counter(int counter) { - __u64 *count; + __u64 *count, *noise; int scaled; - count = event_res[counter]; - scaled = event_scaled[counter]; + count = event_res_avg[counter]; + noise = event_res_noise[counter]; + scaled = event_scaled_avg[counter]; if (scaled == -1) { fprintf(stderr, " %14s %-20s\n", @@ -236,9 +312,9 @@ static void print_counter(int counter) } if (nsec_counter(counter)) - nsec_printout(counter, count); + nsec_printout(counter, count, noise); else - abs_printout(counter, count); + abs_printout(counter, count, noise); if (scaled) fprintf(stderr, " (scaled from %.2f%%)", @@ -247,43 +323,83 @@ static void print_counter(int counter) fprintf(stderr, "\n"); } -static int do_perf_stat(int argc, const char **argv) +/* + * Normalize noise values down to stddev: + */ +static void normalize(__u64 *val) { - unsigned long long t0, t1; - int counter; - int status; - int pid; - int i; - - if (!system_wide) - nr_cpus = 1; + double res; - for (counter = 0; counter < nr_counters; counter++) - create_perf_stat_counter(counter); + res = (double)*val / (run_count * sqrt((double)run_count)); - /* - * Enable counters and exec the command: - */ - t0 = rdclock(); - prctl(PR_TASK_PERF_COUNTERS_ENABLE); + *val = (__u64)res; +} - if ((pid = fork()) < 0) - perror("failed to fork"); +/* + * Calculate the averages and noises: + */ +static void calc_avg(void) +{ + int i, j; + + for (i = 0; i < run_count; i++) { + runtime_nsecs_avg += runtime_nsecs[i]; + walltime_nsecs_avg += walltime_nsecs[i]; + runtime_cycles_avg += runtime_cycles[i]; + + for (j = 0; j < nr_counters; j++) { + event_res_avg[j][0] += event_res[i][j][0]; + event_res_avg[j][1] += event_res[i][j][1]; + event_res_avg[j][2] += event_res[i][j][2]; + event_scaled_avg[j] += event_scaled[i][j]; + } + } + runtime_nsecs_avg /= run_count; + walltime_nsecs_avg /= run_count; + runtime_cycles_avg /= run_count; + + for (j = 0; 
j < nr_counters; j++) { + event_res_avg[j][0] /= run_count; + event_res_avg[j][1] /= run_count; + event_res_avg[j][2] /= run_count; + } - if (!pid) { - if (execvp(argv[0], (char **)argv)) { - perror(argv[0]); - exit(-1); + for (i = 0; i < run_count; i++) { + runtime_nsecs_noise += + abs((__s64)(runtime_nsecs[i] - runtime_nsecs_avg)); + walltime_nsecs_noise += + abs((__s64)(walltime_nsecs[i] - walltime_nsecs_avg)); + runtime_cycles_noise += + abs((__s64)(runtime_cycles[i] - runtime_cycles_avg)); + + for (j = 0; j < nr_counters; j++) { + event_res_noise[j][0] += + abs((__s64)(event_res[i][j][0] - event_res_avg[j][0])); + event_res_noise[j][1] += + abs((__s64)(event_res[i][j][1] - event_res_avg[j][1])); + event_res_noise[j][2] += + abs((__s64)(event_res[i][j][2] - event_res_avg[j][2])); } } - while (wait(&status) >= 0) - ; + normalize(&runtime_nsecs_noise); + normalize(&walltime_nsecs_noise); + normalize(&runtime_cycles_noise); - prctl(PR_TASK_PERF_COUNTERS_DISABLE); - t1 = rdclock(); + for (j = 0; j < nr_counters; j++) { + normalize(&event_res_noise[j][0]); + normalize(&event_res_noise[j][1]); + normalize(&event_res_noise[j][2]); + } +} + +static void print_stat(int argc, const char **argv) +{ + int i, counter; + + calc_avg(); - walltime_nsecs = t1 - t0; + run_idx = 0; fflush(stdout); @@ -293,21 +409,19 @@ static int do_perf_stat(int argc, const char **argv) for (i = 1; i < argc; i++) fprintf(stderr, " %s", argv[i]); - fprintf(stderr, "\':\n"); - fprintf(stderr, "\n"); - - for (counter = 0; counter < nr_counters; counter++) - read_counter(counter); + fprintf(stderr, "\'"); + if (run_count > 1) + fprintf(stderr, " (%d runs)", run_count); + fprintf(stderr, ":\n\n"); for (counter = 0; counter < nr_counters; counter++) print_counter(counter); fprintf(stderr, "\n"); - fprintf(stderr, " %14.9f seconds time elapsed.\n", (double)(t1-t0)/1e9); + fprintf(stderr, " %14.9f seconds time elapsed.\n", + (double)walltime_nsecs_avg/1e9); fprintf(stderr, "\n"); - - return 0; } static volatile int signr = -1; @@ -345,11 +459,15 @@ static const struct option options[] = { "scale/normalize counters"), OPT_BOOLEAN('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), + OPT_INTEGER('r', "repeat", &run_count, + "repeat command and print average + stddev (max: 100)"), OPT_END() }; int cmd_stat(int argc, const char **argv, const char *prefix) { + int status; + page_size = sysconf(_SC_PAGE_SIZE); memcpy(attrs, default_attrs, sizeof(attrs)); @@ -357,6 +475,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix) argc = parse_options(argc, argv, options, stat_usage, 0); if (!argc) usage_with_options(stat_usage, options); + if (run_count <= 0 || run_count > MAX_RUN) + usage_with_options(stat_usage, options); if (!nr_counters) nr_counters = 8; @@ -376,5 +496,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix) signal(SIGALRM, skip_signal); signal(SIGABRT, skip_signal); - return do_perf_stat(argc, argv); + status = 0; + for (run_idx = 0; run_idx < run_count; run_idx++) { + if (run_count != 1 && verbose) + fprintf(stderr, "[ perf stat: executing run #%d ... 
]\n", run_idx+1); + status = run_perf_stat(argc, argv); + } + + print_stat(argc, argv); + + return status; } -- cgit From ef281a196d66b8bc2d067a3704712e5b93691fbc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 13 Jun 2009 15:40:35 +0200 Subject: perf stat: Enable raw data to be printed If -vv (very verbose) is specified, print out raw data in the following format: $ perf stat -vv -r 3 ./loop_1b_instructions [ perf stat: executing run #1 ... ] [ perf stat: executing run #2 ... ] [ perf stat: executing run #3 ... ] debug: runtime[0]: 235871872 debug: walltime[0]: 236646752 debug: runtime_cycles[0]: 755150182 debug: counter/0[0]: 235871872 debug: counter/1[0]: 235871872 debug: counter/2[0]: 235871872 debug: scaled[0]: 0 debug: counter/0[1]: 2 debug: counter/1[1]: 235870662 debug: counter/2[1]: 235870662 debug: scaled[1]: 0 debug: counter/0[2]: 1 debug: counter/1[2]: 235870437 debug: counter/2[2]: 235870437 debug: scaled[2]: 0 debug: counter/0[3]: 140 debug: counter/1[3]: 235870298 debug: counter/2[3]: 235870298 debug: scaled[3]: 0 debug: counter/0[4]: 755150182 debug: counter/1[4]: 235870145 debug: counter/2[4]: 235870145 debug: scaled[4]: 0 debug: counter/0[5]: 1001411258 debug: counter/1[5]: 235868838 debug: counter/2[5]: 235868838 debug: scaled[5]: 0 debug: counter/0[6]: 27897 debug: counter/1[6]: 235868560 debug: counter/2[6]: 235868560 debug: scaled[6]: 0 debug: counter/0[7]: 2910 debug: counter/1[7]: 235868151 debug: counter/2[7]: 235868151 debug: scaled[7]: 0 debug: runtime[0]: 235980257 debug: walltime[0]: 236770942 debug: runtime_cycles[0]: 755114546 debug: counter/0[0]: 235980257 debug: counter/1[0]: 235980257 debug: counter/2[0]: 235980257 debug: scaled[0]: 0 debug: counter/0[1]: 3 debug: counter/1[1]: 235980049 debug: counter/2[1]: 235980049 debug: scaled[1]: 0 debug: counter/0[2]: 1 debug: counter/1[2]: 235979907 debug: counter/2[2]: 235979907 debug: scaled[2]: 0 debug: counter/0[3]: 135 debug: counter/1[3]: 235979780 debug: counter/2[3]: 235979780 debug: scaled[3]: 0 debug: counter/0[4]: 755114546 debug: counter/1[4]: 235979652 debug: counter/2[4]: 235979652 debug: scaled[4]: 0 debug: counter/0[5]: 1001439771 debug: counter/1[5]: 235979304 debug: counter/2[5]: 235979304 debug: scaled[5]: 0 debug: counter/0[6]: 23723 debug: counter/1[6]: 235979050 debug: counter/2[6]: 235979050 debug: scaled[6]: 0 debug: counter/0[7]: 2213 debug: counter/1[7]: 235978820 debug: counter/2[7]: 235978820 debug: scaled[7]: 0 debug: runtime[0]: 235888002 debug: walltime[0]: 236700533 debug: runtime_cycles[0]: 754881504 debug: counter/0[0]: 235888002 debug: counter/1[0]: 235888002 debug: counter/2[0]: 235888002 debug: scaled[0]: 0 debug: counter/0[1]: 2 debug: counter/1[1]: 235887793 debug: counter/2[1]: 235887793 debug: scaled[1]: 0 debug: counter/0[2]: 1 debug: counter/1[2]: 235887645 debug: counter/2[2]: 235887645 debug: scaled[2]: 0 debug: counter/0[3]: 135 debug: counter/1[3]: 235887499 debug: counter/2[3]: 235887499 debug: scaled[3]: 0 debug: counter/0[4]: 754881504 debug: counter/1[4]: 235887368 debug: counter/2[4]: 235887368 debug: scaled[4]: 0 debug: counter/0[5]: 1001401731 debug: counter/1[5]: 235887024 debug: counter/2[5]: 235887024 debug: scaled[5]: 0 debug: counter/0[6]: 24212 debug: counter/1[6]: 235886786 debug: counter/2[6]: 235886786 debug: scaled[6]: 0 debug: counter/0[7]: 1824 debug: counter/1[7]: 235886560 debug: counter/2[7]: 235886560 debug: scaled[7]: 0 Performance counter stats for '/home/mingo/loop_1b_instructions' (3 runs): 235.913377 task-clock-msecs # 0.997 CPUs ( +- 
0.011% ) 2 context-switches # 0.000 M/sec ( +- 0.000% ) 1 CPU-migrations # 0.000 M/sec ( +- 0.000% ) 136 page-faults # 0.001 M/sec ( +- 0.730% ) 755048744 cycles # 3200.534 M/sec ( +- 0.009% ) 1001417586 instructions # 1.326 IPC ( +- 0.001% ) 25277 cache-references # 0.107 M/sec ( +- 3.988% ) 2315 cache-misses # 0.010 M/sec ( +- 9.845% ) 0.236706075 seconds time elapsed. This allows the summary stats to be validated. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 9eb42b1ae784..e5b3c0ff03a9 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -324,9 +324,9 @@ static void print_counter(int counter) } /* - * Normalize noise values down to stddev: + * normalize_noise noise values down to stddev: */ -static void normalize(__u64 *val) +static void normalize_noise(__u64 *val) { double res; @@ -335,6 +335,13 @@ static void normalize(__u64 *val) *val = (__u64)res; } +static void update_avg(const char *name, int idx, __u64 *avg, __u64 *val) +{ + *avg += *val; + + if (verbose > 1) + fprintf(stderr, "debug: %20s[%d]: %Ld\n", name, idx, *val); +} /* * Calculate the averages and noises: */ @@ -342,16 +349,23 @@ static void calc_avg(void) { int i, j; + if (verbose > 1) + fprintf(stderr, "\n"); + for (i = 0; i < run_count; i++) { - runtime_nsecs_avg += runtime_nsecs[i]; - walltime_nsecs_avg += walltime_nsecs[i]; - runtime_cycles_avg += runtime_cycles[i]; + update_avg("runtime", 0, &runtime_nsecs_avg, runtime_nsecs + i); + update_avg("walltime", 0, &walltime_nsecs_avg, walltime_nsecs + i); + update_avg("runtime_cycles", 0, &runtime_cycles_avg, runtime_cycles + i); for (j = 0; j < nr_counters; j++) { - event_res_avg[j][0] += event_res[i][j][0]; - event_res_avg[j][1] += event_res[i][j][1]; - event_res_avg[j][2] += event_res[i][j][2]; - event_scaled_avg[j] += event_scaled[i][j]; + update_avg("counter/0", j, + event_res_avg[j]+0, event_res[i][j]+0); + update_avg("counter/1", j, + event_res_avg[j]+1, event_res[i][j]+1); + update_avg("counter/2", j, + event_res_avg[j]+2, event_res[i][j]+2); + update_avg("scaled", j, + event_scaled_avg + j, event_scaled[i]+j); } } runtime_nsecs_avg /= run_count; @@ -382,14 +396,14 @@ static void calc_avg(void) } } - normalize(&runtime_nsecs_noise); - normalize(&walltime_nsecs_noise); - normalize(&runtime_cycles_noise); + normalize_noise(&runtime_nsecs_noise); + normalize_noise(&walltime_nsecs_noise); + normalize_noise(&runtime_cycles_noise); for (j = 0; j < nr_counters; j++) { - normalize(&event_res_noise[j][0]); - normalize(&event_res_noise[j][1]); - normalize(&event_res_noise[j][2]); + normalize_noise(&event_res_noise[j][0]); + normalize_noise(&event_res_noise[j][1]); + normalize_noise(&event_res_noise[j][2]); } } @@ -399,8 +413,6 @@ static void print_stat(int argc, const char **argv) calc_avg(); - run_idx = 0; - fflush(stdout); fprintf(stderr, "\n"); -- cgit From 9cffa8d53335d891cc0ecb3824a67118b3ee4b2f Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 19 Jun 2009 22:21:42 +1000 Subject: perf_counter tools: Define and use our own u64, s64 etc. definitions On 64-bit powerpc, __u64 is defined to be unsigned long rather than unsigned long long. This causes compiler warnings every time we print a __u64 value with %Lx. 
Rather than changing __u64, we define our own u64 to be unsigned long long on all architectures, and similarly s64 as signed long long. For consistency we also define u32, s32, u16, s16, u8 and s8. These definitions are put in a new header, types.h, because these definitions are needed in util/string.h and util/symbol.h. The main change here is the mechanical change of __[us]{64,32,16,8} to remove the "__". The other changes are: * Create types.h * Include types.h in perf.h, util/string.h and util/symbol.h * Add types.h to the LIB_H definition in Makefile * Added (u64) casts in process_overflow_event() and print_sym_table() to kill two remaining warnings. Signed-off-by: Paul Mackerras Acked-by: Peter Zijlstra Cc: benh@kernel.crashing.org LKML-Reference: <19003.33494.495844.956580@cargo.ozlabs.ibm.com> Signed-off-by: Ingo Molnar --- tools/perf/builtin-stat.c | 62 +++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 31 deletions(-) (limited to 'tools/perf/builtin-stat.c') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e5b3c0ff03a9..6d3eeac1ea25 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -85,29 +85,29 @@ static const unsigned int default_count[] = { static int run_count = 1; static int run_idx = 0; -static __u64 event_res[MAX_RUN][MAX_COUNTERS][3]; -static __u64 event_scaled[MAX_RUN][MAX_COUNTERS]; +static u64 event_res[MAX_RUN][MAX_COUNTERS][3]; +static u64 event_scaled[MAX_RUN][MAX_COUNTERS]; -//static __u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; +//static u64 event_hist[MAX_RUN][MAX_COUNTERS][3]; -static __u64 runtime_nsecs[MAX_RUN]; -static __u64 walltime_nsecs[MAX_RUN]; -static __u64 runtime_cycles[MAX_RUN]; +static u64 runtime_nsecs[MAX_RUN]; +static u64 walltime_nsecs[MAX_RUN]; +static u64 runtime_cycles[MAX_RUN]; -static __u64 event_res_avg[MAX_COUNTERS][3]; -static __u64 event_res_noise[MAX_COUNTERS][3]; +static u64 event_res_avg[MAX_COUNTERS][3]; +static u64 event_res_noise[MAX_COUNTERS][3]; -static __u64 event_scaled_avg[MAX_COUNTERS]; +static u64 event_scaled_avg[MAX_COUNTERS]; -static __u64 runtime_nsecs_avg; -static __u64 runtime_nsecs_noise; +static u64 runtime_nsecs_avg; +static u64 runtime_nsecs_noise; -static __u64 walltime_nsecs_avg; -static __u64 walltime_nsecs_noise; +static u64 walltime_nsecs_avg; +static u64 walltime_nsecs_noise; -static __u64 runtime_cycles_avg; -static __u64 runtime_cycles_noise; +static u64 runtime_cycles_avg; +static u64 runtime_cycles_noise; static void create_perf_stat_counter(int counter) { @@ -158,7 +158,7 @@ static inline int nsec_counter(int counter) */ static void read_counter(int counter) { - __u64 *count, single_count[3]; + u64 *count, single_count[3]; ssize_t res; int cpu, nv; int scaled; @@ -172,8 +172,8 @@ static void read_counter(int counter) if (fd[cpu][counter] < 0) continue; - res = read(fd[cpu][counter], single_count, nv * sizeof(__u64)); - assert(res == nv * sizeof(__u64)); + res = read(fd[cpu][counter], single_count, nv * sizeof(u64)); + assert(res == nv * sizeof(u64)); close(fd[cpu][counter]); fd[cpu][counter] = -1; @@ -251,14 +251,14 @@ static int run_perf_stat(int argc, const char **argv) return WEXITSTATUS(status); } -static void print_noise(__u64 *count, __u64 *noise) +static void print_noise(u64 *count, u64 *noise) { if (run_count > 1) fprintf(stderr, " ( +- %7.3f%% )", (double)noise[0]/(count[0]+1)*100.0); } -static void nsec_printout(int counter, __u64 *count, __u64 *noise) +static void nsec_printout(int counter, u64 *count, u64 *noise) { 
double msecs = (double)count[0] / 1000000; @@ -274,7 +274,7 @@ static void nsec_printout(int counter, __u64 *count, __u64 *noise) print_noise(count, noise); } -static void abs_printout(int counter, __u64 *count, __u64 *noise) +static void abs_printout(int counter, u64 *count, u64 *noise) { fprintf(stderr, " %14Ld %-20s", count[0], event_name(counter)); @@ -298,7 +298,7 @@ static void abs_printout(int counter, __u64 *count, __u64 *noise) */ static void print_counter(int counter) { - __u64 *count, *noise; + u64 *count, *noise; int scaled; count = event_res_avg[counter]; @@ -326,16 +326,16 @@ static void print_counter(int counter) /* * normalize_noise noise values down to stddev: */ -static void normalize_noise(__u64 *val) +static void normalize_noise(u64 *val) { double res; res = (double)*val / (run_count * sqrt((double)run_count)); - *val = (__u64)res; + *val = (u64)res; } -static void update_avg(const char *name, int idx, __u64 *avg, __u64 *val) +static void update_avg(const char *name, int idx, u64 *avg, u64 *val) { *avg += *val; @@ -380,19 +380,19 @@ static void calc_avg(void) for (i = 0; i < run_count; i++) { runtime_nsecs_noise += - abs((__s64)(runtime_nsecs[i] - runtime_nsecs_avg)); + abs((s64)(runtime_nsecs[i] - runtime_nsecs_avg)); walltime_nsecs_noise += - abs((__s64)(walltime_nsecs[i] - walltime_nsecs_avg)); + abs((s64)(walltime_nsecs[i] - walltime_nsecs_avg)); runtime_cycles_noise += - abs((__s64)(runtime_cycles[i] - runtime_cycles_avg)); + abs((s64)(runtime_cycles[i] - runtime_cycles_avg)); for (j = 0; j < nr_counters; j++) { event_res_noise[j][0] += - abs((__s64)(event_res[i][j][0] - event_res_avg[j][0])); + abs((s64)(event_res[i][j][0] - event_res_avg[j][0])); event_res_noise[j][1] += - abs((__s64)(event_res[i][j][1] - event_res_avg[j][1])); + abs((s64)(event_res[i][j][1] - event_res_avg[j][1])); event_res_noise[j][2] += - abs((__s64)(event_res[i][j][2] - event_res_avg[j][2])); + abs((s64)(event_res[i][j][2] - event_res_avg[j][2])); } } -- cgit
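For reference, the kind of self-contained typedef header the last commit describes could look like the sketch below. This is an illustration of the idea only, assuming the usual ILP32/LP64 data models where long long is always 64 bits wide; it is not a verbatim copy of tools/perf/util/types.h:

#ifndef PERF_TYPES_H
#define PERF_TYPES_H

/*
 * unsigned long long is 64 bits on all supported platforms, so %llu / %Lx
 * format strings work everywhere -- unlike __u64, which may be a plain
 * unsigned long on 64-bit architectures such as powerpc.
 */
typedef unsigned long long	u64;
typedef signed long long	s64;
typedef unsigned int		u32;
typedef signed int		s32;
typedef unsigned short		u16;
typedef signed short		s16;
typedef unsigned char		u8;
typedef signed char		s8;

#endif /* PERF_TYPES_H */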