From 83e1986032dfcd3f9e9fc0d06e11d9153edae19b Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:36 +0300 Subject: perf script: Allow time to be displayed in nanoseconds Add option --ns to display time to 9 decimal places. That is useful in some cases, for example when using Intel PT cycle accurate mode. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-6-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 284a76e04628..092843968791 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -29,6 +29,7 @@ static bool no_callchain; static bool latency_format; static bool system_wide; static bool print_flags; +static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -415,7 +416,10 @@ static void print_sample_start(struct perf_sample *sample, secs = nsecs / NSECS_PER_SEC; nsecs -= secs * NSECS_PER_SEC; usecs = nsecs / NSECS_PER_USEC; - printf("%5lu.%06lu: ", secs, usecs); + if (nanosecs) + printf("%5lu.%09llu: ", secs, nsecs); + else + printf("%5lu.%06lu: ", secs, usecs); } } @@ -1695,6 +1699,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN('\0', "show-switch-events", &script.show_switch_events, "Show context switch events (if recorded)"), OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_BOOLEAN(0, "ns", &nanosecs, + "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options", itrace_parse_synth_opts), -- cgit From 03cd1fed2b8730271d3a8dbabd87989abddc33c4 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:49 +0300 Subject: perf script: Add a setting for maximum stack depth Add a setting for maximum stack depth in preparation for allowing for synthesized callchains. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-19-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 092843968791..a65b498df097 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -33,6 +33,8 @@ static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); +static unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; + enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, PERF_OUTPUT_TID = 1U << 1, @@ -475,7 +477,7 @@ static void print_sample_bts(union perf_event *event, } } perf_evsel__print_ip(evsel, sample, al, print_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } /* print branch_to information */ @@ -552,7 +554,7 @@ static void process_event(union perf_event *event, struct perf_sample *sample, perf_evsel__print_ip(evsel, sample, al, output[attr->type].print_ip_opts, - PERF_MAX_STACK_DEPTH); + scripting_max_stack); } if (PRINT_FIELD(IREGS)) -- cgit From 44cbe7295c3808977159f500a5bcdebf12a7db5f Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:50 +0300 Subject: perf scripting python: Allow for max_stack greater than PERF_MAX_STACK_DEPTH Use the scripting_max_stack value to allow for values greater than PERF_MAX_STACK_DEPTH. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-20-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a65b498df097..5c3c02d5af53 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -33,7 +33,7 @@ static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); -static unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; +unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; enum perf_output_field { PERF_OUTPUT_COMM = 1U << 0, -- cgit From 3c5b645faee7afbd417f6127694adbd26778a9eb Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 25 Sep 2015 16:15:51 +0300 Subject: perf script: Make scripting_max_stack value allow for synthesized callchains perf script has a setting to set the maximum stack depth when processing callchains. The setting defaults to the hard-coded maximum definition PERF_MAX_STACK_DEPTH which is 127. It is possible, when processing instruction traces, to synthesize callchains. Synthesized callchains do not have the kernel size limitation and are whatever size the user requests, although validation presently prevents the user requested a value greater that 1024. The default value is 16. To allow for synthesized callchains, make the scripting_max_stack value at least the same size as the synthesized callchain size. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Link: http://lkml.kernel.org/r/1443186956-18718-21-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 5c3c02d5af53..8ce1c6bbfa45 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1748,6 +1748,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) } } + if (itrace_synth_opts.callchain && + itrace_synth_opts.callchain_sz > scripting_max_stack) + scripting_max_stack = itrace_synth_opts.callchain_sz; + /* make sure PERF_EXEC_PATH is set for scripts */ perf_set_argv_exec_path(perf_exec_path()); -- cgit From d2b5a315ae84d235f00761468885c466f81d7805 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 16 Oct 2015 12:41:25 +0200 Subject: perf script: Check output fields only for samples There's no need to check sampling output fields for events without perf_event_attr::sample_type field set. Signed-off-by: Jiri Olsa Tested-by: Kan Liang Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444992092-17897-51-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 8ce1c6bbfa45..2653c0273b89 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -686,7 +686,10 @@ static int process_attr(struct perf_tool *tool, union perf_event *event, set_print_ip_opts(&evsel->attr); - return perf_evsel__check_attr(evsel, scr->session); + if (evsel->attr.sample_type) + err = perf_evsel__check_attr(evsel, scr->session); + + return err; } static int process_comm_event(struct perf_tool *tool, -- cgit From c71183697250b356be6c7c1abc2e9a74073e1dca Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Sun, 25 Oct 2015 00:49:27 +0900 Subject: perf tools: Introduce usage_with_options_msg() Now usage_with_options() setup a pager before printing message so normal printf() or pr_err() will not be shown. The usage_with_options_msg() can be used to print some help message before usage strings. Signed-off-by: Namhyung Kim Acked-by: Masami Hiramatsu Cc: David Ahern Cc: Jiri Olsa Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1445701767-12731-4-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2653c0273b89..278acb22f029 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1767,9 +1767,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) rep_script_path = get_script_path(argv[0], REPORT_SUFFIX); if (!rec_script_path && !rep_script_path) { - fprintf(stderr, " Couldn't find script %s\n\n See perf" + usage_with_options_msg(script_usage, options, + "Couldn't find script `%s'\n\n See perf" " script -l for available scripts.\n", argv[0]); - usage_with_options(script_usage, options); } if (is_top_script(argv[0])) { @@ -1780,10 +1780,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) rep_args = has_required_arg(rep_script_path); rec_args = (argc - 1) - rep_args; if (rec_args < 0) { - fprintf(stderr, " %s script requires options." + usage_with_options_msg(script_usage, options, + "`%s' script requires options." "\n\n See perf script -l for available " "scripts and options.\n", argv[0]); - usage_with_options(script_usage, options); } } -- cgit From dc323ce8e72d6d1beb9af9bbd29c4d55ce3d7fb0 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 31 Aug 2015 18:41:13 +0200 Subject: perf script: Enable printing of branch stack This patch improves perf script by enabling printing of the branch stack via the 'brstack' and 'brstacksym' arguments to the field selection option -F. The option is off by default and operates only if the perf.data file has branch stack content. The branches are printed in to/from pairs. The most recent branch is printed first. The number of branch entries vary based on the underlying hardware and filtering used. The brstack prints FROM/TO addresses in raw hexadecimal format. The brstacksym prints FROM/TO addresses in symbolic form wherever possible. $ perf script -F ip,brstack 5d3000 0x401aa0/0x5d2000/M/-/-/-/0 ... $ perf script -F ip,brstacksym 4011e0 noploop+0x0/noploop+0x0/P/-/-/0 The notation F/T/M/X/A/C describes the attributes of the branch. F=from, T=to, M/P=misprediction/prediction, X=TSX, A=TSX abort, C=cycles (SKL) Signed-off-by: Stephane Eranian Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Yuanfang Chen Link: http://lkml.kernel.org/r/1441039273-16260-5-git-send-email-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 82 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-script.c') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 278acb22f029..72b5deb4bd79 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -51,6 +51,8 @@ enum perf_output_field { PERF_OUTPUT_SRCLINE = 1U << 12, PERF_OUTPUT_PERIOD = 1U << 13, PERF_OUTPUT_IREGS = 1U << 14, + PERF_OUTPUT_BRSTACK = 1U << 15, + PERF_OUTPUT_BRSTACKSYM = 1U << 16, }; struct output_option { @@ -72,6 +74,8 @@ struct output_option { {.str = "srcline", .field = PERF_OUTPUT_SRCLINE}, {.str = "period", .field = PERF_OUTPUT_PERIOD}, {.str = "iregs", .field = PERF_OUTPUT_IREGS}, + {.str = "brstack", .field = PERF_OUTPUT_BRSTACK}, + {.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM}, }; /* default set to maintain compatibility with current format */ @@ -425,6 +429,77 @@ static void print_sample_start(struct perf_sample *sample, } } +static inline char +mispred_str(struct branch_entry *br) +{ + if (!(br->flags.mispred || br->flags.predicted)) + return '-'; + + return br->flags.predicted ? 'P' : 'M'; +} + +static void print_sample_brstack(union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct thread *thread __maybe_unused, + struct perf_event_attr *attr __maybe_unused) +{ + struct branch_stack *br = sample->branch_stack; + u64 i; + + if (!(br && br->nr)) + return; + + for (i = 0; i < br->nr; i++) { + printf(" 0x%"PRIx64"/0x%"PRIx64"/%c/%c/%c/%d ", + br->entries[i].from, + br->entries[i].to, + mispred_str( br->entries + i), + br->entries[i].flags.in_tx? 'X' : '-', + br->entries[i].flags.abort? 'A' : '-', + br->entries[i].flags.cycles); + } +} + +static void print_sample_brstacksym(union perf_event *event __maybe_unused, + struct perf_sample *sample, + struct thread *thread __maybe_unused, + struct perf_event_attr *attr __maybe_unused) +{ + struct branch_stack *br = sample->branch_stack; + struct addr_location alf, alt; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u64 i, from, to; + + if (!(br && br->nr)) + return; + + for (i = 0; i < br->nr; i++) { + + memset(&alf, 0, sizeof(alf)); + memset(&alt, 0, sizeof(alt)); + from = br->entries[i].from; + to = br->entries[i].to; + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf); + if (alf.map) + alf.sym = map__find_symbol(alf.map, alf.addr, NULL); + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt); + if (alt.map) + alt.sym = map__find_symbol(alt.map, alt.addr, NULL); + + symbol__fprintf_symname_offs(alf.sym, &alf, stdout); + putchar('/'); + symbol__fprintf_symname_offs(alt.sym, &alt, stdout); + printf("/%c/%c/%c/%d ", + mispred_str( br->entries + i), + br->entries[i].flags.in_tx? 'X' : '-', + br->entries[i].flags.abort? 'A' : '-', + br->entries[i].flags.cycles); + } +} + + static void print_sample_addr(union perf_event *event, struct perf_sample *sample, struct thread *thread, @@ -560,6 +635,11 @@ static void process_event(union perf_event *event, struct perf_sample *sample, if (PRINT_FIELD(IREGS)) print_sample_iregs(event, sample, thread, attr); + if (PRINT_FIELD(BRSTACK)) + print_sample_brstack(event, sample, thread, attr); + else if (PRINT_FIELD(BRSTACKSYM)) + print_sample_brstacksym(event, sample, thread, attr); + printf("\n"); } @@ -1681,7 +1761,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period,iregs,flags", parse_output_fields), + "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", -- cgit