From 94f0705eee70cb256d21c9abe7ce44ffbe093555 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 10 May 2023 23:27:24 -0700 Subject: perf annotate: Parse x86 SIB addressing properly When the source argument of the "mov" instruction looks like below, it didn't parse the whole operand and just stopped at the first comma. mov (%rbx,%rax,1),%rcx Fix it by checking the parentheses and move it to the closing one. Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Andi Kleen Cc: Ingo Molnar Cc: Jiri Olsa Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230511062725.514752-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 11992cfe271c..b708bbc49c9e 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -558,6 +558,19 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy return -1; *s = '\0'; + + /* + * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) + * then it needs to have the closing parenthesis. + */ + if (strchr(ops->raw, '(')) { + *s = ','; + s = strchr(ops->raw, ')'); + if (s == NULL || s[1] != ',') + return -1; + *++s = '\0'; + } + ops->source.raw = strdup(ops->raw); *s = ','; -- cgit From d0b35979986e3bd03cb2f2e887e0b8036ae06198 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 24 May 2023 13:50:53 -0700 Subject: perf annotate: Handle x86 instruction suffix generally In AT&T asm syntax, most of x86 instructions can have size suffix like b, w, l or q. Instead of adding all these instructions in the table, we can handle them in a general way. For example, it can try to find an instruction as is. If not found, assuming it has a suffix and it'd try again without the suffix if it's one of the allowed suffixes. This way, we can reduce the instruction table size for duplicated entries of the same instructions with a different suffix. If an instruction xyz and others like xyz are completely different ones, then they both need to be listed in the table so that they can be found before the second attempt (without the suffix). Reviewed-by: Adrian Hunter Reviewed-by: Masami Hiramatsu Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230524205054.3087004-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b708bbc49c9e..7f05f2a2aa83 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -70,6 +70,7 @@ struct arch { struct ins_ops *(*associate_instruction_ops)(struct arch *arch, const char *name); bool sorted_instructions; bool initialized; + const char *insn_suffix; void *priv; unsigned int model; unsigned int family; @@ -179,6 +180,7 @@ static struct arch architectures[] = { .init = x86__annotate_init, .instructions = x86__instructions, .nr_instructions = ARRAY_SIZE(x86__instructions), + .insn_suffix = "bwlq", .objdump = { .comment_char = '#', }, @@ -720,6 +722,26 @@ static struct ins_ops *__ins__find(struct arch *arch, const char *name) } ins = bsearch(name, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + if (ins) + return ins->ops; + + if (arch->insn_suffix) { + char tmp[32]; + char suffix; + size_t len = strlen(name); + + if (len == 0 || len >= sizeof(tmp)) + return NULL; + + suffix = name[len - 1]; + if (strchr(arch->insn_suffix, suffix) == NULL) + return NULL; + + strcpy(tmp, name); + tmp[len - 1] = '\0'; /* remove the suffix and check again */ + + ins = bsearch(tmp, arch->instructions, nmemb, sizeof(struct ins), ins__key_cmp); + } return ins ? ins->ops : NULL; } -- cgit From d3d53b2e9617ea606aae91a013163895f037de96 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 8 Jun 2023 16:28:18 -0700 Subject: perf annotate: Fix parse_objdump_line memory leak fileloc is used to hold a previous line, before overwriting it ensure the previous contents is freed. Free the storage once done in symbol__disassemble. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: Andi Kleen Cc: Athira Rajeev Cc: Brian Robbins Cc: Changbin Du Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: Fangrui Song Cc: German Gomez Cc: Ingo Molnar Cc: Ivan Babrou Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kan Liang Cc: Leo Yan Cc: Liam Howlett Cc: Mark Rutland Cc: Miguel Ojeda Cc: Mike Leach Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Wenyu Liu Cc: Will Deacon Cc: Yang Jihong Cc: Ye Xingchen Cc: Yuan Can Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230608232823.4027869-22-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7f05f2a2aa83..57ef616cdbfd 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1546,6 +1546,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, /* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) { *line_nr = atoi(parsed_line + match[1].rm_so); + free(*fileloc); *fileloc = strdup(parsed_line); return 0; } @@ -1594,7 +1595,6 @@ static int symbol__parse_objdump_line(struct symbol *sym, } annotation_line__add(&dl->al, ¬es->src->source); - return 0; } @@ -2136,6 +2136,7 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) nline++; } free(line); + free(fileloc); err = finish_command(&objdump_process); if (err) -- cgit From 625db36e6c53b39c664b7fcb509207d26ac58ea6 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 8 Jun 2023 16:28:22 -0700 Subject: perf srcline: Change free_srcline to zfree_srcline Make use after free more unlikely. Signed-off-by: Ian Rogers Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Ali Saidi Cc: Andi Kleen Cc: Athira Rajeev Cc: Brian Robbins Cc: Changbin Du Cc: Dmitrii Dolgov <9erthalion6@gmail.com> Cc: Fangrui Song Cc: German Gomez Cc: Ingo Molnar Cc: Ivan Babrou Cc: James Clark Cc: Jing Zhang Cc: Jiri Olsa Cc: John Garry Cc: K Prateek Nayak Cc: Kan Liang Cc: Leo Yan Cc: Liam Howlett Cc: Mark Rutland Cc: Miguel Ojeda Cc: Mike Leach Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Sean Christopherson Cc: Steinar H. Gunderson Cc: Suzuki Poulouse Cc: Wenyu Liu Cc: Will Deacon Cc: Yang Jihong Cc: Ye Xingchen Cc: Yuan Can Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Link: https://lore.kernel.org/r/20230608232823.4027869-26-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 57ef616cdbfd..bde890cfa620 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1218,7 +1218,7 @@ static void annotation_line__init(struct annotation_line *al, static void annotation_line__exit(struct annotation_line *al) { - free_srcline(al->path); + zfree_srcline(&al->path); zfree(&al->line); } -- cgit From 103b3d2f94732fb1bc796e68e4cdfbcd731bbeaa Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 12 Jun 2023 16:00:26 -0700 Subject: perf annotate: Allow whitespace between insn operands The llvm-objdump adds a space between the operands while GNU objdump does not. Allow a space to handle the both. In GNU objdump: Disassembly of section .text: here | ffffffff81000000 <_stext>: v ffffffff81000000: 48 8d 25 51 1f 40 01 lea 0x1401f51(%rip),%rsp ffffffff81000007: e8 d4 00 00 00 call ffffffff810000e0 ffffffff8100000c: 48 8d 3d ed ff ff ff lea -0x13(%rip),%rdi In llvm-objdump: Disassembly of section .text: here | ffffffff81000000 : v ffffffff81000000: 48 8d 25 51 1f 40 01 leaq 20979537(%rip), %rsp ffffffff81000007: e8 d4 00 00 00 callq 0xffffffff810000e0 ffffffff8100000c: 48 8d 3d ed ff ff ff leaq -19(%rip), %rdi Signed-off-by: Namhyung Kim Acked-by: Ian Rogers Cc: Adrian Hunter Cc: Ingo Molnar Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20230612230026.3887586-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bde890cfa620..cdd1924a4418 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -579,7 +579,7 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map_sy if (ops->source.raw == NULL) return -1; - target = ++s; + target = skip_spaces(++s); comment = strchr(s, arch->objdump.comment_char); if (comment != NULL) -- cgit From 66dc1920f6bbc172ee35520d024977d5330df842 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Fri, 25 Nov 2022 12:42:09 +0100 Subject: perf annotate: Work with vmlinux outside symfs It is currently possible to use --symfs along with a vmlinux which lies outside of the symfs by passing an absolute path to --vmlinux, thanks to the check in dso__load_vmlinux() which handles this explicitly. However, the annotate code lacks this check and thus 'perf annotate' does not work ("Internal error: Invalid -1 error code") for kernel functions with this combination. Add the missing handling. Signed-off-by: Vincent Whitchurch Cc: Alexander Shishkin Cc: Ingo Molnar Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: kernel@axis.com Link: https://lore.kernel.org/r/20221125114210.2353820-1-vincent.whitchurch@axis.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index cdd1924a4418..43865601f96c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1731,7 +1731,10 @@ fallback: * cache, or is just a kallsyms file, well, lets hope that this * DSO is the same as when 'perf record' ran. */ - __symbol__join_symfs(filename, filename_size, dso->long_name); + if (dso->kernel && dso->long_name[0] == '/') + snprintf(filename, filename_size, "%s", dso->long_name); + else + __symbol__join_symfs(filename, filename_size, dso->long_name); mutex_lock(&dso->lock); if (access(filename, R_OK) && errno == ENOENT && dso->nsinfo) { -- cgit From 2e9f9d4a729f12b4bc3fa60406374327b1809abe Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Wed, 14 Jun 2023 21:07:15 -0700 Subject: perf annotation: Switch lock from a mutex to a sharded_mutex Remove the "struct mutex lock" variable from annotation that is allocated per symbol. This removes in the region of 40 bytes per symbol allocation. Use a sharded mutex where the number of shards is set to the number of CPUs. Assuming good hashing of the annotation (done based on the pointer), this means in order to contend there needs to be more threads than CPUs, which is not currently true in any perf command. Were contention an issue it is straightforward to increase the number of shards in the mutex. On my Debian/glibc based machine, this reduces the size of struct annotation from 136 bytes to 96 bytes, or nearly 30%. Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Andres Freund Cc: Mark Rutland Cc: Yuan Can Cc: Peter Zijlstra Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Huacai Chen Cc: Jiri Olsa Cc: Masami Hiramatsu Cc: Alexander Shishkin Cc: Kan Liang Cc: Ingo Molnar Link: https://lore.kernel.org/r/20230615040715.2064350-2-irogers@google.com Signed-off-by: Namhyung Kim --- tools/perf/util/annotate.c | 66 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 57 insertions(+), 9 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 43865601f96c..77c816400719 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -32,6 +32,7 @@ #include "block-range.h" #include "string2.h" #include "util/event.h" +#include "util/sharded_mutex.h" #include "arch/common.h" #include "namespaces.h" #include @@ -856,7 +857,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) { struct annotation *notes = symbol__annotation(sym); - mutex_lock(¬es->lock); + annotation__lock(notes); if (notes->src != NULL) { memset(notes->src->histograms, 0, notes->src->nr_histograms * notes->src->sizeof_sym_hist); @@ -864,7 +865,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym) memset(notes->src->cycles_hist, 0, symbol__size(sym) * sizeof(struct cyc_hist)); } - mutex_unlock(¬es->lock); + annotation__unlock(notes); } static int __symbol__account_cycles(struct cyc_hist *ch, @@ -1121,7 +1122,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) notes->hit_insn = 0; notes->cover_insn = 0; - mutex_lock(¬es->lock); + annotation__lock(notes); for (offset = size - 1; offset >= 0; --offset) { struct cyc_hist *ch; @@ -1140,7 +1141,7 @@ void annotation__compute_ipc(struct annotation *notes, size_t size) notes->have_cycles = true; } } - mutex_unlock(¬es->lock); + annotation__unlock(notes); } int addr_map_symbol__inc_samples(struct addr_map_symbol *ams, struct perf_sample *sample, @@ -1291,17 +1292,64 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r return ins__scnprintf(&dl->ins, bf, size, &dl->ops, max_ins_name); } -void annotation__init(struct annotation *notes) +void annotation__exit(struct annotation *notes) { - mutex_init(¬es->lock); + annotated_source__delete(notes->src); } -void annotation__exit(struct annotation *notes) +static struct sharded_mutex *sharded_mutex; + +static void annotation__init_sharded_mutex(void) { - annotated_source__delete(notes->src); - mutex_destroy(¬es->lock); + /* As many mutexes as there are CPUs. */ + sharded_mutex = sharded_mutex__new(cpu__max_present_cpu().cpu); +} + +static size_t annotation__hash(const struct annotation *notes) +{ + return (size_t)notes; } +static struct mutex *annotation__get_mutex(const struct annotation *notes) +{ + static pthread_once_t once = PTHREAD_ONCE_INIT; + + pthread_once(&once, annotation__init_sharded_mutex); + if (!sharded_mutex) + return NULL; + + return sharded_mutex__get_mutex(sharded_mutex, annotation__hash(notes)); +} + +void annotation__lock(struct annotation *notes) + NO_THREAD_SAFETY_ANALYSIS +{ + struct mutex *mutex = annotation__get_mutex(notes); + + if (mutex) + mutex_lock(mutex); +} + +void annotation__unlock(struct annotation *notes) + NO_THREAD_SAFETY_ANALYSIS +{ + struct mutex *mutex = annotation__get_mutex(notes); + + if (mutex) + mutex_unlock(mutex); +} + +bool annotation__trylock(struct annotation *notes) +{ + struct mutex *mutex = annotation__get_mutex(notes); + + if (!mutex) + return false; + + return mutex_trylock(mutex); +} + + static void annotation_line__add(struct annotation_line *al, struct list_head *head) { list_add_tail(&al->node, head); -- cgit From 4ca0d340ce206985d9b9956993d7c81eeb1d3198 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 20 Jun 2023 21:20:25 +0800 Subject: perf annotate: Fix instruction association and parsing for LoongArch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the perf annotate view for LoongArch, there is no arrowed line pointing to the target from the branch instruction. This issue is caused by incorrect instruction association and parsing. $ perf record alloc-6276705c94ad1398 # rust benchmark $ perf report 0.28 │ ori $a1, $zero, 0x63 │ move $a2, $zero 10.55 │ addi.d $a3, $a2, 1(0x1) │ sltu $a4, $a3, $s7 9.53 │ masknez $a4, $s7, $a4 │ sub.d $a3, $a3, $a4 12.12 │ st.d $a1, $fp, 24(0x18) │ st.d $a3, $fp, 16(0x10) 16.29 │ slli.d $a2, $a2, 0x2 │ ldx.w $a2, $s8, $a2 12.77 │ st.w $a2, $sp, 724(0x2d4) │ st.w $s0, $sp, 720(0x2d0) 7.03 │ addi.d $a2, $sp, 720(0x2d0) │ addi.d $a1, $a1, -1(0xfff) 12.03 │ move $a2, $a3 │ → bne $a1, $s3, -52(0x3ffcc) # 82ce8 2.50 │ addi.d $a0, $a0, 1(0x1) This patch fixes instruction association issues, such as associating branch instructions with jump_ops instead of call_ops, and corrects false instruction matches. It also implements branch instruction parsing specifically for LoongArch. With this patch, we will be able to see the arrowed line. 0.79 │3ec: ori $a1, $zero, 0x63 │ move $a2, $zero 10.32 │3f4:┌─→addi.d $a3, $a2, 1(0x1) │ │ sltu $a4, $a3, $s7 10.44 │ │ masknez $a4, $s7, $a4 │ │ sub.d $a3, $a3, $a4 14.17 │ │ st.d $a1, $fp, 24(0x18) │ │ st.d $a3, $fp, 16(0x10) 13.15 │ │ slli.d $a2, $a2, 0x2 │ │ ldx.w $a2, $s8, $a2 11.00 │ │ st.w $a2, $sp, 724(0x2d4) │ │ st.w $s0, $sp, 720(0x2d0) 8.00 │ │ addi.d $a2, $sp, 720(0x2d0) │ │ addi.d $a1, $a1, -1(0xfff) 11.99 │ │ move $a2, $a3 │ └──bne $a1, $s3, 3f4 3.17 │ addi.d $a0, $a0, 1(0x1) Signed-off-by: WANG Rui Acked-by: Namhyung Kim Cc: Mark Rutland Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: loongarch@lists.linux.dev Cc: loongson-kernel@lists.loongnix.cn Cc: Huacai Chen Cc: Tiezhu Yang Cc: Ingo Molnar Cc: WANG Xuerui Link: https://lore.kernel.org/r/20230620132025.105563-1-wangrui@loongson.cn Signed-off-by: Namhyung Kim --- tools/perf/util/annotate.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/annotate.c') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 77c816400719..ba988a13dacb 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -62,6 +62,10 @@ static regex_t file_lineno; static struct ins_ops *ins__find(struct arch *arch, const char *name); static void ins__sort(struct arch *arch); static int disasm_line__parse(char *line, const char **namep, char **rawp); +static int call__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); +static int jump__scnprintf(struct ins *ins, char *bf, size_t size, + struct ins_operands *ops, int max_ins_name); struct arch { const char *name; @@ -324,7 +328,7 @@ static struct ins_ops call_ops = { bool ins__is_call(const struct ins *ins) { - return ins->ops == &call_ops || ins->ops == &s390_call_ops; + return ins->ops == &call_ops || ins->ops == &s390_call_ops || ins->ops == &loongarch_call_ops; } /* @@ -465,7 +469,7 @@ static struct ins_ops jump_ops = { bool ins__is_jump(const struct ins *ins) { - return ins->ops == &jump_ops; + return ins->ops == &jump_ops || ins->ops == &loongarch_jump_ops; } static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) -- cgit