diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-05 11:56:18 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-05 11:56:18 -0700 |
commit | 27151f177827d478508e756c7657273261aaf8a9 (patch) | |
tree | 393ee6f1b12842c87461c471c0da70e4ecd93da8 /tools/perf/scripts/python/flamegraph.py | |
parent | 58ca24158758f1784400d32743373d7d6227d018 (diff) | |
parent | c7a3828d98db2730079265b5f51933dfcef8bb5f (diff) |
Merge tag 'perf-tools-for-v5.15-2021-09-04' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tool updates from Arnaldo Carvalho de Melo:
"New features:
- Improvements for the flamegraph python script, including:
- Display perf.data header
- Display PIDs of user stacks
- Added option to change color scheme
- Default to blue/green color scheme to improve accessibility
- Correctly identify kernel stacks when debuginfo is available
- Improvements for 'perf bench futex':
- Add --mlockall parameter
- Add --broadcast and --pi to the 'requeue' sub benchmark
- Add support for PMU aliases.
- Introduce an ARM Coresight ETE decoder.
- Add a 'perf bench' entry for evlist open/close operations, to help
quantify improvements with multithreading 'perf record'.
- Allow reporting the [un]throttle PERF_RECORD_ meta event in 'perf
script's python scripting.
- Add a 'perf test' entry for PMU aliases.
- Add a 'perf test' entry for 'perf record/perf report/perf script'
pipe mode.
Fixes:
- perf script dlfilter (API for filtering via dynamically loaded
shared object introduced in v5.14) fixes and a 'perf test' entry
for it.
- Fix get_current_dir_name() compilation on Android.
- Fix issues with asciidoc and uses of double dashes.
- Fix memory leaks in the BTF handling code.
- Fix leftover problems in the Documentation from the infrastructure
originally lifted from the git codebase.
- Fix *probe_vfs_getname.sh 'perf test' failures.
- Handle fd gaps in 'perf test's test__dso_data_reopen().
- Make sure to show disassembly warnings for 'perf annotate --stdio'.
- Fix output from pipe to file and vice-versa in 'perf
record/report/script'.
- Correct 'perf data -h' output.
- Fix wrong comm in system-wide mode with 'perf record --delay'.
- Do not allow --for-each-cgroup without cpu in 'perf stat'
- Make 'perf test --skip' work on shell tests.
- Fix libperf's verbose printing.
Misc improvements:
- Preparatory patches for multithreading various 'perf record' phases
(synthesizing, opening, recording, etc).
- Add sparse context/locking annotations in compiler-types.h, also to
help with the multithreading effort.
- Optimize the generation of the arch-specific errno tables used in
'perf trace'.
- Optimize libperf's perf_cpu_map__max().
- Improve ARM's CoreSight warnings.
- Report collisions in AUX records.
- Improve warnings for the LLVM 'perf test' entry.
- Improve the PMU events 'perf test' codebase.
- perf test: Do not compare overheads in the zstd comp test
- Better support annotation on ARM.
- Update 'perf trace's cmd string table to decode sys_bpf() first
arg.
Vendor events:
- Add JSON events and metrics for Intel's Ice Lake, Tiger Lake and
Elkhart Lake.
- Update JSON events and metrics for Intel's Cascade Lake and Sky Lake
servers.
Hardware tracing:
- Improvements for the ARM hardware tracing auxtrace support"
* tag 'perf-tools-for-v5.15-2021-09-04' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (130 commits)
perf tests: Add test for PMU aliases
perf pmu: Add PMU alias support
perf session: Report collisions in AUX records
perf script python: Allow reporting the [un]throttle PERF_RECORD_ meta event
perf build: Report failure for testing feature libopencsd
perf cs-etm: Show a warning for an unknown magic number
perf cs-etm: Print the decoder name
perf cs-etm: Create ETE decoder
perf cs-etm: Update OpenCSD decoder for ETE
perf cs-etm: Fix typo
perf cs-etm: Save TRCDEVARCH register
perf cs-etm: Refactor out ETMv4 header saving
perf cs-etm: Initialise architecture based on TRCIDR1
perf cs-etm: Refactor initialisation of decoder params.
tools build: Fix feature detect clean for out of source builds
perf evlist: Add evlist__for_each_entry_from() macro
perf evsel: Handle precise_ip fallback in evsel__open_cpu()
perf evsel: Move bpf_counter__install_pe() to success path in evsel__open_cpu()
perf evsel: Move test_attr__open() to success path in evsel__open_cpu()
perf evsel: Move ignore_missing_thread() to fallback code
...
Diffstat (limited to 'tools/perf/scripts/python/flamegraph.py')
-rwxr-xr-x | tools/perf/scripts/python/flamegraph.py | 108 |
1 files changed, 81 insertions, 27 deletions
diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py index 65780013f745..b6af1dd5f816 100755 --- a/tools/perf/scripts/python/flamegraph.py +++ b/tools/perf/scripts/python/flamegraph.py @@ -13,6 +13,10 @@ # Written by Andreas Gerstmayr <agerstmayr@redhat.com> # Flame Graphs invented by Brendan Gregg <bgregg@netflix.com> # Works in tandem with d3-flame-graph by Martin Spier <mspier@netflix.com> +# +# pylint: disable=missing-module-docstring +# pylint: disable=missing-class-docstring +# pylint: disable=missing-function-docstring from __future__ import print_function import sys @@ -20,16 +24,19 @@ import os import io import argparse import json +import subprocess - +# pylint: disable=too-few-public-methods class Node: - def __init__(self, name, libtype=""): + def __init__(self, name, libtype): self.name = name + # "root" | "kernel" | "" + # "" indicates user space self.libtype = libtype self.value = 0 self.children = [] - def toJSON(self): + def to_json(self): return { "n": self.name, "l": self.libtype, @@ -41,7 +48,7 @@ class Node: class FlameGraphCLI: def __init__(self, args): self.args = args - self.stack = Node("root") + self.stack = Node("all", "root") if self.args.format == "html" and \ not os.path.isfile(self.args.template): @@ -53,13 +60,21 @@ class FlameGraphCLI: file=sys.stderr) sys.exit(1) - def find_or_create_node(self, node, name, dso): - libtype = "kernel" if dso == "[kernel.kallsyms]" else "" - if name is None: - name = "[unknown]" + @staticmethod + def get_libtype_from_dso(dso): + """ + when kernel-debuginfo is installed, + dso points to /usr/lib/debug/lib/modules/*/vmlinux + """ + if dso and (dso == "[kernel.kallsyms]" or dso.endswith("/vmlinux")): + return "kernel" + return "" + + @staticmethod + def find_or_create_node(node, name, libtype): for child in node.children: - if child.name == name and child.libtype == libtype: + if child.name == name: return child child = Node(name, libtype) @@ -67,30 +82,65 @@ 
class FlameGraphCLI: return child def process_event(self, event): - node = self.find_or_create_node(self.stack, event["comm"], None) + pid = event.get("sample", {}).get("pid", 0) + # event["dso"] sometimes contains /usr/lib/debug/lib/modules/*/vmlinux + # for user-space processes; let's use pid for kernel or user-space distinction + if pid == 0: + comm = event["comm"] + libtype = "kernel" + else: + comm = "{} ({})".format(event["comm"], pid) + libtype = "" + node = self.find_or_create_node(self.stack, comm, libtype) + if "callchain" in event: - for entry in reversed(event['callchain']): - node = self.find_or_create_node( - node, entry.get("sym", {}).get("name"), event.get("dso")) + for entry in reversed(event["callchain"]): + name = entry.get("sym", {}).get("name", "[unknown]") + libtype = self.get_libtype_from_dso(entry.get("dso")) + node = self.find_or_create_node(node, name, libtype) else: - node = self.find_or_create_node( - node, entry.get("symbol"), event.get("dso")) + name = event.get("symbol", "[unknown]") + libtype = self.get_libtype_from_dso(event.get("dso")) + node = self.find_or_create_node(node, name, libtype) node.value += 1 + def get_report_header(self): + if self.args.input == "-": + # when this script is invoked with "perf script flamegraph", + # no perf.data is created and we cannot read the header of it + return "" + + try: + output = subprocess.check_output(["perf", "report", "--header-only"]) + return output.decode("utf-8") + except Exception as err: # pylint: disable=broad-except + print("Error reading report header: {}".format(err), file=sys.stderr) + return "" + def trace_end(self): - json_str = json.dumps(self.stack, default=lambda x: x.toJSON()) + stacks_json = json.dumps(self.stack, default=lambda x: x.to_json()) if self.args.format == "html": + report_header = self.get_report_header() + options = { + "colorscheme": self.args.colorscheme, + "context": report_header + } + options_json = json.dumps(options) + try: - with 
io.open(self.args.template, encoding="utf-8") as f: - output_str = f.read().replace("/** @flamegraph_json **/", - json_str) - except IOError as e: - print("Error reading template file: {}".format(e), file=sys.stderr) + with io.open(self.args.template, encoding="utf-8") as template: + output_str = ( + template.read() + .replace("/** @options_json **/", options_json) + .replace("/** @flamegraph_json **/", stacks_json) + ) + except IOError as err: + print("Error reading template file: {}".format(err), file=sys.stderr) sys.exit(1) output_fn = self.args.output or "flamegraph.html" else: - output_str = json_str + output_str = stacks_json output_fn = self.args.output or "stacks.json" if output_fn == "-": @@ -101,8 +151,8 @@ class FlameGraphCLI: try: with io.open(output_fn, "w", encoding="utf-8") as out: out.write(output_str) - except IOError as e: - print("Error writing output file: {}".format(e), file=sys.stderr) + except IOError as err: + print("Error writing output file: {}".format(err), file=sys.stderr) sys.exit(1) @@ -115,12 +165,16 @@ if __name__ == "__main__": help="output file name") parser.add_argument("--template", default="/usr/share/d3-flame-graph/d3-flamegraph-base.html", - help="path to flamegraph HTML template") + help="path to flame graph HTML template") + parser.add_argument("--colorscheme", + default="blue-green", + help="flame graph color scheme", + choices=["blue-green", "orange"]) parser.add_argument("-i", "--input", help=argparse.SUPPRESS) - args = parser.parse_args() - cli = FlameGraphCLI(args) + cli_args = parser.parse_args() + cli = FlameGraphCLI(cli_args) process_event = cli.process_event trace_end = cli.trace_end |