diff options
Diffstat (limited to 'tools/perf/arch/x86/util')
-rw-r--r-- | tools/perf/arch/x86/util/intel-pt.c | 16 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/mem-events.c | 6 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/mem-events.h | 1 | ||||
-rw-r--r-- | tools/perf/arch/x86/util/pmu.c | 288 |
4 files changed, 302 insertions, 9 deletions
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index 8f235d8b67b6..add33cb5d1da 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -19,6 +19,7 @@ #include "../../../util/evlist.h" #include "../../../util/evsel.h" #include "../../../util/evsel_config.h" +#include "../../../util/config.h" #include "../../../util/cpumap.h" #include "../../../util/mmap.h" #include <subcmd/parse-options.h> @@ -52,6 +53,7 @@ struct intel_pt_recording { struct perf_pmu *intel_pt_pmu; int have_sched_switch; struct evlist *evlist; + bool all_switch_events; bool snapshot_mode; bool snapshot_init_done; size_t snapshot_size; @@ -794,7 +796,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr, bool cpu_wide = !target__none(&opts->target) && !target__has_task(&opts->target); - if (!cpu_wide && perf_can_record_cpu_wide()) { + if (ptr->all_switch_events && !cpu_wide && perf_can_record_cpu_wide()) { struct evsel *switch_evsel; switch_evsel = evlist__add_dummy_on_all_cpus(evlist); @@ -1178,6 +1180,16 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused) return rdtsc(); } +static int intel_pt_perf_config(const char *var, const char *value, void *data) +{ + struct intel_pt_recording *ptr = data; + + if (!strcmp(var, "intel-pt.all-switch-events")) + ptr->all_switch_events = perf_config_bool(var, value); + + return 0; +} + struct auxtrace_record *intel_pt_recording_init(int *err) { struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME); @@ -1197,6 +1209,8 @@ struct auxtrace_record *intel_pt_recording_init(int *err) return NULL; } + perf_config(intel_pt_perf_config, ptr); + ptr->intel_pt_pmu = intel_pt_pmu; ptr->itr.recording_options = intel_pt_recording_options; ptr->itr.info_priv_size = intel_pt_info_priv_size; diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c index 62df03e91c7e..b38f519020ff 100644 --- a/tools/perf/arch/x86/util/mem-events.c +++ b/tools/perf/arch/x86/util/mem-events.c @@ -26,3 +26,9 @@ struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = { E(NULL, NULL, NULL, false, 0), E("mem-ldst", "%s//", NULL, false, 0), }; + +struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = { + E(NULL, NULL, NULL, false, 0), + E(NULL, NULL, NULL, false, 0), + E("mem-ldst", "%s/ldlat=%u/", NULL, true, 0), +}; diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h index f55c8d3b7d59..11e09a256f5b 100644 --- a/tools/perf/arch/x86/util/mem-events.h +++ b/tools/perf/arch/x86/util/mem-events.h @@ -6,5 +6,6 @@ extern struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX]; extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX]; extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX]; +extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX]; #endif /* _X86_MEM_EVENTS_H */ diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index e0060dac2a9f..58113482654b 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -8,6 +8,8 @@ #include <linux/perf_event.h> #include <linux/zalloc.h> #include <api/fs/fs.h> +#include <api/io_dir.h> +#include <internal/cpumap.h> #include <errno.h> #include "../../../util/intel-pt.h" @@ -16,10 +18,261 @@ #include "../../../util/fncache.h" #include "../../../util/pmus.h" #include "mem-events.h" +#include "util/debug.h" #include "util/env.h" +#include "util/header.h" -void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused) +static bool x86__is_intel_graniterapids(void) { + static bool checked_if_graniterapids; + static bool is_graniterapids; + + if (!checked_if_graniterapids) { + const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]"; + char *cpuid = get_cpuid_str((struct perf_cpu){0}); + + is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0; + free(cpuid); + checked_if_graniterapids = true; + } + return is_graniterapids; +} + +static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path) +{ + struct perf_cpu_map *cpus; + char *buf = NULL; + size_t buf_len; + + if (sysfs__read_str(sysfs_path, &buf, &buf_len) < 0) + return NULL; + + cpus = perf_cpu_map__new(buf); + free(buf); + return cpus; +} + +static int snc_nodes_per_l3_cache(void) +{ + static bool checked_snc; + static int snc_nodes; + + if (!checked_snc) { + struct perf_cpu_map *node_cpus = + read_sysfs_cpu_map("devices/system/node/node0/cpulist"); + struct perf_cpu_map *cache_cpus = + read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list"); + + snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus); + perf_cpu_map__put(cache_cpus); + perf_cpu_map__put(node_cpus); + checked_snc = true; + } + return snc_nodes; +} + +static bool starts_with(const char *str, const char *prefix) +{ + return !strncmp(prefix, str, strlen(prefix)); +} + +static int num_chas(void) +{ + static bool checked_chas; + static int num_chas; + + if (!checked_chas) { + int fd = perf_pmu__event_source_devices_fd(); + struct io_dir dir; + struct io_dirent64 *dent; + + if (fd < 0) + return -1; + + io_dir__init(&dir, fd); + + while ((dent = io_dir__readdir(&dir)) != NULL) { + /* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */ + if (starts_with(dent->d_name, "uncore_cha_")) + num_chas++; + } + close(fd); + checked_chas = true; + } + return num_chas; +} + +#define MAX_SNCS 6 + +static int uncore_cha_snc(struct perf_pmu *pmu) +{ + // CHA SNC numbers are ordered correspond to the CHAs number. + unsigned int cha_num; + int num_cha, chas_per_node, cha_snc; + int snc_nodes = snc_nodes_per_l3_cache(); + + if (snc_nodes <= 1) + return 0; + + num_cha = num_chas(); + if (num_cha <= 0) { + pr_warning("Unexpected: no CHAs found\n"); + return 0; + } + + /* Compute SNC for PMU. */ + if (sscanf(pmu->name, "uncore_cha_%u", &cha_num) != 1) { + pr_warning("Unexpected: unable to compute CHA number '%s'\n", pmu->name); + return 0; + } + chas_per_node = num_cha / snc_nodes; + cha_snc = cha_num / chas_per_node; + + /* Range check cha_snc. for unexpected out of bounds. */ + return cha_snc >= MAX_SNCS ? 0 : cha_snc; +} + +static int uncore_imc_snc(struct perf_pmu *pmu) +{ + // Compute the IMC SNC using lookup tables. + unsigned int imc_num; + int snc_nodes = snc_nodes_per_l3_cache(); + const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0}; + const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2}; + const u8 *snc_map; + size_t snc_map_len; + + switch (snc_nodes) { + case 2: + snc_map = snc2_map; + snc_map_len = ARRAY_SIZE(snc2_map); + break; + case 3: + snc_map = snc3_map; + snc_map_len = ARRAY_SIZE(snc3_map); + break; + default: + /* Error or no lookup support for SNC with >3 nodes. */ + return 0; + } + + /* Compute SNC for PMU. */ + if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) { + pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name); + return 0; + } + if (imc_num >= snc_map_len) { + pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes); + return 0; + } + return snc_map[imc_num]; +} + +static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc) +{ + static bool checked_cpu_adjust[MAX_SNCS]; + static int cpu_adjust[MAX_SNCS]; + struct perf_cpu_map *node_cpus; + char node_path[] = "devices/system/node/node0/cpulist"; + + /* Was adjust already computed? */ + if (checked_cpu_adjust[pmu_snc]) + return cpu_adjust[pmu_snc]; + + /* SNC0 doesn't need an adjust. */ + if (pmu_snc == 0) { + cpu_adjust[0] = 0; + checked_cpu_adjust[0] = true; + return 0; + } + + /* + * Use NUMA topology to compute first CPU of the NUMA node, we want to + * adjust CPU 0 to be this and similarly for other CPUs if there is >1 + * socket. + */ + assert(pmu_snc >= 0 && pmu_snc <= 9); + node_path[24] += pmu_snc; // Shift node0 to be node<pmu_snc>. + node_cpus = read_sysfs_cpu_map(node_path); + cpu_adjust[pmu_snc] = perf_cpu_map__cpu(node_cpus, 0).cpu; + if (cpu_adjust[pmu_snc] < 0) { + pr_debug("Failed to read valid CPU list from <sysfs>/%s\n", node_path); + cpu_adjust[pmu_snc] = 0; + } else { + checked_cpu_adjust[pmu_snc] = true; + } + perf_cpu_map__put(node_cpus); + return cpu_adjust[pmu_snc]; +} + +static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha) +{ + // With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the + // topology. For example, a two socket graniterapids machine may be set + // up with 3-way SNC meaning there are 6 NUMA nodes that should be + // displayed with --per-node. The cpumask of the CHA and IMC PMUs + // reflects per-socket information meaning, for example, uncore_cha_60 + // on a two socket graniterapids machine with 120 cores per socket will + // have a cpumask of "0,120". This cpumask needs adjusting to "40,160" + // to reflect that uncore_cha_60 is used for the 2nd SNC of each + // socket. Without the adjustment events on uncore_cha_60 will appear in + // node 0 and node 3 (in our example 2 socket 3-way set up), but with + // the adjustment they will appear in node 1 and node 4. The number of + // CHAs is typically larger than the number of cores. The CHA numbers + // are assumed to split evenly and inorder wrt core numbers. There are + // fewer memory IMC PMUs than cores and mapping is handled using lookup + // tables. + static struct perf_cpu_map *cha_adjusted[MAX_SNCS]; + static struct perf_cpu_map *imc_adjusted[MAX_SNCS]; + struct perf_cpu_map **adjusted = cha ? cha_adjusted : imc_adjusted; + int idx, pmu_snc, cpu_adjust; + struct perf_cpu cpu; + bool alloc; + + // Cpus from the kernel holds first CPU of each socket. e.g. 0,120. + if (perf_cpu_map__cpu(pmu->cpus, 0).cpu != 0) { + pr_debug("Ignoring cpumask adjust for %s as unexpected first CPU\n", pmu->name); + return; + } + + pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu); + if (pmu_snc == 0) { + // No adjustment necessary for the first SNC. + return; + } + + alloc = adjusted[pmu_snc] == NULL; + if (alloc) { + // Hold onto the perf_cpu_map globally to avoid recomputation. + cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc); + adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus)); + if (!adjusted[pmu_snc]) + return; + } + + perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) { + // Compute the new cpu map values or if not allocating, assert + // that they match expectations. asserts will be removed to + // avoid overhead in NDEBUG builds. + if (alloc) { + RC_CHK_ACCESS(adjusted[pmu_snc])->map[idx].cpu = cpu.cpu + cpu_adjust; + } else if (idx == 0) { + cpu_adjust = perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu - cpu.cpu; + assert(uncore_cha_imc_compute_cpu_adjust(pmu_snc) == cpu_adjust); + } else { + assert(perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu == + cpu.cpu + cpu_adjust); + } + } + + perf_cpu_map__put(pmu->cpus); + pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]); +} + +void perf_pmu__arch_init(struct perf_pmu *pmu) +{ + struct perf_pmu_caps *ldlat_cap; + #ifdef HAVE_AUXTRACE_SUPPORT if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) { pmu->auxtrace = true; @@ -33,12 +286,31 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused) #endif if (x86__is_amd_cpu()) { - if (!strcmp(pmu->name, "ibs_op")) - pmu->mem_events = perf_mem_events_amd; - } else if (pmu->is_core) { - if (perf_pmu__have_event(pmu, "mem-loads-aux")) - pmu->mem_events = perf_mem_events_intel_aux; - else - pmu->mem_events = perf_mem_events_intel; + if (strcmp(pmu->name, "ibs_op")) + return; + + pmu->mem_events = perf_mem_events_amd; + + if (!perf_pmu__caps_parse(pmu)) + return; + + ldlat_cap = perf_pmu__get_cap(pmu, "ldlat"); + if (!ldlat_cap || strcmp(ldlat_cap->value, "1")) + return; + + perf_mem_events__loads_ldlat = 0; + pmu->mem_events = perf_mem_events_amd_ldlat; + } else { + if (pmu->is_core) { + if (perf_pmu__have_event(pmu, "mem-loads-aux")) + pmu->mem_events = perf_mem_events_intel_aux; + else + pmu->mem_events = perf_mem_events_intel; + } else if (x86__is_intel_graniterapids()) { + if (starts_with(pmu->name, "uncore_cha_")) + gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true); + else if (starts_with(pmu->name, "uncore_imc_")) + gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false); + } } } |