Diffstat (limited to 'tools/perf/arch')
-rw-r--r--  tools/perf/arch/x86/include/arch-tests.h   |    1
-rw-r--r--  tools/perf/arch/x86/tests/Build             |    1
-rw-r--r--  tools/perf/arch/x86/tests/amd-ibs-period.c  | 1032
-rw-r--r--  tools/perf/arch/x86/tests/arch-tests.c      |    2
-rw-r--r--  tools/perf/arch/x86/util/intel-pt.c         |   16
-rw-r--r--  tools/perf/arch/x86/util/mem-events.c       |    6
-rw-r--r--  tools/perf/arch/x86/util/mem-events.h       |    1
-rw-r--r--  tools/perf/arch/x86/util/pmu.c              |  288
8 files changed, 1338 insertions(+), 9 deletions(-)
diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index c0421a26b875..4fd425157d7d 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -14,6 +14,7 @@ int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest);
int test__bp_modify(struct test_suite *test, int subtest);
int test__x86_sample_parsing(struct test_suite *test, int subtest);
int test__amd_ibs_via_core_pmu(struct test_suite *test, int subtest);
+int test__amd_ibs_period(struct test_suite *test, int subtest);
int test__hybrid(struct test_suite *test, int subtest);
extern struct test_suite *arch_tests[];
diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 86262c720857..5e00cbfd2d56 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -10,6 +10,7 @@ perf-test-$(CONFIG_AUXTRACE) += insn-x86.o
endif
perf-test-$(CONFIG_X86_64) += bp-modify.o
perf-test-y += amd-ibs-via-core-pmu.o
+perf-test-y += amd-ibs-period.o
ifdef SHELLCHECK
SHELL_TESTS := gen-insn-x86-dat.sh
diff --git a/tools/perf/arch/x86/tests/amd-ibs-period.c b/tools/perf/arch/x86/tests/amd-ibs-period.c
new file mode 100644
index 000000000000..223e059e04de
--- /dev/null
+++ b/tools/perf/arch/x86/tests/amd-ibs-period.c
@@ -0,0 +1,1032 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sched.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/utsname.h>
+#include <string.h>
+
+#include "arch-tests.h"
+#include "linux/perf_event.h"
+#include "linux/zalloc.h"
+#include "tests/tests.h"
+#include "../perf-sys.h"
+#include "pmu.h"
+#include "pmus.h"
+#include "debug.h"
+#include "util.h"
+#include "strbuf.h"
+#include "../util/env.h"
+
+static int page_size;
+
+#define PERF_MMAP_DATA_PAGES 32L
+#define PERF_MMAP_DATA_SIZE (PERF_MMAP_DATA_PAGES * page_size)
+#define PERF_MMAP_DATA_MASK (PERF_MMAP_DATA_SIZE - 1)
+#define PERF_MMAP_TOTAL_PAGES (PERF_MMAP_DATA_PAGES + 1)
+#define PERF_MMAP_TOTAL_SIZE (PERF_MMAP_TOTAL_PAGES * page_size)
+
+#define rmb() asm volatile("lfence":::"memory")
+
+enum {
+ FD_ERROR,
+ FD_SUCCESS,
+};
+
+enum {
+ IBS_FETCH,
+ IBS_OP,
+};
+
+struct perf_pmu *fetch_pmu;
+struct perf_pmu *op_pmu;
+unsigned int perf_event_max_sample_rate;
+
+/* Dummy workload to generate IBS samples. */
+static int dummy_workload_1(unsigned long count)
+{
+ int (*func)(void);
+ int ret = 0;
+ char *p;
+ char insn1[] = {
+ 0xb8, 0x01, 0x00, 0x00, 0x00, /* mov $0x1,%eax */
+ 0xc3, /* ret */
+ 0xcc, /* int 3 */
+ };
+
+ char insn2[] = {
+ 0xb8, 0x02, 0x00, 0x00, 0x00, /* mov $0x2,%eax */
+ 0xc3, /* ret */
+ 0xcc, /* int 3 */
+ };
+
+ p = zalloc(2 * page_size);
+ if (!p) {
+ printf("malloc() failed. %m");
+ return 1;
+ }
+
+ func = (void *)((unsigned long)(p + page_size - 1) & ~(page_size - 1));
+
+ ret = mprotect(func, page_size, PROT_READ | PROT_WRITE | PROT_EXEC);
+ if (ret) {
+ printf("mprotect() failed. %m");
+ goto out;
+ }
+
+ if (count < 100000)
+ count = 100000;
+ else if (count > 10000000)
+ count = 10000000;
+ while (count--) {
+ memcpy((void *)func, insn1, sizeof(insn1));
+ if (func() != 1) {
+ pr_debug("ERROR insn1\n");
+ ret = -1;
+ goto out;
+ }
+ memcpy((void *)func, insn2, sizeof(insn2));
+ if (func() != 2) {
+ pr_debug("ERROR insn2\n");
+ ret = -1;
+ goto out;
+ }
+ }
+
+out:
+ free(p);
+ return ret;
+}
+
+/* Another dummy workload to generate IBS samples. */
+static void dummy_workload_2(char *perf)
+{
+ char bench[] = " bench sched messaging -g 10 -l 5000 > /dev/null 2>&1";
+ char taskset[] = "taskset -c 0 ";
+ int ret __maybe_unused;
+ struct strbuf sb;
+ char *cmd;
+
+ strbuf_init(&sb, 0);
+ strbuf_add(&sb, taskset, strlen(taskset));
+ strbuf_add(&sb, perf, strlen(perf));
+ strbuf_add(&sb, bench, strlen(bench));
+ cmd = strbuf_detach(&sb, NULL);
+ ret = system(cmd);
+ free(cmd);
+}
+
+static int sched_affine(int cpu)
+{
+ cpu_set_t set;
+
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+ if (sched_setaffinity(getpid(), sizeof(set), &set) == -1) {
+ pr_debug("sched_setaffinity() failed. [%m]");
+ return -1;
+ }
+ return 0;
+}
+
+static void
+copy_sample_data(void *src, unsigned long offset, void *dest, size_t size)
+{
+ size_t chunk1_size, chunk2_size;
+
+ if ((offset + size) < (size_t)PERF_MMAP_DATA_SIZE) {
+ memcpy(dest, src + offset, size);
+ } else {
+ chunk1_size = PERF_MMAP_DATA_SIZE - offset;
+ chunk2_size = size - chunk1_size;
+
+ memcpy(dest, src + offset, chunk1_size);
+ memcpy(dest + chunk1_size, src, chunk2_size);
+ }
+}
+
+static int rb_read(struct perf_event_mmap_page *rb, void *dest, size_t size)
+{
+ void *base;
+ unsigned long data_tail, data_head;
+
+ /* Casting to (void *) is needed. */
+ base = (void *)rb + page_size;
+
+ data_head = rb->data_head;
+ rmb();
+ data_tail = rb->data_tail;
+
+ if ((data_head - data_tail) < size)
+ return -1;
+
+ data_tail &= PERF_MMAP_DATA_MASK;
+ copy_sample_data(base, data_tail, dest, size);
+ rb->data_tail += size;
+ return 0;
+}
+
+static void rb_skip(struct perf_event_mmap_page *rb, size_t size)
+{
+ size_t data_head = rb->data_head;
+
+ rmb();
+
+ if ((rb->data_tail + size) > data_head)
+ rb->data_tail = data_head;
+ else
+ rb->data_tail += size;
+}
+
+/* Sample period value taken from the perf sample must match the expected value. */
+static int period_equal(unsigned long exp_period, unsigned long act_period)
+{
+ return exp_period == act_period ? 0 : -1;
+}
+
+/*
+ * Sample period value taken from the perf sample must be >= the minimum
+ * sample period supported by the IBS HW.
+ */
+static int period_higher(unsigned long min_period, unsigned long act_period)
+{
+ return min_period <= act_period ? 0 : -1;
+}
+
+static int rb_drain_samples(struct perf_event_mmap_page *rb,
+ unsigned long exp_period,
+ int *nr_samples,
+ int (*callback)(unsigned long, unsigned long))
+{
+ struct perf_event_header hdr;
+ unsigned long period;
+ int ret = 0;
+
+ /*
+ * PERF_RECORD_SAMPLE:
+ * struct {
+ * struct perf_event_header hdr;
+ * { u64 period; } && PERF_SAMPLE_PERIOD
+ * };
+ */
+ while (1) {
+ if (rb_read(rb, &hdr, sizeof(hdr)))
+ return ret;
+
+ if (hdr.type == PERF_RECORD_SAMPLE) {
+ (*nr_samples)++;
+ period = 0;
+ if (rb_read(rb, &period, sizeof(period)))
+ pr_debug("rb_read(period) error. [%m]");
+ ret |= callback(exp_period, period);
+ } else {
+ rb_skip(rb, hdr.size - sizeof(hdr));
+ }
+ }
+ return ret;
+}
+
+static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
+ int cpu, int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static void fetch_prepare_attr(struct perf_event_attr *attr,
+ unsigned long long config, int freq,
+ unsigned long sample_period)
+{
+ memset(attr, 0, sizeof(struct perf_event_attr));
+
+ attr->type = fetch_pmu->type;
+ attr->size = sizeof(struct perf_event_attr);
+ attr->config = config;
+ attr->disabled = 1;
+ attr->sample_type = PERF_SAMPLE_PERIOD;
+ attr->freq = freq;
+ attr->sample_period = sample_period; /* = ->sample_freq */
+}
+
+static void op_prepare_attr(struct perf_event_attr *attr,
+ unsigned long config, int freq,
+ unsigned long sample_period)
+{
+ memset(attr, 0, sizeof(struct perf_event_attr));
+
+ attr->type = op_pmu->type;
+ attr->size = sizeof(struct perf_event_attr);
+ attr->config = config;
+ attr->disabled = 1;
+ attr->sample_type = PERF_SAMPLE_PERIOD;
+ attr->freq = freq;
+ attr->sample_period = sample_period; /* = ->sample_freq */
+}
+
+struct ibs_configs {
+ /* Input */
+ unsigned long config;
+
+ /* Expected output */
+ unsigned long period;
+ int fd;
+};
+
+/*
+ * Somehow the first Fetch event with sample period = 0x10 produces 0
+ * samples, so start with a large period and decrease it gradually.
+ */
+struct ibs_configs fetch_configs[] = {
+ { .config = 0xffff, .period = 0xffff0, .fd = FD_SUCCESS },
+ { .config = 0x1000, .period = 0x10000, .fd = FD_SUCCESS },
+ { .config = 0xff, .period = 0xff0, .fd = FD_SUCCESS },
+ { .config = 0x1, .period = 0x10, .fd = FD_SUCCESS },
+ { .config = 0x0, .period = -1, .fd = FD_ERROR },
+ { .config = 0x10000, .period = -1, .fd = FD_ERROR },
+};
+
+struct ibs_configs op_configs[] = {
+ { .config = 0x0, .period = -1, .fd = FD_ERROR },
+ { .config = 0x1, .period = -1, .fd = FD_ERROR },
+ { .config = 0x8, .period = -1, .fd = FD_ERROR },
+ { .config = 0x9, .period = 0x90, .fd = FD_SUCCESS },
+ { .config = 0xf, .period = 0xf0, .fd = FD_SUCCESS },
+ { .config = 0x1000, .period = 0x10000, .fd = FD_SUCCESS },
+ { .config = 0xffff, .period = 0xffff0, .fd = FD_SUCCESS },
+ { .config = 0x10000, .period = -1, .fd = FD_ERROR },
+ { .config = 0x100000, .period = 0x100000, .fd = FD_SUCCESS },
+ { .config = 0xf00000, .period = 0xf00000, .fd = FD_SUCCESS },
+ { .config = 0xf0ffff, .period = 0xfffff0, .fd = FD_SUCCESS },
+ { .config = 0x1f0ffff, .period = 0x1fffff0, .fd = FD_SUCCESS },
+ { .config = 0x7f0ffff, .period = 0x7fffff0, .fd = FD_SUCCESS },
+ { .config = 0x8f0ffff, .period = -1, .fd = FD_ERROR },
+ { .config = 0x17f0ffff, .period = -1, .fd = FD_ERROR },
+};
+
+static int __ibs_config_test(int ibs_type, struct ibs_configs *config, int *nr_samples)
+{
+ struct perf_event_attr attr;
+ int fd, i;
+ void *rb;
+ int ret = 0;
+
+ if (ibs_type == IBS_FETCH)
+ fetch_prepare_attr(&attr, config->config, 0, 0);
+ else
+ op_prepare_attr(&attr, config->config, 0, 0);
+
+ /* CPU0, All processes */
+ fd = perf_event_open(&attr, -1, 0, -1, 0);
+ if (config->fd == FD_ERROR) {
+ if (fd != -1) {
+ close(fd);
+ return -1;
+ }
+ return 0;
+ }
+ if (fd <= -1)
+ return -1;
+
+ rb = mmap(NULL, PERF_MMAP_TOTAL_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (rb == MAP_FAILED) {
+ pr_debug("mmap() failed. [%m]\n");
+ close(fd);
+ return -1;
+ }
+
+ ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+ ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+
+ i = 5;
+ while (i--) {
+ dummy_workload_1(1000000);
+
+ ret = rb_drain_samples(rb, config->period, nr_samples,
+ period_equal);
+ if (ret)
+ break;
+ }
+
+ ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+ munmap(rb, PERF_MMAP_TOTAL_SIZE);
+ close(fd);
+ return ret;
+}
+
+static int ibs_config_test(void)
+{
+ int nr_samples = 0;
+ unsigned long i;
+ int ret = 0;
+ int r;
+
+ pr_debug("\nIBS config tests:\n");
+ pr_debug("-----------------\n");
+
+ pr_debug("Fetch PMU tests:\n");
+ for (i = 0; i < ARRAY_SIZE(fetch_configs); i++) {
+ nr_samples = 0;
+ r = __ibs_config_test(IBS_FETCH, &(fetch_configs[i]), &nr_samples);
+
+ if (fetch_configs[i].fd == FD_ERROR) {
+ pr_debug("0x%-16lx: %-4s\n", fetch_configs[i].config,
+ !r ? "Ok" : "Fail");
+ } else {
+ /*
+ * Although nr_samples == 0 is reported as Fail here, the failure
+ * status is not cascaded up because we cannot decide whether the
+ * test really failed without actual samples.
+ */
+ pr_debug("0x%-16lx: %-4s (nr samples: %d)\n", fetch_configs[i].config,
+ (!r && nr_samples != 0) ? "Ok" : "Fail", nr_samples);
+ }
+
+ ret |= r;
+ }
+
+ pr_debug("Op PMU tests:\n");
+ for (i = 0; i < ARRAY_SIZE(op_configs); i++) {
+ nr_samples = 0;
+ r = __ibs_config_test(IBS_OP, &(op_configs[i]), &nr_samples);
+
+ if (op_configs[i].fd == FD_ERROR) {
+ pr_debug("0x%-16lx: %-4s\n", op_configs[i].config,
+ !r ? "Ok" : "Fail");
+ } else {
+ /*
+ * Although nr_samples == 0 is reported as Fail here, the failure
+ * status is not cascaded up because we cannot decide whether the
+ * test really failed without actual samples.
+ */
+ pr_debug("0x%-16lx: %-4s (nr samples: %d)\n", op_configs[i].config,
+ (!r && nr_samples != 0) ? "Ok" : "Fail", nr_samples);
+ }
+
+ ret |= r;
+ }
+
+ return ret;
+}
+
+struct ibs_period {
+ /* Input */
+ int freq;
+ unsigned long sample_freq;
+
+ /* Output */
+ int ret;
+ unsigned long period;
+};
+
+struct ibs_period fetch_period[] = {
+ { .freq = 0, .sample_freq = 0, .ret = FD_ERROR, .period = -1 },
+ { .freq = 0, .sample_freq = 1, .ret = FD_ERROR, .period = -1 },
+ { .freq = 0, .sample_freq = 0xf, .ret = FD_ERROR, .period = -1 },
+ { .freq = 0, .sample_freq = 0x10, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 0, .sample_freq = 0x11, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 0, .sample_freq = 0x8f, .ret = FD_SUCCESS, .period = 0x80 },
+ { .freq = 0, .sample_freq = 0x90, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 0, .sample_freq = 0x91, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 0, .sample_freq = 0x4d2, .ret = FD_SUCCESS, .period = 0x4d0 },
+ { .freq = 0, .sample_freq = 0x1007, .ret = FD_SUCCESS, .period = 0x1000 },
+ { .freq = 0, .sample_freq = 0xfff0, .ret = FD_SUCCESS, .period = 0xfff0 },
+ { .freq = 0, .sample_freq = 0xffff, .ret = FD_SUCCESS, .period = 0xfff0 },
+ { .freq = 0, .sample_freq = 0x10010, .ret = FD_SUCCESS, .period = 0x10010 },
+ { .freq = 0, .sample_freq = 0x7fffff, .ret = FD_SUCCESS, .period = 0x7ffff0 },
+ { .freq = 0, .sample_freq = 0xfffffff, .ret = FD_SUCCESS, .period = 0xffffff0 },
+ { .freq = 1, .sample_freq = 0, .ret = FD_ERROR, .period = -1 },
+ { .freq = 1, .sample_freq = 1, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0xf, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0x10, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0x11, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0x8f, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0x90, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0x91, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0x4d2, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0x1007, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0xfff0, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0xffff, .ret = FD_SUCCESS, .period = 0x10 },
+ { .freq = 1, .sample_freq = 0x10010, .ret = FD_SUCCESS, .period = 0x10 },
+ /* ret=FD_ERROR because sample_freq > default perf_event_max_sample_rate (100000) */
+ { .freq = 1, .sample_freq = 0x7fffff, .ret = FD_ERROR, .period = -1 },
+};
+
+struct ibs_period op_period[] = {
+ { .freq = 0, .sample_freq = 0, .ret = FD_ERROR, .period = -1 },
+ { .freq = 0, .sample_freq = 1, .ret = FD_ERROR, .period = -1 },
+ { .freq = 0, .sample_freq = 0xf, .ret = FD_ERROR, .period = -1 },
+ { .freq = 0, .sample_freq = 0x10, .ret = FD_ERROR, .period = -1 },
+ { .freq = 0, .sample_freq = 0x11, .ret = FD_ERROR, .period = -1 },
+ { .freq = 0, .sample_freq = 0x8f, .ret = FD_ERROR, .period = -1 },
+ { .freq = 0, .sample_freq = 0x90, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 0, .sample_freq = 0x91, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 0, .sample_freq = 0x4d2, .ret = FD_SUCCESS, .period = 0x4d0 },
+ { .freq = 0, .sample_freq = 0x1007, .ret = FD_SUCCESS, .period = 0x1000 },
+ { .freq = 0, .sample_freq = 0xfff0, .ret = FD_SUCCESS, .period = 0xfff0 },
+ { .freq = 0, .sample_freq = 0xffff, .ret = FD_SUCCESS, .period = 0xfff0 },
+ { .freq = 0, .sample_freq = 0x10010, .ret = FD_SUCCESS, .period = 0x10010 },
+ { .freq = 0, .sample_freq = 0x7fffff, .ret = FD_SUCCESS, .period = 0x7ffff0 },
+ { .freq = 0, .sample_freq = 0xfffffff, .ret = FD_SUCCESS, .period = 0xffffff0 },
+ { .freq = 1, .sample_freq = 0, .ret = FD_ERROR, .period = -1 },
+ { .freq = 1, .sample_freq = 1, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0xf, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0x10, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0x11, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0x8f, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0x90, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0x91, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0x4d2, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0x1007, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0xfff0, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0xffff, .ret = FD_SUCCESS, .period = 0x90 },
+ { .freq = 1, .sample_freq = 0x10010, .ret = FD_SUCCESS, .period = 0x90 },
+ /* ret=FD_ERROR because sample_freq > default perf_event_max_sample_rate (100000) */
+ { .freq = 1, .sample_freq = 0x7fffff, .ret = FD_ERROR, .period = -1 },
+};
+
+static int __ibs_period_constraint_test(int ibs_type, struct ibs_period *period,
+ int *nr_samples)
+{
+ struct perf_event_attr attr;
+ int ret = 0;
+ void *rb;
+ int fd;
+
+ if (period->freq && period->sample_freq > perf_event_max_sample_rate)
+ period->ret = FD_ERROR;
+
+ if (ibs_type == IBS_FETCH)
+ fetch_prepare_attr(&attr, 0, period->freq, period->sample_freq);
+ else
+ op_prepare_attr(&attr, 0, period->freq, period->sample_freq);
+
+ /* CPU0, All processes */
+ fd = perf_event_open(&attr, -1, 0, -1, 0);
+ if (period->ret == FD_ERROR) {
+ if (fd != -1) {
+ close(fd);
+ return -1;
+ }
+ return 0;
+ }
+ if (fd <= -1)
+ return -1;
+
+ rb = mmap(NULL, PERF_MMAP_TOTAL_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (rb == MAP_FAILED) {
+ pr_debug("mmap() failed. [%m]\n");
+ close(fd);
+ return -1;
+ }
+
+ ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+ ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+
+ if (period->freq) {
+ dummy_workload_1(100000);
+ ret = rb_drain_samples(rb, period->period, nr_samples,
+ period_higher);
+ } else {
+ dummy_workload_1(period->sample_freq * 10);
+ ret = rb_drain_samples(rb, period->period, nr_samples,
+ period_equal);
+ }
+
+ ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+ munmap(rb, PERF_MMAP_TOTAL_SIZE);
+ close(fd);
+ return ret;
+}
+
+static int ibs_period_constraint_test(void)
+{
+ unsigned long i;
+ int nr_samples;
+ int ret = 0;
+ int r;
+
+ pr_debug("\nIBS sample period constraint tests:\n");
+ pr_debug("-----------------------------------\n");
+
+ pr_debug("Fetch PMU test:\n");
+ for (i = 0; i < ARRAY_SIZE(fetch_period); i++) {
+ nr_samples = 0;
+ r = __ibs_period_constraint_test(IBS_FETCH, &fetch_period[i],
+ &nr_samples);
+
+ if (fetch_period[i].ret == FD_ERROR) {
+ pr_debug("freq %d, sample_freq %9ld: %-4s\n",
+ fetch_period[i].freq, fetch_period[i].sample_freq,
+ !r ? "Ok" : "Fail");
+ } else {
+ /*
+ * Although nr_samples == 0 is reported as Fail here, the failure
+ * status is not cascaded up because we cannot decide whether the
+ * test really failed without actual samples.
+ */
+ pr_debug("freq %d, sample_freq %9ld: %-4s (nr samples: %d)\n",
+ fetch_period[i].freq, fetch_period[i].sample_freq,
+ (!r && nr_samples != 0) ? "Ok" : "Fail", nr_samples);
+ }
+ ret |= r;
+ }
+
+ pr_debug("Op PMU test:\n");
+ for (i = 0; i < ARRAY_SIZE(op_period); i++) {
+ nr_samples = 0;
+ r = __ibs_period_constraint_test(IBS_OP, &op_period[i],
+ &nr_samples);
+
+ if (op_period[i].ret == FD_ERROR) {
+ pr_debug("freq %d, sample_freq %9ld: %-4s\n",
+ op_period[i].freq, op_period[i].sample_freq,
+ !r ? "Ok" : "Fail");
+ } else {
+ /*
+ * Although nr_samples == 0 is reported as Fail here, the failure
+ * status is not cascaded up because we cannot decide whether the
+ * test really failed without actual samples.
+ */
+ pr_debug("freq %d, sample_freq %9ld: %-4s (nr samples: %d)\n",
+ op_period[i].freq, op_period[i].sample_freq,
+ (!r && nr_samples != 0) ? "Ok" : "Fail", nr_samples);
+ }
+ ret |= r;
+ }
+
+ return ret;
+}
+
+struct ibs_ioctl {
+ /* Input */
+ int freq;
+ unsigned long period;
+
+ /* Expected output */
+ int ret;
+};
+
+struct ibs_ioctl fetch_ioctl[] = {
+ { .freq = 0, .period = 0x0, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x1, .ret = FD_ERROR },
+ { .freq = 0, .period = 0xf, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x10, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0x11, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x1f, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x20, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0x80, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0x8f, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x90, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0x91, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x100, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0xfff0, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0xffff, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x10000, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0x1fff0, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0x1fff5, .ret = FD_ERROR },
+ { .freq = 1, .period = 0x0, .ret = FD_ERROR },
+ { .freq = 1, .period = 0x1, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0xf, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x10, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x11, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x1f, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x20, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x80, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x8f, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x90, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x91, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x100, .ret = FD_SUCCESS },
+};
+
+struct ibs_ioctl op_ioctl[] = {
+ { .freq = 0, .period = 0x0, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x1, .ret = FD_ERROR },
+ { .freq = 0, .period = 0xf, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x10, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x11, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x1f, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x20, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x80, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x8f, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x90, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0x91, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x100, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0xfff0, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0xffff, .ret = FD_ERROR },
+ { .freq = 0, .period = 0x10000, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0x1fff0, .ret = FD_SUCCESS },
+ { .freq = 0, .period = 0x1fff5, .ret = FD_ERROR },
+ { .freq = 1, .period = 0x0, .ret = FD_ERROR },
+ { .freq = 1, .period = 0x1, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0xf, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x10, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x11, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x1f, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x20, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x80, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x8f, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x90, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x91, .ret = FD_SUCCESS },
+ { .freq = 1, .period = 0x100, .ret = FD_SUCCESS },
+};
+
+static int __ibs_ioctl_test(int ibs_type, struct ibs_ioctl *ibs_ioctl)
+{
+ struct perf_event_attr attr;
+ int ret = 0;
+ int fd;
+ int r;
+
+ if (ibs_type == IBS_FETCH)
+ fetch_prepare_attr(&attr, 0, ibs_ioctl->freq, 1000);
+ else
+ op_prepare_attr(&attr, 0, ibs_ioctl->freq, 1000);
+
+ /* CPU0, All processes */
+ fd = perf_event_open(&attr, -1, 0, -1, 0);
+ if (fd <= -1) {
+ pr_debug("event_open() Failed\n");
+ return -1;
+ }
+
+ r = ioctl(fd, PERF_EVENT_IOC_PERIOD, &ibs_ioctl->period);
+ if ((ibs_ioctl->ret == FD_SUCCESS && r <= -1) ||
+ (ibs_ioctl->ret == FD_ERROR && r >= 0)) {
+ ret = -1;
+ }
+
+ close(fd);
+ return ret;
+}
+
+static int ibs_ioctl_test(void)
+{
+ unsigned long i;
+ int ret = 0;
+ int r;
+
+ pr_debug("\nIBS ioctl() tests:\n");
+ pr_debug("------------------\n");
+
+ pr_debug("Fetch PMU tests\n");
+ for (i = 0; i < ARRAY_SIZE(fetch_ioctl); i++) {
+ r = __ibs_ioctl_test(IBS_FETCH, &fetch_ioctl[i]);
+
+ pr_debug("ioctl(%s = 0x%-7lx): %s\n",
+ fetch_ioctl[i].freq ? "freq " : "period",
+ fetch_ioctl[i].period, r ? "Fail" : "Ok");
+ ret |= r;
+ }
+
+ pr_debug("Op PMU tests\n");
+ for (i = 0; i < ARRAY_SIZE(op_ioctl); i++) {
+ r = __ibs_ioctl_test(IBS_OP, &op_ioctl[i]);
+
+ pr_debug("ioctl(%s = 0x%-7lx): %s\n",
+ op_ioctl[i].freq ? "freq " : "period",
+ op_ioctl[i].period, r ? "Fail" : "Ok");
+ ret |= r;
+ }
+
+ return ret;
+}
+
+static int ibs_freq_neg_test(void)
+{
+ struct perf_event_attr attr;
+ int fd;
+
+ pr_debug("\nIBS freq (negative) tests:\n");
+ pr_debug("--------------------------\n");
+
+ /*
+ * Assuming perf_event_max_sample_rate <= 100000,
+ * config: 0x300D40 ==> MaxCnt: 200000
+ */
+ op_prepare_attr(&attr, 0x300D40, 1, 0);
+
+ /* CPU0, All processes */
+ fd = perf_event_open(&attr, -1, 0, -1, 0);
+ if (fd != -1) {
+ pr_debug("freq 1, sample_freq 200000: Fail\n");
+ close(fd);
+ return -1;
+ }
+
+ pr_debug("freq 1, sample_freq 200000: Ok\n");
+
+ return 0;
+}
+
+struct ibs_l3missonly {
+ /* Input */
+ int freq;
+ unsigned long sample_freq;
+
+ /* Expected output */
+ int ret;
+ unsigned long min_period;
+};
+
+struct ibs_l3missonly fetch_l3missonly = {
+ .freq = 1,
+ .sample_freq = 10000,
+ .ret = FD_SUCCESS,
+ .min_period = 0x10,
+};
+
+struct ibs_l3missonly op_l3missonly = {
+ .freq = 1,
+ .sample_freq = 10000,
+ .ret = FD_SUCCESS,
+ .min_period = 0x90,
+};
+
+static int __ibs_l3missonly_test(char *perf, int ibs_type, int *nr_samples,
+ struct ibs_l3missonly *l3missonly)
+{
+ struct perf_event_attr attr;
+ int ret = 0;
+ void *rb;
+ int fd;
+
+ if (l3missonly->sample_freq > perf_event_max_sample_rate)
+ l3missonly->ret = FD_ERROR;
+
+ if (ibs_type == IBS_FETCH) {
+ fetch_prepare_attr(&attr, 0x800000000000000UL, l3missonly->freq,
+ l3missonly->sample_freq);
+ } else {
+ op_prepare_attr(&attr, 0x10000, l3missonly->freq,
+ l3missonly->sample_freq);
+ }
+
+ /* CPU0, All processes */
+ fd = perf_event_open(&attr, -1, 0, -1, 0);
+ if (l3missonly->ret == FD_ERROR) {
+ if (fd != -1) {
+ close(fd);
+ return -1;
+ }
+ return 0;
+ }
+ if (fd == -1) {
+ pr_debug("perf_event_open() failed. [%m]\n");
+ return -1;
+ }
+
+ rb = mmap(NULL, PERF_MMAP_TOTAL_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (rb == MAP_FAILED) {
+ pr_debug("mmap() failed. [%m]\n");
+ close(fd);
+ return -1;
+ }
+
+ ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+ ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+
+ dummy_workload_2(perf);
+
+ ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+
+ ret = rb_drain_samples(rb, l3missonly->min_period, nr_samples, period_higher);
+
+ munmap(rb, PERF_MMAP_TOTAL_SIZE);
+ close(fd);
+ return ret;
+}
+
+static int ibs_l3missonly_test(char *perf)
+{
+ int nr_samples = 0;
+ int ret = 0;
+ int r = 0;
+
+ pr_debug("\nIBS L3MissOnly test: (takes a while)\n");
+ pr_debug("--------------------\n");
+
+ if (perf_pmu__has_format(fetch_pmu, "l3missonly")) {
+ nr_samples = 0;
+ r = __ibs_l3missonly_test(perf, IBS_FETCH, &nr_samples, &fetch_l3missonly);
+ if (fetch_l3missonly.ret == FD_ERROR) {
+ pr_debug("Fetch L3MissOnly: %-4s\n", !r ? "Ok" : "Fail");
+ } else {
+ /*
+ * Although nr_samples == 0 is reported as Fail here, the failure
+ * status is not cascaded up because we cannot decide whether the
+ * test really failed without actual samples.
+ */
+ pr_debug("Fetch L3MissOnly: %-4s (nr_samples: %d)\n",
+ (!r && nr_samples != 0) ? "Ok" : "Fail", nr_samples);
+ }
+ ret |= r;
+ }
+
+ if (perf_pmu__has_format(op_pmu, "l3missonly")) {
+ nr_samples = 0;
+ r = __ibs_l3missonly_test(perf, IBS_OP, &nr_samples, &op_l3missonly);
+ if (op_l3missonly.ret == FD_ERROR) {
+ pr_debug("Op L3MissOnly: %-4s\n", !r ? "Ok" : "Fail");
+ } else {
+ /*
+ * Although nr_samples == 0 is reported as Fail here, the failure
+ * status is not cascaded up because we cannot decide whether the
+ * test really failed without actual samples.
+ */
+ pr_debug("Op L3MissOnly: %-4s (nr_samples: %d)\n",
+ (!r && nr_samples != 0) ? "Ok" : "Fail", nr_samples);
+ }
+ ret |= r;
+ }
+
+ return ret;
+}
+
+static unsigned int get_perf_event_max_sample_rate(void)
+{
+ unsigned int max_sample_rate = 100000;
+ FILE *fp;
+ int ret;
+
+ fp = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
+ if (!fp) {
+ pr_debug("Can't open perf_event_max_sample_rate. Assuming %d\n",
+ max_sample_rate);
+ goto out;
+ }
+
+ ret = fscanf(fp, "%u", &max_sample_rate);
+ if (ret != 1) {
+ pr_debug("Can't read perf_event_max_sample_rate. Assuming 100000\n");
+ max_sample_rate = 100000;
+ }
+ fclose(fp);
+
+out:
+ return max_sample_rate;
+}
+
+/*
+ * A bunch of IBS sample period fixes that this test exercises went into
+ * v6.15. Skip the test on older kernels to distinguish a failure due to
+ * a new bug from a known failure due to an older kernel.
+ */
+static bool kernel_v6_15_or_newer(void)
+{
+ struct utsname utsname;
+ char *endptr = NULL;
+ long major, minor;
+
+ if (uname(&utsname) < 0) {
+ pr_debug("uname() failed. [%m]");
+ return false;
+ }
+
+ major = strtol(utsname.release, &endptr, 10);
+ endptr++;
+ minor = strtol(endptr, NULL, 10);
+
+ return major > 6 || (major == 6 && minor >= 15);
+}
+
+int test__amd_ibs_period(struct test_suite *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ char perf[PATH_MAX] = {'\0'};
+ int ret = TEST_OK;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /*
+ * Reading perf_event_max_sample_rate only once _might_ cause some of
+ * the tests to fail if the kernel changes it after it is read here.
+ */
+ perf_event_max_sample_rate = get_perf_event_max_sample_rate();
+ fetch_pmu = perf_pmus__find("ibs_fetch");
+ op_pmu = perf_pmus__find("ibs_op");
+
+ if (!x86__is_amd_cpu() || !fetch_pmu || !op_pmu)
+ return TEST_SKIP;
+
+ if (!kernel_v6_15_or_newer()) {
+ pr_debug("Need v6.15 or newer kernel. Skipping.\n");
+ return TEST_SKIP;
+ }
+
+ perf_exe(perf, sizeof(perf));
+
+ if (sched_affine(0))
+ return TEST_FAIL;
+
+ /*
+ * A perf event can be opened in two modes:
+ * 1. Freq mode
+ *    perf_event_attr->freq = 1, ->sample_freq = <frequency>
+ * 2. Sample period mode
+ *    perf_event_attr->freq = 0, ->sample_period = <period>
+ *
+ * Instead of using the above interface, an IBS event in 'sample period
+ * mode' can also be opened by passing the <period> value directly in the
+ * MaxCnt bitfield of perf_event_attr->config. Test this IBS-specific
+ * interface.
+ */
+ if (ibs_config_test())
+ ret = TEST_FAIL;
+
+ /*
+ * IBS Fetch and Op PMUs have HW constraints on the minimum sample period.
+ * Also, the sample period must be a multiple of 0x10. Test that the IBS
+ * driver honors the HW constraints for various values of both freq mode
+ * and sample period mode IBS events.
+ */
+ if (ibs_period_constraint_test())
+ ret = TEST_FAIL;
+
+ /*
+ * Test ioctl() with various sample period values for IBS events.
+ */
+ if (ibs_ioctl_test())
+ ret = TEST_FAIL;
+
+ /*
+ * Test that opening a freq mode IBS event fails when the freq value is
+ * passed through ->config rather than explicitly in ->sample_freq. Also
+ * use a high freq value (beyond perf_event_max_sample_rate) to verify
+ * that the IBS driver does not bypass the perf_event_max_sample_rate
+ * check.
+ */
+ if (ibs_freq_neg_test())
+ ret = TEST_FAIL;
+
+ /*
+ * L3MissOnly is a post-processing filter, i.e. the IBS HW checks for an
+ * L3 miss at the completion of the tagged uOp. The sample is discarded
+ * if the tagged uOp did not cause an L3 miss. Also, the IBS HW internally
+ * resets CurCnt to a small pseudo-random value and resumes counting. A
+ * new uOp is tagged once CurCnt reaches MaxCnt, and the process repeats
+ * until the tagged uOp causes an L3 miss.
+ *
+ * With a freq mode event, the next sample period is calculated by the
+ * generic kernel code on every sample to achieve the desired sample
+ * frequency.
+ *
+ * Since the number of times the HW internally resets CurCnt, and the
+ * pseudo-random CurCnt values used on those occasions, are not known to
+ * SW, the kernel's sample period adjustment goes for a toss for freq mode
+ * IBS events. The kernel will set a very small period for the next sample
+ * if the window between the current and previous samples is too large
+ * because multiple samples were discarded internally by the IBS HW.
+ *
+ * Test that IBS sample period constraints are honored when L3MissOnly
+ * is ON.
+ */
+ if (ibs_l3missonly_test(perf))
+ ret = TEST_FAIL;
+
+ return ret;
+}
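
The config tests above exercise the IBS-specific interface where the sample period is
encoded in the MaxCnt bitfield of perf_event_attr->config instead of ->sample_period.
A minimal standalone sketch of that open path follows, assuming the ibs_op PMU type is
read from sysfs and using config 0x1000 (an effective period of 0x10000 per the
op_configs table); read_pmu_type() is a hypothetical helper, the test itself uses
perf_pmus__find().

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper; the test above uses perf_pmus__find() instead. */
static int read_pmu_type(const char *path)
{
	FILE *fp = fopen(path, "r");
	int type = -1;

	if (fp) {
		if (fscanf(fp, "%d", &type) != 1)
			type = -1;
		fclose(fp);
	}
	return type;
}

int main(void)
{
	struct perf_event_attr attr;
	int type, fd;

	type = read_pmu_type("/sys/bus/event_source/devices/ibs_op/type");
	if (type < 0)
		return 1;

	memset(&attr, 0, sizeof(attr));
	attr.type = type;
	attr.size = sizeof(attr);
	attr.config = 0x1000;			/* MaxCnt bits: period 0x10000 */
	attr.sample_type = PERF_SAMPLE_PERIOD;
	attr.disabled = 1;
	/* freq = 0 and sample_period = 0: the period comes from config. */

	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0); /* CPU0, all processes */
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}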
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index a216a5d172ed..bfee2432515b 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -25,6 +25,7 @@ DEFINE_SUITE("x86 bp modify", bp_modify);
#endif
DEFINE_SUITE("x86 Sample parsing", x86_sample_parsing);
DEFINE_SUITE("AMD IBS via core pmu", amd_ibs_via_core_pmu);
+DEFINE_SUITE_EXCLUSIVE("AMD IBS sample period", amd_ibs_period);
static struct test_case hybrid_tests[] = {
TEST_CASE_REASON("x86 hybrid event parsing", hybrid, "not hybrid"),
{ .name = NULL, }
@@ -50,6 +51,7 @@ struct test_suite *arch_tests[] = {
#endif
&suite__x86_sample_parsing,
&suite__amd_ibs_via_core_pmu,
+ &suite__amd_ibs_period,
&suite__hybrid,
NULL,
};
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 8f235d8b67b6..add33cb5d1da 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -19,6 +19,7 @@
#include "../../../util/evlist.h"
#include "../../../util/evsel.h"
#include "../../../util/evsel_config.h"
+#include "../../../util/config.h"
#include "../../../util/cpumap.h"
#include "../../../util/mmap.h"
#include <subcmd/parse-options.h>
@@ -52,6 +53,7 @@ struct intel_pt_recording {
struct perf_pmu *intel_pt_pmu;
int have_sched_switch;
struct evlist *evlist;
+ bool all_switch_events;
bool snapshot_mode;
bool snapshot_init_done;
size_t snapshot_size;
@@ -794,7 +796,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
bool cpu_wide = !target__none(&opts->target) &&
!target__has_task(&opts->target);
- if (!cpu_wide && perf_can_record_cpu_wide()) {
+ if (ptr->all_switch_events && !cpu_wide && perf_can_record_cpu_wide()) {
struct evsel *switch_evsel;
switch_evsel = evlist__add_dummy_on_all_cpus(evlist);
@@ -1178,6 +1180,16 @@ static u64 intel_pt_reference(struct auxtrace_record *itr __maybe_unused)
return rdtsc();
}
+static int intel_pt_perf_config(const char *var, const char *value, void *data)
+{
+ struct intel_pt_recording *ptr = data;
+
+ if (!strcmp(var, "intel-pt.all-switch-events"))
+ ptr->all_switch_events = perf_config_bool(var, value);
+
+ return 0;
+}
+
struct auxtrace_record *intel_pt_recording_init(int *err)
{
struct perf_pmu *intel_pt_pmu = perf_pmus__find(INTEL_PT_PMU_NAME);
@@ -1197,6 +1209,8 @@ struct auxtrace_record *intel_pt_recording_init(int *err)
return NULL;
}
+ perf_config(intel_pt_perf_config, ptr);
+
ptr->intel_pt_pmu = intel_pt_pmu;
ptr->itr.recording_options = intel_pt_recording_options;
ptr->itr.info_priv_size = intel_pt_info_priv_size;
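
With this change, the CPU-wide dummy event used to capture context-switch side-band
for all processes becomes opt-in through perf_config(). A usage sketch, assuming the
standard ~/.perfconfig location:

[intel-pt]
	all-switch-events = true

perf_config_bool() accepts the usual boolean spellings, so "yes"/"no" or "1"/"0"
should work as well.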
diff --git a/tools/perf/arch/x86/util/mem-events.c b/tools/perf/arch/x86/util/mem-events.c
index 62df03e91c7e..b38f519020ff 100644
--- a/tools/perf/arch/x86/util/mem-events.c
+++ b/tools/perf/arch/x86/util/mem-events.c
@@ -26,3 +26,9 @@ struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX] = {
E(NULL, NULL, NULL, false, 0),
E("mem-ldst", "%s//", NULL, false, 0),
};
+
+struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX] = {
+ E(NULL, NULL, NULL, false, 0),
+ E(NULL, NULL, NULL, false, 0),
+ E("mem-ldst", "%s/ldlat=%u/", NULL, true, 0),
+};
diff --git a/tools/perf/arch/x86/util/mem-events.h b/tools/perf/arch/x86/util/mem-events.h
index f55c8d3b7d59..11e09a256f5b 100644
--- a/tools/perf/arch/x86/util/mem-events.h
+++ b/tools/perf/arch/x86/util/mem-events.h
@@ -6,5 +6,6 @@ extern struct perf_mem_event perf_mem_events_intel[PERF_MEM_EVENTS__MAX];
extern struct perf_mem_event perf_mem_events_intel_aux[PERF_MEM_EVENTS__MAX];
extern struct perf_mem_event perf_mem_events_amd[PERF_MEM_EVENTS__MAX];
+extern struct perf_mem_event perf_mem_events_amd_ldlat[PERF_MEM_EVENTS__MAX];
#endif /* _X86_MEM_EVENTS_H */
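
The new perf_mem_events_amd_ldlat table builds the load-store event string as
"ibs_op/ldlat=%u/". It is only selected (see the pmu.c hunk below) when the ibs_op PMU
advertises an "ldlat" capability with value "1", i.e. when
/sys/bus/event_source/devices/ibs_op/caps/ldlat reads 1. As a usage sketch, on such a
system perf mem record --ldlat 100 -- <workload> would then open ibs_op/ldlat=100/
rather than the plain ibs_op// event.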
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index e0060dac2a9f..58113482654b 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -8,6 +8,8 @@
#include <linux/perf_event.h>
#include <linux/zalloc.h>
#include <api/fs/fs.h>
+#include <api/io_dir.h>
+#include <internal/cpumap.h>
#include <errno.h>
#include "../../../util/intel-pt.h"
@@ -16,10 +18,261 @@
#include "../../../util/fncache.h"
#include "../../../util/pmus.h"
#include "mem-events.h"
+#include "util/debug.h"
#include "util/env.h"
+#include "util/header.h"
-void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
+static bool x86__is_intel_graniterapids(void)
{
+ static bool checked_if_graniterapids;
+ static bool is_graniterapids;
+
+ if (!checked_if_graniterapids) {
+ const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
+ char *cpuid = get_cpuid_str((struct perf_cpu){0});
+
+ is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
+ free(cpuid);
+ checked_if_graniterapids = true;
+ }
+ return is_graniterapids;
+}
+
+static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
+{
+ struct perf_cpu_map *cpus;
+ char *buf = NULL;
+ size_t buf_len;
+
+ if (sysfs__read_str(sysfs_path, &buf, &buf_len) < 0)
+ return NULL;
+
+ cpus = perf_cpu_map__new(buf);
+ free(buf);
+ return cpus;
+}
+
+static int snc_nodes_per_l3_cache(void)
+{
+ static bool checked_snc;
+ static int snc_nodes;
+
+ if (!checked_snc) {
+ struct perf_cpu_map *node_cpus =
+ read_sysfs_cpu_map("devices/system/node/node0/cpulist");
+ struct perf_cpu_map *cache_cpus =
+ read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
+
+ snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
+ perf_cpu_map__put(cache_cpus);
+ perf_cpu_map__put(node_cpus);
+ checked_snc = true;
+ }
+ return snc_nodes;
+}
+
+static bool starts_with(const char *str, const char *prefix)
+{
+ return !strncmp(prefix, str, strlen(prefix));
+}
+
+static int num_chas(void)
+{
+ static bool checked_chas;
+ static int num_chas;
+
+ if (!checked_chas) {
+ int fd = perf_pmu__event_source_devices_fd();
+ struct io_dir dir;
+ struct io_dirent64 *dent;
+
+ if (fd < 0)
+ return -1;
+
+ io_dir__init(&dir, fd);
+
+ while ((dent = io_dir__readdir(&dir)) != NULL) {
+ /* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
+ if (starts_with(dent->d_name, "uncore_cha_"))
+ num_chas++;
+ }
+ close(fd);
+ checked_chas = true;
+ }
+ return num_chas;
+}
+
+#define MAX_SNCS 6
+
+static int uncore_cha_snc(struct perf_pmu *pmu)
+{
+ // CHA SNC numbers are ordered to correspond to the CHA numbers.
+ unsigned int cha_num;
+ int num_cha, chas_per_node, cha_snc;
+ int snc_nodes = snc_nodes_per_l3_cache();
+
+ if (snc_nodes <= 1)
+ return 0;
+
+ num_cha = num_chas();
+ if (num_cha <= 0) {
+ pr_warning("Unexpected: no CHAs found\n");
+ return 0;
+ }
+
+ /* Compute SNC for PMU. */
+ if (sscanf(pmu->name, "uncore_cha_%u", &cha_num) != 1) {
+ pr_warning("Unexpected: unable to compute CHA number '%s'\n", pmu->name);
+ return 0;
+ }
+ chas_per_node = num_cha / snc_nodes;
+ cha_snc = cha_num / chas_per_node;
+
+ /* Range check cha_snc for unexpected out of bounds values. */
+ return cha_snc >= MAX_SNCS ? 0 : cha_snc;
+}
+
+static int uncore_imc_snc(struct perf_pmu *pmu)
+{
+ // Compute the IMC SNC using lookup tables.
+ unsigned int imc_num;
+ int snc_nodes = snc_nodes_per_l3_cache();
+ const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
+ const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
+ const u8 *snc_map;
+ size_t snc_map_len;
+
+ switch (snc_nodes) {
+ case 2:
+ snc_map = snc2_map;
+ snc_map_len = ARRAY_SIZE(snc2_map);
+ break;
+ case 3:
+ snc_map = snc3_map;
+ snc_map_len = ARRAY_SIZE(snc3_map);
+ break;
+ default:
+ /* Error or no lookup support for SNC with >3 nodes. */
+ return 0;
+ }
+
+ /* Compute SNC for PMU. */
+ if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) {
+ pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
+ return 0;
+ }
+ if (imc_num >= snc_map_len) {
+ pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
+ return 0;
+ }
+ return snc_map[imc_num];
+}
+
+static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
+{
+ static bool checked_cpu_adjust[MAX_SNCS];
+ static int cpu_adjust[MAX_SNCS];
+ struct perf_cpu_map *node_cpus;
+ char node_path[] = "devices/system/node/node0/cpulist";
+
+ /* Was adjust already computed? */
+ if (checked_cpu_adjust[pmu_snc])
+ return cpu_adjust[pmu_snc];
+
+ /* SNC0 doesn't need an adjust. */
+ if (pmu_snc == 0) {
+ cpu_adjust[0] = 0;
+ checked_cpu_adjust[0] = true;
+ return 0;
+ }
+
+ /*
+ * Use NUMA topology to compute first CPU of the NUMA node, we want to
+ * adjust CPU 0 to be this and similarly for other CPUs if there is >1
+ * socket.
+ */
+ assert(pmu_snc >= 0 && pmu_snc <= 9);
+ node_path[24] += pmu_snc; // Shift node0 to be node<pmu_snc>.
+ node_cpus = read_sysfs_cpu_map(node_path);
+ cpu_adjust[pmu_snc] = perf_cpu_map__cpu(node_cpus, 0).cpu;
+ if (cpu_adjust[pmu_snc] < 0) {
+ pr_debug("Failed to read valid CPU list from <sysfs>/%s\n", node_path);
+ cpu_adjust[pmu_snc] = 0;
+ } else {
+ checked_cpu_adjust[pmu_snc] = true;
+ }
+ perf_cpu_map__put(node_cpus);
+ return cpu_adjust[pmu_snc];
+}
+
+static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
+{
+ // With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
+ // topology. For example, a two socket graniterapids machine may be set
+ // up with 3-way SNC meaning there are 6 NUMA nodes that should be
+ // displayed with --per-node. The cpumask of the CHA and IMC PMUs
+ // reflects per-socket information, meaning that, for example, uncore_cha_60
+ // on a two socket graniterapids machine with 120 cores per socket will
+ // have a cpumask of "0,120". This cpumask needs adjusting to "40,160"
+ // to reflect that uncore_cha_60 is used for the 2nd SNC of each
+ // socket. Without the adjustment events on uncore_cha_60 will appear in
+ // node 0 and node 3 (in our example 2 socket 3-way set up), but with
+ // the adjustment they will appear in node 1 and node 4. The number of
+ // CHAs is typically larger than the number of cores. The CHA numbers
+ // are assumed to split evenly and inorder wrt core numbers. There are
+ // fewer memory IMC PMUs than cores and mapping is handled using lookup
+ // tables.
+ static struct perf_cpu_map *cha_adjusted[MAX_SNCS];
+ static struct perf_cpu_map *imc_adjusted[MAX_SNCS];
+ struct perf_cpu_map **adjusted = cha ? cha_adjusted : imc_adjusted;
+ int idx, pmu_snc, cpu_adjust;
+ struct perf_cpu cpu;
+ bool alloc;
+
+ // The cpumask from the kernel holds the first CPU of each socket, e.g. 0,120.
+ if (perf_cpu_map__cpu(pmu->cpus, 0).cpu != 0) {
+ pr_debug("Ignoring cpumask adjust for %s as unexpected first CPU\n", pmu->name);
+ return;
+ }
+
+ pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
+ if (pmu_snc == 0) {
+ // No adjustment necessary for the first SNC.
+ return;
+ }
+
+ alloc = adjusted[pmu_snc] == NULL;
+ if (alloc) {
+ // Hold onto the perf_cpu_map globally to avoid recomputation.
+ cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
+ adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
+ if (!adjusted[pmu_snc])
+ return;
+ }
+
+ perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
+ // Compute the new cpu map values or, if not allocating, assert that
+ // they match expectations. The asserts are compiled out in NDEBUG
+ // builds to avoid overhead.
+ if (alloc) {
+ RC_CHK_ACCESS(adjusted[pmu_snc])->map[idx].cpu = cpu.cpu + cpu_adjust;
+ } else if (idx == 0) {
+ cpu_adjust = perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu - cpu.cpu;
+ assert(uncore_cha_imc_compute_cpu_adjust(pmu_snc) == cpu_adjust);
+ } else {
+ assert(perf_cpu_map__cpu(adjusted[pmu_snc], idx).cpu ==
+ cpu.cpu + cpu_adjust);
+ }
+ }
+
+ perf_cpu_map__put(pmu->cpus);
+ pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]);
+}
+
+void perf_pmu__arch_init(struct perf_pmu *pmu)
+{
+ struct perf_pmu_caps *ldlat_cap;
+
#ifdef HAVE_AUXTRACE_SUPPORT
if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
pmu->auxtrace = true;
@@ -33,12 +286,31 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
#endif
if (x86__is_amd_cpu()) {
- if (!strcmp(pmu->name, "ibs_op"))
- pmu->mem_events = perf_mem_events_amd;
- } else if (pmu->is_core) {
- if (perf_pmu__have_event(pmu, "mem-loads-aux"))
- pmu->mem_events = perf_mem_events_intel_aux;
- else
- pmu->mem_events = perf_mem_events_intel;
+ if (strcmp(pmu->name, "ibs_op"))
+ return;
+
+ pmu->mem_events = perf_mem_events_amd;
+
+ if (!perf_pmu__caps_parse(pmu))
+ return;
+
+ ldlat_cap = perf_pmu__get_cap(pmu, "ldlat");
+ if (!ldlat_cap || strcmp(ldlat_cap->value, "1"))
+ return;
+
+ perf_mem_events__loads_ldlat = 0;
+ pmu->mem_events = perf_mem_events_amd_ldlat;
+ } else {
+ if (pmu->is_core) {
+ if (perf_pmu__have_event(pmu, "mem-loads-aux"))
+ pmu->mem_events = perf_mem_events_intel_aux;
+ else
+ pmu->mem_events = perf_mem_events_intel;
+ } else if (x86__is_intel_graniterapids()) {
+ if (starts_with(pmu->name, "uncore_cha_"))
+ gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
+ else if (starts_with(pmu->name, "uncore_imc_"))
+ gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+ }
}
}
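
For the sub-NUMA clustering adjustment, a small standalone sketch of the CHA-to-SNC
arithmetic, with the example topology from the comment in
gnr_uncore_cha_imc_adjust_cpumask_for_snc() hard-coded rather than derived from sysfs
as the real code does:

#include <assert.h>

int main(void)
{
	/* Example: 2 sockets, 120 cores and 120 CHA PMUs per socket, 3-way SNC. */
	int num_cha = 120;		/* uncore_cha_0 .. uncore_cha_119 */
	int snc_nodes = 3;		/* SNC nodes per L3/socket */
	int cha_num = 60;		/* uncore_cha_60 */

	int chas_per_node = num_cha / snc_nodes;	/* 40 */
	int cha_snc = cha_num / chas_per_node;		/* 1, i.e. the second SNC node */

	/*
	 * Assuming node CPU lists are contiguous and in order, node1 starts at
	 * CPU 40, so the kernel-provided cpumask "0,120" is adjusted to "40,160".
	 */
	assert(cha_snc == 1);
	return 0;
}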