Diffstat (limited to 'tools/perf/util/arm-spe.c')
-rw-r--r--  tools/perf/util/arm-spe.c  263
1 file changed, 184 insertions, 79 deletions
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 8942fa598a84..71be979f5077 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -39,6 +39,18 @@
#define is_ldst_op(op) (!!((op) & ARM_SPE_OP_LDST))
+#define ARM_SPE_CACHE_EVENT(lvl) \
+ (ARM_SPE_##lvl##_ACCESS | ARM_SPE_##lvl##_MISS)
+
+#define arm_spe_is_cache_level(type, lvl) \
+ ((type) & ARM_SPE_CACHE_EVENT(lvl))
+
+#define arm_spe_is_cache_hit(type, lvl) \
+ (((type) & ARM_SPE_CACHE_EVENT(lvl)) == ARM_SPE_##lvl##_ACCESS)
+
+#define arm_spe_is_cache_miss(type, lvl) \
+ ((type) & ARM_SPE_##lvl##_MISS)
+
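
For reference, a rough sketch of how these token-pasting helpers expand for lvl == L1D; the ARM_SPE_L1D_* flags are the event bits carried in record->type, and this expansion is illustrative only, not part of the patch.

	/*
	 * ARM_SPE_CACHE_EVENT(L1D)          -> (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)
	 * arm_spe_is_cache_level(type, L1D) -> any L1D bit set (the level was involved)
	 * arm_spe_is_cache_hit(type, L1D)   -> ACCESS bit set and MISS bit clear (a hit)
	 * arm_spe_is_cache_miss(type, L1D)  -> MISS bit set
	 */
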
struct arm_spe {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
@@ -62,7 +74,6 @@ struct arm_spe {
u8 sample_remote_access;
u8 sample_memory;
u8 sample_instructions;
- u64 instructions_sample_period;
u64 l1d_miss_id;
u64 l1d_access_id;
@@ -101,7 +112,7 @@ struct arm_spe_queue {
u64 time;
u64 timestamp;
struct thread *thread;
- u64 period_instructions;
+ u64 sample_count;
u32 flags;
struct branch_stack *last_branch;
};
@@ -228,7 +239,6 @@ static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
speq->pid = -1;
speq->tid = -1;
speq->cpu = -1;
- speq->period_instructions = 0;
/* params set */
params.get_trace = arm_spe_get_trace;
@@ -305,15 +315,28 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
return 0;
}
-static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu)
+static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, int cpu)
{
u64 i;
if (!spe->metadata)
return NULL;
+ /* CPU ID is -1 for per-thread mode */
+ if (cpu < 0) {
+ /*
+ * On a heterogeneous system, a CPU ID of -1 means it cannot
+ * be confirmed that the data source packet is supported.
+ */
+ if (!spe->is_homogeneous)
+ return NULL;
+
+ /* In a homogeneous system, simply use CPU0's metadata */
+ return spe->metadata[0];
+ }
+
for (i = 0; i < spe->metadata_nr_cpu; i++)
- if (spe->metadata[i][ARM_SPE_CPU] == cpu)
+ if (spe->metadata[i][ARM_SPE_CPU] == (u64)cpu)
return spe->metadata[i];
return NULL;
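
A minimal usage sketch for this helper; arm_spe__midr_for_queue() is a hypothetical wrapper shown only for illustration, not a function in this file.

	static u64 arm_spe__midr_for_queue(struct arm_spe *spe,
					   struct arm_spe_queue *speq)
	{
		/* Per-thread mode (cpu == -1) falls back to CPU0's metadata
		 * on homogeneous systems, otherwise this returns NULL. */
		u64 *metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);

		return metadata ? metadata[ARM_SPE_CPU_MIDR] : 0;
	}
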
@@ -352,7 +375,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
sample->cpumode = arm_spe_cpumode(spe, sample->ip);
sample->pid = speq->pid;
sample->tid = speq->tid;
- sample->period = 1;
+ sample->period = spe->synth_opts.period;
sample->cpu = speq->cpu;
sample->simd_flags = arm_spe__synth_simd_flags(record);
@@ -471,7 +494,8 @@ arm_spe_deliver_synth_event(struct arm_spe *spe,
}
static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
- u64 spe_events_id, u64 data_src)
+ u64 spe_events_id,
+ union perf_mem_data_src data_src)
{
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
@@ -486,7 +510,7 @@ static int arm_spe__synth_mem_sample(struct arm_spe_queue *speq,
sample.stream_id = spe_events_id;
sample.addr = record->virt_addr;
sample.phys_addr = record->phys_addr;
- sample.data_src = data_src;
+ sample.data_src = data_src.val;
sample.weight = record->latency;
ret = arm_spe_deliver_synth_event(spe, speq, event, &sample);
@@ -519,7 +543,8 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq,
}
static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
- u64 spe_events_id, u64 data_src)
+ u64 spe_events_id,
+ union perf_mem_data_src data_src)
{
struct arm_spe *spe = speq->spe;
struct arm_spe_record *record = &speq->decoder->record;
@@ -527,14 +552,6 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
struct perf_sample sample;
int ret;
- /*
- * Handles perf instruction sampling period.
- */
- speq->period_instructions++;
- if (speq->period_instructions < spe->instructions_sample_period)
- return 0;
- speq->period_instructions = 0;
-
perf_sample__init(&sample, /*all=*/true);
arm_spe_prep_sample(spe, speq, event, &sample);
@@ -542,8 +559,7 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq,
sample.stream_id = spe_events_id;
sample.addr = record->to_ip;
sample.phys_addr = record->phys_addr;
- sample.data_src = data_src;
- sample.period = spe->instructions_sample_period;
+ sample.data_src = data_src.val;
sample.weight = record->latency;
sample.flags = speq->flags;
sample.branch_stack = speq->last_branch;
@@ -670,8 +686,8 @@ static void arm_spe__synth_data_source_common(const struct arm_spe_record *recor
* socket
*/
case ARM_SPE_COMMON_DS_REMOTE:
- data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1;
- data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE;
+ data_src->mem_lvl = PERF_MEM_LVL_NA;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER;
break;
@@ -819,30 +835,121 @@ static const struct data_source_handle data_source_handles[] = {
DS(hisi_hip_ds_encoding_cpus, data_source_hisi_hip),
};
-static void arm_spe__synth_memory_level(const struct arm_spe_record *record,
- union perf_mem_data_src *data_src)
+static void arm_spe__synth_ld_memory_level(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
{
- if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
- data_src->mem_lvl = PERF_MEM_LVL_L3;
+ /*
+ * To find a cache hit, search in ascending order from the lowest
+ * level cache to the highest. This reflects the best-case scenario
+ * for a cache hit.
+ */
+ if (arm_spe_is_cache_hit(record->type, L1D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ } else if (record->type & ARM_SPE_RECENTLY_FETCHED) {
+ data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_LFB;
+ } else if (arm_spe_is_cache_hit(record->type, L2D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ } else if (arm_spe_is_cache_hit(record->type, LLC)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ /*
+ * To find a cache miss, search in descending order from the highest
+ * level cache to the lowest. This represents the worst-case scenario
+ * for a cache miss.
+ */
+ } else if (arm_spe_is_cache_miss(record->type, LLC)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_MISS;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ } else if (arm_spe_is_cache_miss(record->type, L2D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_MISS;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ } else if (arm_spe_is_cache_miss(record->type, L1D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ }
+}
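
A worked example of the ordering above, using a hypothetical record->type bit pattern for a load that misses L1 and hits the last level cache:

	/*
	 * record->type = ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | ARM_SPE_LLC_ACCESS
	 *
	 *   arm_spe_is_cache_hit(type, L1D) -> false (MISS bit is set)
	 *   arm_spe_is_cache_hit(type, L2D) -> false (no L2D bits at all)
	 *   arm_spe_is_cache_hit(type, LLC) -> true  (ACCESS set, MISS clear)
	 *
	 * so the load is reported as PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT with
	 * PERF_MEM_LVLNUM_L3.
	 */
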
- if (record->type & ARM_SPE_LLC_MISS)
- data_src->mem_lvl |= PERF_MEM_LVL_MISS;
- else
- data_src->mem_lvl |= PERF_MEM_LVL_HIT;
- } else if (record->type & (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS)) {
+static void arm_spe__synth_st_memory_level(const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ /* Record the greatest level info for a store operation. */
+ if (arm_spe_is_cache_level(record->type, LLC)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L3;
+ data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, LLC) ?
+ PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3;
+ } else if (arm_spe_is_cache_level(record->type, L2D)) {
+ data_src->mem_lvl = PERF_MEM_LVL_L2;
+ data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L2D) ?
+ PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2;
+ } else if (arm_spe_is_cache_level(record->type, L1D)) {
data_src->mem_lvl = PERF_MEM_LVL_L1;
+ data_src->mem_lvl |= arm_spe_is_cache_miss(record->type, L1D) ?
+ PERF_MEM_LVL_MISS : PERF_MEM_LVL_HIT;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1;
+ }
+}
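
A similar illustration for the store path, where only the highest cache level present in the record is reported (hypothetical bit pattern):

	/*
	 * record->type = ARM_SPE_L1D_MISS | ARM_SPE_L2D_ACCESS
	 *
	 *   arm_spe_is_cache_level(type, LLC) -> false, skip L3
	 *   arm_spe_is_cache_level(type, L2D) -> true
	 *   arm_spe_is_cache_miss(type, L2D)  -> false
	 *
	 * so the store is reported as PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT with
	 * PERF_MEM_LVLNUM_L2.
	 */
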
- if (record->type & ARM_SPE_L1D_MISS)
- data_src->mem_lvl |= PERF_MEM_LVL_MISS;
- else
- data_src->mem_lvl |= PERF_MEM_LVL_HIT;
+static void arm_spe__synth_memory_level(struct arm_spe_queue *speq,
+ const struct arm_spe_record *record,
+ union perf_mem_data_src *data_src)
+{
+ struct arm_spe *spe = speq->spe;
+
+ /*
+ * The data source packet carries more detailed cache level info
+ * (e.g. for peer snooping), so keep the memory level if it has
+ * already been set by data source parsing.
+ */
+ if (!data_src->mem_lvl) {
+ if (data_src->mem_op == PERF_MEM_OP_LOAD)
+ arm_spe__synth_ld_memory_level(record, data_src);
+ if (data_src->mem_op == PERF_MEM_OP_STORE)
+ arm_spe__synth_st_memory_level(record, data_src);
+ }
+
+ if (!data_src->mem_lvl) {
+ data_src->mem_lvl = PERF_MEM_LVL_NA;
+ data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
+ }
+
+ /*
+ * If 'mem_snoop' has already been set by the data source packet,
+ * do not override it here.
+ */
+ if (!data_src->mem_snoop) {
+ if (record->type & ARM_SPE_DATA_SNOOPED) {
+ if (record->type & ARM_SPE_HITM)
+ data_src->mem_snoop = PERF_MEM_SNOOP_HITM;
+ else
+ data_src->mem_snoop = PERF_MEM_SNOOP_HIT;
+ } else {
+ u64 *metadata =
+ arm_spe__get_metadata_by_cpu(spe, speq->cpu);
+
+ /*
+ * Set NA ("Not available") mode if there is no metadata or the
+ * SNOOPED event is not supported.
+ */
+ if (!metadata ||
+ !(metadata[ARM_SPE_CAP_EVENT_FILTER] & ARM_SPE_DATA_SNOOPED))
+ data_src->mem_snoop = PERF_MEM_SNOOP_NA;
+ else
+ data_src->mem_snoop = PERF_MEM_SNOOP_NONE;
+ }
}
- if (record->type & ARM_SPE_REMOTE_ACCESS)
- data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1;
+ if (!data_src->mem_remote) {
+ if (record->type & ARM_SPE_REMOTE_ACCESS)
+ data_src->mem_remote = PERF_MEM_REMOTE_REMOTE;
+ }
}
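
When the data source packet has not already set a snoop mode, the fallback above can be summarised as follows; the table is illustrative and assumes the ARM_SPE_CAP_EVENT_FILTER metadata word advertises which SPE events the PMU supports:

	/*
	 *   DATA_SNOOPED set,   HITM set            -> PERF_MEM_SNOOP_HITM
	 *   DATA_SNOOPED set,   HITM clear          -> PERF_MEM_SNOOP_HIT
	 *   DATA_SNOOPED clear, no metadata or the
	 *                       event unsupported   -> PERF_MEM_SNOOP_NA
	 *   DATA_SNOOPED clear, event supported     -> PERF_MEM_SNOOP_NONE
	 */
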
-static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
+static void arm_spe__synth_ds(struct arm_spe_queue *speq,
const struct arm_spe_record *record,
union perf_mem_data_src *data_src)
{
@@ -859,56 +966,41 @@ static bool arm_spe__synth_ds(struct arm_spe_queue *speq,
cpuid = perf_env__cpuid(perf_session__env(spe->session));
midr = strtol(cpuid, NULL, 16);
} else {
- /* CPU ID is -1 for per-thread mode */
- if (speq->cpu < 0) {
- /*
- * On the heterogeneous system, due to CPU ID is -1,
- * cannot confirm the data source packet is supported.
- */
- if (!spe->is_homogeneous)
- return false;
-
- /* In homogeneous system, simply use CPU0's metadata */
- if (spe->metadata)
- metadata = spe->metadata[0];
- } else {
- metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
- }
-
+ metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu);
if (!metadata)
- return false;
+ return;
midr = metadata[ARM_SPE_CPU_MIDR];
}
for (i = 0; i < ARRAY_SIZE(data_source_handles); i++) {
if (is_midr_in_range_list(midr, data_source_handles[i].midr_ranges)) {
- data_source_handles[i].ds_synth(record, data_src);
- return true;
+ return data_source_handles[i].ds_synth(record, data_src);
}
}
- return false;
+ return;
}
-static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
- const struct arm_spe_record *record)
+static union perf_mem_data_src
+arm_spe__synth_data_source(struct arm_spe_queue *speq,
+ const struct arm_spe_record *record)
{
- union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
+ union perf_mem_data_src data_src = {};
/* Only synthesize data source for LDST operations */
if (!is_ldst_op(record->op))
- return 0;
+ return data_src;
if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
else if (record->op & ARM_SPE_OP_ST)
data_src.mem_op = PERF_MEM_OP_STORE;
else
- return 0;
+ return data_src;
- if (!arm_spe__synth_ds(speq, record, &data_src))
- arm_spe__synth_memory_level(record, &data_src);
+ arm_spe__synth_ds(speq, record, &data_src);
+ arm_spe__synth_memory_level(speq, record, &data_src);
if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) {
data_src.mem_dtlb = PERF_MEM_TLB_WK;
@@ -919,16 +1011,24 @@ static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq,
data_src.mem_dtlb |= PERF_MEM_TLB_HIT;
}
- return data_src.val;
+ return data_src;
}
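
A minimal sketch of how a caller consumes the union now returned here; example_fill_data_src() is an assumed name for illustration, and the real caller is arm_spe_sample() below:

	static void example_fill_data_src(struct arm_spe_queue *speq,
					  const struct arm_spe_record *record,
					  struct perf_sample *sample)
	{
		union perf_mem_data_src dsrc = arm_spe__synth_data_source(speq, record);

		/* perf_sample still carries the packed u64 encoding */
		sample->data_src = dsrc.val;
	}
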
static int arm_spe_sample(struct arm_spe_queue *speq)
{
const struct arm_spe_record *record = &speq->decoder->record;
struct arm_spe *spe = speq->spe;
- u64 data_src;
+ union perf_mem_data_src data_src;
int err;
+ /*
+ * Discard records until the sampling period is reached.
+ */
+ speq->sample_count++;
+ if (speq->sample_count < spe->synth_opts.period)
+ return 0;
+ speq->sample_count = 0;
+
arm_spe__sample_flags(speq);
data_src = arm_spe__synth_data_source(speq, record);
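
A worked example of the downsampling above, assuming a synthesized period of 4 (e.g. --itrace=i4i):

	/*
	 *   record #1: sample_count 0 -> 1, 1 < 4, discarded
	 *   record #2: sample_count 1 -> 2, 2 < 4, discarded
	 *   record #3: sample_count 2 -> 3, 3 < 4, discarded
	 *   record #4: sample_count 3 -> 4, counter reset, sample synthesized
	 *
	 * i.e. roughly one perf sample per four SPE records, with
	 * sample->period and attr.sample_period set to 4 so counts scale.
	 */
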
@@ -1532,6 +1632,7 @@ static const char * const metadata_per_cpu_fmts[] = {
[ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n",
[ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n",
[ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n",
+ [ARM_SPE_CAP_EVENT_FILTER] = " Event Filter :0x%"PRIx64"\n",
};
static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr)
@@ -1628,6 +1729,7 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
attr.exclude_guest = evsel->core.attr.exclude_guest;
attr.sample_id_all = evsel->core.attr.sample_id_all;
attr.read_format = evsel->core.attr.read_format;
+ attr.sample_period = spe->synth_opts.period;
/* create new id val to be a fixed offset from evsel id */
id = evsel->core.id[0] + 1000000000;
@@ -1744,25 +1846,15 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
}
if (spe->synth_opts.instructions) {
- if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
- pr_warning("Only instruction-based sampling period is currently supported by Arm SPE.\n");
- goto synth_instructions_out;
- }
- if (spe->synth_opts.period > 1)
- pr_warning("Arm SPE has a hardware-based sample period.\n"
- "Additional instruction events will be discarded by --itrace\n");
-
spe->sample_instructions = true;
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
- attr.sample_period = spe->synth_opts.period;
- spe->instructions_sample_period = attr.sample_period;
+
err = perf_session__deliver_synth_attr_event(session, &attr, id);
if (err)
return err;
spe->instructions_id = id;
arm_spe_set_event_name(evlist, id, "instructions");
}
-synth_instructions_out:
return 0;
}
@@ -1871,10 +1963,23 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
if (dump_trace)
return 0;
- if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
spe->synth_opts = *session->itrace_synth_opts;
- else
+ } else {
itrace_synth_opts__set_default(&spe->synth_opts, false);
+ /* Default nanoseconds period not supported */
+ spe->synth_opts.period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
+ spe->synth_opts.period = 1;
+ }
+
+ if (spe->synth_opts.period_type != PERF_ITRACE_PERIOD_INSTRUCTIONS) {
+ ui__error("You must only use i (instructions) --itrace period with Arm SPE. e.g --itrace=i1i\n");
+ err = -EINVAL;
+ goto err_free_queues;
+ }
+ if (spe->synth_opts.period > 1)
+ ui__warning("Arm SPE has a hardware-based sampling period.\n\n"
+ "--itrace periods > 1i downsample by an interval of n SPE samples rather than n instructions.\n");
err = arm_spe_synth_events(spe, session);
if (err)