summary | refs | log | tree | commit | diff
path: root/drivers/perf/fsl_imx8_ddr_perf.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-12-14 16:24:30 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-12-14 16:24:30 -0800
commit: 0ca2ce81eb8ee30f3ba8ac7967fef9cfbb44dbdb (patch)
tree: 1c47e68f6ece1f7a752d2035c2ba169ddc601c22 /drivers/perf/fsl_imx8_ddr_perf.c
parent: 586592478b1fa8bb8cd6875a9191468e9b1a8b13 (diff)
parent: d889797530c66f699170233474eab3361471e808 (diff)
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:

 - Expose tag address bits in siginfo. The original arm64 ABI did not expose any of the bits 63:56 of a tagged address in siginfo. In the presence of user ASAN or MTE, this information may be useful. The implementation is generic to other architectures supporting tags (like SPARC ADI, subject to wiring up the arch code). The user will have to opt in via sigaction(SA_EXPOSE_TAGBITS) so that the extra bits, if available, become visible in si_addr.

 - Default to 32-bit wide ZONE_DMA. Previously, ZONE_DMA was set to the lowest 1GB to cope with the Raspberry Pi 4 limitations, to the detriment of other platforms. With these changes, the kernel scans the Device Tree dma-ranges and the ACPI IORT information before deciding on a smaller ZONE_DMA.

 - Strengthen READ_ONCE() to acquire when CONFIG_LTO=y. When building with LTO, there is an increased risk of the compiler converting an address dependency headed by a READ_ONCE() invocation into a control dependency and consequently allowing for harmful reordering by the CPU.

 - Add CPPC FFH support using arm64 AMU counters.

 - set_fs() removal on arm64. This renders the User Access Override (UAO) ARMv8 feature unnecessary.

 - Perf updates: PMU driver for the ARM DMC-620 memory controller, sysfs identifier file for SMMUv3, stop event counters support for i.MX8MP, enable the perf events-based hard lockup detector.

 - Reorganise the kernel VA space slightly so that 52-bit VA configurations can use more virtual address space.

 - Improve the robustness of the arm64 memory offline event notifier.

 - Pad the Image header to 64K following the EFI header definition updated recently to increase the section alignment to 64K.

 - Support CONFIG_CMDLINE_EXTEND on arm64.

 - Do not use tagged PC in the kernel (TCR_EL1.TBID1==1), freeing up 8 bits for PtrAuth.

 - Switch to vmapped shadow call stacks.

 - Miscellaneous clean-ups.
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (78 commits)
  perf/imx_ddr: Add system PMU identifier for userspace
  bindings: perf: imx-ddr: add compatible string
  arm64: Fix build failure when HARDLOCKUP_DETECTOR_PERF is enabled
  arm64: mte: fix prctl(PR_GET_TAGGED_ADDR_CTRL) if TCF0=NONE
  arm64: mark __system_matches_cap as __maybe_unused
  arm64: uaccess: remove vestigal UAO support
  arm64: uaccess: remove redundant PAN toggling
  arm64: uaccess: remove addr_limit_user_check()
  arm64: uaccess: remove set_fs()
  arm64: uaccess cleanup macro naming
  arm64: uaccess: split user/kernel routines
  arm64: uaccess: refactor __{get,put}_user
  arm64: uaccess: simplify __copy_user_flushcache()
  arm64: uaccess: rename privileged uaccess routines
  arm64: sdei: explicitly simulate PAN/UAO entry
  arm64: sdei: move uaccess logic to arch/arm64/
  arm64: head.S: always initialize PSTATE
  arm64: head.S: cleanup SCTLR_ELx initialization
  arm64: head.S: rename el2_setup -> init_kernel_el
  arm64: add C wrappers for SET_PSTATE_*()
  ...
Diffstat (limited to 'drivers/perf/fsl_imx8_ddr_perf.c')
-rw-r--r-- drivers/perf/fsl_imx8_ddr_perf.c | 135
1 files changed, 109 insertions, 26 deletions
diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c
index 397540a4b799..a11bfd8a0823 100644
--- a/drivers/perf/fsl_imx8_ddr_perf.c
+++ b/drivers/perf/fsl_imx8_ddr_perf.c
@@ -50,6 +50,7 @@ static DEFINE_IDA(ddr_ida);
struct fsl_ddr_devtype_data { /* per-compatible data, referenced from the of_device_id .data pointers */
unsigned int quirks; /* quirks needed for different DDR Perf core */
+ const char *identifier; /* system PMU identifier for userspace; left NULL when a variant has none */
};
static const struct fsl_ddr_devtype_data imx8_devtype_data;
@@ -58,13 +59,32 @@ static const struct fsl_ddr_devtype_data imx8m_devtype_data = {
.quirks = DDR_CAP_AXI_ID_FILTER,
};
+static const struct fsl_ddr_devtype_data imx8mq_devtype_data = { /* selected by "fsl,imx8mq-ddr-pmu" */
+ .quirks = DDR_CAP_AXI_ID_FILTER,
+ .identifier = "i.MX8MQ", /* string printed by the sysfs "identifier" attribute */
+};
+
+static const struct fsl_ddr_devtype_data imx8mm_devtype_data = { /* selected by "fsl,imx8mm-ddr-pmu" */
+ .quirks = DDR_CAP_AXI_ID_FILTER,
+ .identifier = "i.MX8MM", /* string printed by the sysfs "identifier" attribute */
+};
+
+static const struct fsl_ddr_devtype_data imx8mn_devtype_data = { /* selected by "fsl,imx8mn-ddr-pmu" */
+ .quirks = DDR_CAP_AXI_ID_FILTER,
+ .identifier = "i.MX8MN", /* string printed by the sysfs "identifier" attribute */
+};
+
static const struct fsl_ddr_devtype_data imx8mp_devtype_data = { /* selected by "fsl,imx8mp-ddr-pmu" */
.quirks = DDR_CAP_AXI_ID_FILTER_ENHANCED, /* the only variant here using the ENHANCED filter quirk */
+ .identifier = "i.MX8MP", /* string printed by the sysfs "identifier" attribute */
};
static const struct of_device_id imx_ddr_pmu_dt_ids[] = { /* maps each compatible string to its devtype data */
{ .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data}, /* devtype data declared without an initializer */
{ .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data}, /* generic i.MX8M entry: sets quirks only, no identifier */
+ { .compatible = "fsl,imx8mq-ddr-pmu", .data = &imx8mq_devtype_data},
+ { .compatible = "fsl,imx8mm-ddr-pmu", .data = &imx8mm_devtype_data},
+ { .compatible = "fsl,imx8mn-ddr-pmu", .data = &imx8mn_devtype_data},
{ .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data},
{ /* sentinel */ }
};
@@ -84,6 +104,40 @@ struct ddr_pmu {
int id;
};
+static ssize_t ddr_perf_identifier_show(struct device *dev,
+ struct device_attribute *attr,
+ char *page)
+{ /* sysfs show callback: writes the PMU identifier plus a newline into @page, returns the byte count */
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+ return sysfs_emit(page, "%s\n", pmu->devtype_data->identifier); /* sysfs_emit() bounds the output to the sysfs page, unlike sprintf() */
+}
+
+static umode_t ddr_perf_identifier_attr_visible(struct kobject *kobj,
+ struct attribute *attr,
+ int n)
+{ /* is_visible callback of the identifier group: returns the mode to use for @attr */
+ struct device *dev = kobj_to_dev(kobj);
+ struct ddr_pmu *pmu = dev_get_drvdata(dev);
+
+ if (!pmu->devtype_data->identifier)
+ return 0; /* this device type has no identifier string: report mode 0 */
+ return attr->mode; /* otherwise keep the mode the attribute was declared with */
+}
+
+static struct device_attribute ddr_perf_identifier_attr = /* read-only (0444) attribute named "identifier" */
+ __ATTR(identifier, 0444, ddr_perf_identifier_show, NULL); /* NULL store callback: no write path */
+
+static struct attribute *ddr_perf_identifier_attrs[] = { /* NULL-terminated attribute list for the group below */
+ &ddr_perf_identifier_attr.attr,
+ NULL,
+};
+
+static struct attribute_group ddr_perf_identifier_attr_group = { /* listed in attr_groups[] with the other PMU groups */
+ .attrs = ddr_perf_identifier_attrs,
+ .is_visible = ddr_perf_identifier_attr_visible, /* returns 0 when the devtype data has no identifier */
+};
+
enum ddr_perf_filter_capabilities {
PERF_CAP_AXI_ID_FILTER = 0,
PERF_CAP_AXI_ID_FILTER_ENHANCED,
@@ -237,6 +291,7 @@ static const struct attribute_group *attr_groups[] = {
&ddr_perf_format_attr_group,
&ddr_perf_cpumask_attr_group,
&ddr_perf_filter_cap_attr_group,
+ &ddr_perf_identifier_attr_group,
NULL,
};
@@ -361,25 +416,6 @@ static int ddr_perf_event_init(struct perf_event *event)
return 0;
}
-
-static void ddr_perf_event_update(struct perf_event *event)
-{
- struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
- struct hw_perf_event *hwc = &event->hw;
- u64 delta, prev_raw_count, new_raw_count;
- int counter = hwc->idx;
-
- do {
- prev_raw_count = local64_read(&hwc->prev_count);
- new_raw_count = ddr_perf_read_counter(pmu, counter);
- } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
- new_raw_count) != prev_raw_count);
-
- delta = (new_raw_count - prev_raw_count) & 0xFFFFFFFF;
-
- local64_add(delta, &event->count);
-}
-
static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
int counter, bool enable)
{
@@ -404,6 +440,56 @@ static void ddr_perf_counter_enable(struct ddr_pmu *pmu, int config,
}
}
+static bool ddr_perf_counter_overflow(struct ddr_pmu *pmu, int counter)
+{ /* returns true when CNTL_OVER is set in the control register of @counter */
+ int val;
+
+ val = readl_relaxed(pmu->base + counter * 4 + COUNTER_CNTL); /* control registers are spaced 4 bytes apart per counter */
+
+ return val & CNTL_OVER; /* a non-zero masked value converts to true */
+}
+
+static void ddr_perf_counter_clear(struct ddr_pmu *pmu, int counter)
+{ /* writes CNTL_CLEAR as 0 and then as 1 in the control register of @counter */
+ u8 reg = counter * 4 + COUNTER_CNTL; /* byte offset of this counter's control register */
+ int val;
+
+ val = readl_relaxed(pmu->base + reg);
+ val &= ~CNTL_CLEAR; /* first write: CNTL_CLEAR low, all other bits unchanged */
+ writel(val, pmu->base + reg);
+
+ val |= CNTL_CLEAR; /* second write: CNTL_CLEAR high again */
+ writel(val, pmu->base + reg); /* NOTE(review): presumably the 0 -> 1 sequence resets the count; confirm against the reference manual */
+}
+
+static void ddr_perf_event_update(struct perf_event *event)
+{ /* adds the current hardware count to event->count, warns on event-counter overflow, then clears the counter */
+ struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
+ struct hw_perf_event *hwc = &event->hw;
+ u64 new_raw_count;
+ int counter = hwc->idx;
+ int ret;
+
+ new_raw_count = ddr_perf_read_counter(pmu, counter);
+ local64_add(new_raw_count, &event->count); /* whole raw value is added: the counter is cleared at the end of every update */
+
+ /*
+ * On legacy SoCs an event counter keeps counting after it
+ * overflows, so it does not need to be cleared. On newer SoCs an
+ * event counter stops counting once it overflows and must be
+ * cleared before it will count again.
+ */
+ if (counter != EVENT_CYCLES_COUNTER) { /* overflow is only checked for event counters, not the cycle counter */
+ ret = ddr_perf_counter_overflow(pmu, counter);
+ if (ret)
+ dev_warn_ratelimited(pmu->dev, "events lost due to counter overflow (config 0x%llx)\n",
+ event->attr.config);
+ }
+
+ /* Clear the counter on every update, for both cycle and event counters. */
+ ddr_perf_counter_clear(pmu, counter);
+}
+
static void ddr_perf_event_start(struct perf_event *event, int flags)
{
struct ddr_pmu *pmu = to_ddr_pmu(event->pmu);
@@ -537,7 +623,7 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
{
int i;
struct ddr_pmu *pmu = (struct ddr_pmu *) p;
- struct perf_event *event, *cycle_event = NULL;
+ struct perf_event *event;
/* all counter will stop if cycle counter disabled */
ddr_perf_counter_enable(pmu,
@@ -547,7 +633,9 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
/*
* When the cycle counter overflows, all counters are stopped,
* and an IRQ is raised. If any other counter overflows, it
- * continues counting, and no IRQ is raised.
+ * continues counting, and no IRQ is raised. But for new SoCs,
+ * such as i.MX8MP, event counter would stop when overflow, so
+ * we need use cycle counter to stop overflow of event counter.
*
* Cycles occur at least 4 times as often as other events, so we
* can update all events on a cycle counter overflow and not
@@ -562,17 +650,12 @@ static irqreturn_t ddr_perf_irq_handler(int irq, void *p)
event = pmu->events[i];
ddr_perf_event_update(event);
-
- if (event->hw.idx == EVENT_CYCLES_COUNTER)
- cycle_event = event;
}
ddr_perf_counter_enable(pmu,
EVENT_CYCLES_ID,
EVENT_CYCLES_COUNTER,
true);
- if (cycle_event)
- ddr_perf_event_update(cycle_event);
return IRQ_HANDLED;
}