summaryrefslogtreecommitdiff
path: root/arch/x86
diff options
context:
space:
mode:
authorRussell King (Oracle) <rmk+kernel@armlinux.org.uk>2023-06-27 14:17:19 +0100
committerRussell King (Oracle) <rmk+kernel@armlinux.org.uk>2023-06-27 14:17:19 +0100
commit53ae158f6ddc14df5c44d62c06e33fdb66de1196 (patch)
treed0d0485f5f614e0070bab6ff1b53d56aec7c2d8e /arch/x86
parentac9a78681b921877518763ba0e89202254349d1b (diff)
parent47ba5f39eab3c2a9a1ba878159a6050f2bbfc0e2 (diff)
Merge tag 'arm-vfp-refactor-for-rmk' of git://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux into devel-stabledevel-stable
Refactor VFP support code and reimplement in C The VFP related changes to permit kernel mode NEON in softirq context resulted in some issues regarding en/disabling of sofirqs from asm code, and this made it clear that it would be better to handle more of it from C code. Given that we already have infrastructure that associates undefined instruction exceptions with handler code based on value/mask pairs, we can easily move the dispatch of VFP and NEON instructions to C code once we reimplement the actual VFP support routine (which reasons about how to deal with the exception and whether any emulation is needed) in C code first. With those out of the way, we can drop the partial decoding logic in asm that reasons about which ISA is being used by user space, as the remaining cases are all 32-bit ARM only. This leaves a FPE specific routine with some iWMMXT logic that is easily duplicated in C as well, allowing us to move the FPE asm code into the FPE asm source file, and out of the shared entry code.
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/events/core.c6
-rw-r--r--arch/x86/events/intel/ds.c56
-rw-r--r--arch/x86/include/asm/perf_event.h3
-rw-r--r--arch/x86/kernel/amd_nb.c2
-rw-r--r--arch/x86/lib/retpoline.S4
5 files changed, 41 insertions, 30 deletions
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d096b04bf80e..9d248703cbdd 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1703,10 +1703,8 @@ int x86_pmu_handle_irq(struct pt_regs *regs)
perf_sample_data_init(&data, 0, event->hw.last_period);
- if (has_branch_stack(event)) {
- data.br_stack = &cpuc->lbr_stack;
- data.sample_flags |= PERF_SAMPLE_BRANCH_STACK;
- }
+ if (has_branch_stack(event))
+ perf_sample_save_brstack(&data, event, &cpuc->lbr_stack);
if (perf_event_overflow(event, &data, regs))
x86_pmu_stop(event, 0);
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index a2e566e53076..df88576d6b2a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1229,12 +1229,14 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
struct perf_event *event, bool add)
{
struct pmu *pmu = event->pmu;
+
/*
* Make sure we get updated with the first PEBS
* event. It will trigger also during removal, but
* that does not hurt:
*/
- bool update = cpuc->n_pebs == 1;
+ if (cpuc->n_pebs == 1)
+ cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW;
if (needed_cb != pebs_needs_sched_cb(cpuc)) {
if (!needed_cb)
@@ -1242,7 +1244,7 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
else
perf_sched_cb_dec(pmu);
- update = true;
+ cpuc->pebs_data_cfg |= PEBS_UPDATE_DS_SW;
}
/*
@@ -1252,24 +1254,13 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
if (x86_pmu.intel_cap.pebs_baseline && add) {
u64 pebs_data_cfg;
- /* Clear pebs_data_cfg and pebs_record_size for first PEBS. */
- if (cpuc->n_pebs == 1) {
- cpuc->pebs_data_cfg = 0;
- cpuc->pebs_record_size = sizeof(struct pebs_basic);
- }
-
pebs_data_cfg = pebs_update_adaptive_cfg(event);
-
- /* Update pebs_record_size if new event requires more data. */
- if (pebs_data_cfg & ~cpuc->pebs_data_cfg) {
- cpuc->pebs_data_cfg |= pebs_data_cfg;
- adaptive_pebs_record_size_update();
- update = true;
- }
+ /*
+ * Be sure to update the thresholds when we change the record.
+ */
+ if (pebs_data_cfg & ~cpuc->pebs_data_cfg)
+ cpuc->pebs_data_cfg |= pebs_data_cfg | PEBS_UPDATE_DS_SW;
}
-
- if (update)
- pebs_update_threshold(cpuc);
}
void intel_pmu_pebs_add(struct perf_event *event)
@@ -1326,9 +1317,17 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event)
wrmsrl(base + idx, value);
}
+static inline void intel_pmu_drain_large_pebs(struct cpu_hw_events *cpuc)
+{
+ if (cpuc->n_pebs == cpuc->n_large_pebs &&
+ cpuc->n_pebs != cpuc->n_pebs_via_pt)
+ intel_pmu_drain_pebs_buffer();
+}
+
void intel_pmu_pebs_enable(struct perf_event *event)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+ u64 pebs_data_cfg = cpuc->pebs_data_cfg & ~PEBS_UPDATE_DS_SW;
struct hw_perf_event *hwc = &event->hw;
struct debug_store *ds = cpuc->ds;
unsigned int idx = hwc->idx;
@@ -1344,11 +1343,22 @@ void intel_pmu_pebs_enable(struct perf_event *event)
if (x86_pmu.intel_cap.pebs_baseline) {
hwc->config |= ICL_EVENTSEL_ADAPTIVE;
- if (cpuc->pebs_data_cfg != cpuc->active_pebs_data_cfg) {
- wrmsrl(MSR_PEBS_DATA_CFG, cpuc->pebs_data_cfg);
- cpuc->active_pebs_data_cfg = cpuc->pebs_data_cfg;
+ if (pebs_data_cfg != cpuc->active_pebs_data_cfg) {
+ /*
+ * drain_pebs() assumes uniform record size;
+ * hence we need to drain when changing said
+ * size.
+ */
+ intel_pmu_drain_large_pebs(cpuc);
+ adaptive_pebs_record_size_update();
+ wrmsrl(MSR_PEBS_DATA_CFG, pebs_data_cfg);
+ cpuc->active_pebs_data_cfg = pebs_data_cfg;
}
}
+ if (cpuc->pebs_data_cfg & PEBS_UPDATE_DS_SW) {
+ cpuc->pebs_data_cfg = pebs_data_cfg;
+ pebs_update_threshold(cpuc);
+ }
if (idx >= INTEL_PMC_IDX_FIXED) {
if (x86_pmu.intel_cap.pebs_format < 5)
@@ -1391,9 +1401,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct hw_perf_event *hwc = &event->hw;
- if (cpuc->n_pebs == cpuc->n_large_pebs &&
- cpuc->n_pebs != cpuc->n_pebs_via_pt)
- intel_pmu_drain_pebs_buffer();
+ intel_pmu_drain_large_pebs(cpuc);
cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 8fc15ed5e60b..abf09882f58b 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -121,6 +121,9 @@
#define PEBS_DATACFG_LBRS BIT_ULL(3)
#define PEBS_DATACFG_LBR_SHIFT 24
+/* Steal the highest bit of pebs_data_cfg for SW usage */
+#define PEBS_UPDATE_DS_SW BIT_ULL(63)
+
/*
* Intel "Architectural Performance Monitoring" CPUID
* detection/enumeration details:
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c
index 4266b64631a4..7e331e8f3692 100644
--- a/arch/x86/kernel/amd_nb.c
+++ b/arch/x86/kernel/amd_nb.c
@@ -36,6 +36,7 @@
#define PCI_DEVICE_ID_AMD_19H_M50H_DF_F4 0x166e
#define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4
#define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4
+#define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc
/* Protect the PCI config register pairs used for SMN. */
static DEFINE_MUTEX(smn_mutex);
@@ -79,6 +80,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) },
{}
};
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 27ef53fab6bd..b3b1e376dce8 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -144,8 +144,8 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array)
*/
.align 64
.skip 63, 0xcc
-SYM_FUNC_START_NOALIGN(zen_untrain_ret);
-
+SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE)
+ ANNOTATE_NOENDBR
/*
* As executed from zen_untrain_ret, this is:
*