summaryrefslogtreecommitdiff
path: root/arch/riscv/kernel/unaligned_access_speed.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/riscv/kernel/unaligned_access_speed.c')
-rw-r--r--arch/riscv/kernel/unaligned_access_speed.c156
1 files changed, 148 insertions, 8 deletions
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c
index 160628a2116d..91f189cf1611 100644
--- a/arch/riscv/kernel/unaligned_access_speed.c
+++ b/arch/riscv/kernel/unaligned_access_speed.c
@@ -6,11 +6,13 @@
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/jump_label.h>
+#include <linux/kthread.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/types.h>
#include <asm/cpufeature.h>
#include <asm/hwprobe.h>
+#include <asm/vector.h>
#include "copy-unaligned.h"
@@ -19,7 +21,8 @@
#define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE)
#define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80)
-DEFINE_PER_CPU(long, misaligned_access_speed);
+DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN;
+DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;
#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
static cpumask_t fast_misaligned_access;
@@ -191,6 +194,7 @@ static int riscv_online_cpu(unsigned int cpu)
if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN)
goto exit;
+ check_unaligned_access_emulated(NULL);
buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
if (!buf) {
pr_warn("Allocation failure, not measuring misaligned performance\n");
@@ -259,23 +263,159 @@ out:
kfree(bufs);
return 0;
}
+#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
+static int check_unaligned_access_speed_all_cpus(void)
+{
+ return 0;
+}
+#endif
-static int check_unaligned_access_all_cpus(void)
+#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
+static void check_vector_unaligned_access(struct work_struct *work __always_unused)
{
- bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+ int cpu = smp_processor_id();
+ u64 start_cycles, end_cycles;
+ u64 word_cycles;
+ u64 byte_cycles;
+ int ratio;
+ unsigned long start_jiffies, now;
+ struct page *page;
+ void *dst;
+ void *src;
+ long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;
- if (!all_cpus_emulated)
- return check_unaligned_access_speed_all_cpus();
+ if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
+ return;
+
+ page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
+ if (!page) {
+ pr_warn("Allocation failure, not measuring vector misaligned performance\n");
+ return;
+ }
+
+ /* Make an unaligned destination buffer. */
+ dst = (void *)((unsigned long)page_address(page) | 0x1);
+ /* Unalign src as well, but differently (off by 1 + 2 = 3). */
+ src = dst + (MISALIGNED_BUFFER_SIZE / 2);
+ src += 2;
+ word_cycles = -1ULL;
+
+ /* Do a warmup. */
+ kernel_vector_begin();
+ __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ /*
+ * For a fixed amount of time, repeatedly try the function, and take
+ * the best time in cycles as the measurement.
+ */
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ /* Ensure the CSR read can't reorder WRT to the copy. */
+ mb();
+ __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ /* Ensure the copy ends before the end time is snapped. */
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < word_cycles)
+ word_cycles = end_cycles - start_cycles;
+ }
+
+ byte_cycles = -1ULL;
+ __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ start_jiffies = jiffies;
+ while ((now = jiffies) == start_jiffies)
+ cpu_relax();
+
+ while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
+ start_cycles = get_cycles64();
+ /* Ensure the CSR read can't reorder WRT to the copy. */
+ mb();
+ __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
+ /* Ensure the copy ends before the end time is snapped. */
+ mb();
+ end_cycles = get_cycles64();
+ if ((end_cycles - start_cycles) < byte_cycles)
+ byte_cycles = end_cycles - start_cycles;
+ }
+
+ kernel_vector_end();
+
+ /* Don't divide by zero. */
+ if (!word_cycles || !byte_cycles) {
+ pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n",
+ cpu);
+
+ return;
+ }
+
+ if (word_cycles < byte_cycles)
+ speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;
+
+ ratio = div_u64((byte_cycles * 100), word_cycles);
+ pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n",
+ cpu,
+ ratio / 100,
+ ratio % 100,
+ (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow");
+
+ per_cpu(vector_misaligned_access, cpu) = speed;
+}
+
+static int riscv_online_cpu_vec(unsigned int cpu)
+{
+ if (!has_vector())
+ return 0;
+
+ if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED)
+ return 0;
+
+ check_vector_unaligned_access_emulated(NULL);
+ check_vector_unaligned_access(NULL);
return 0;
}
-#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */
-static int check_unaligned_access_all_cpus(void)
+
+/* Measure unaligned access speed on all CPUs present at boot in parallel. */
+static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
{
- check_unaligned_access_emulated_all_cpus();
+ schedule_on_each_cpu(check_vector_unaligned_access);
+
+ /*
+ * Setup hotplug callbacks for any new CPUs that come online or go
+ * offline.
+ */
+ cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
+ riscv_online_cpu_vec, NULL);
return 0;
}
+#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */
+static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused)
+{
+ return 0;
+}
#endif
+static int check_unaligned_access_all_cpus(void)
+{
+ bool all_cpus_emulated, all_cpus_vec_unsupported;
+
+ all_cpus_emulated = check_unaligned_access_emulated_all_cpus();
+ all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus();
+
+ if (!all_cpus_vec_unsupported &&
+ IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
+ kthread_run(vec_check_unaligned_access_speed_all_cpus,
+ NULL, "vec_check_unaligned_access_speed_all_cpus");
+ }
+
+ if (!all_cpus_emulated)
+ return check_unaligned_access_speed_all_cpus();
+
+ return 0;
+}
+
arch_initcall(check_unaligned_access_all_cpus);