diff options
Diffstat (limited to 'arch/riscv/kernel/unaligned_access_speed.c')
-rw-r--r-- | arch/riscv/kernel/unaligned_access_speed.c | 156 |
1 files changed, 148 insertions, 8 deletions
diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index 160628a2116d..91f189cf1611 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -6,11 +6,13 @@ #include <linux/cpu.h> #include <linux/cpumask.h> #include <linux/jump_label.h> +#include <linux/kthread.h> #include <linux/mm.h> #include <linux/smp.h> #include <linux/types.h> #include <asm/cpufeature.h> #include <asm/hwprobe.h> +#include <asm/vector.h> #include "copy-unaligned.h" @@ -19,7 +21,8 @@ #define MISALIGNED_BUFFER_ORDER get_order(MISALIGNED_BUFFER_SIZE) #define MISALIGNED_COPY_SIZE ((MISALIGNED_BUFFER_SIZE / 2) - 0x80) -DEFINE_PER_CPU(long, misaligned_access_speed); +DEFINE_PER_CPU(long, misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; +DEFINE_PER_CPU(long, vector_misaligned_access) = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED; #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS static cpumask_t fast_misaligned_access; @@ -191,6 +194,7 @@ static int riscv_online_cpu(unsigned int cpu) if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) goto exit; + check_unaligned_access_emulated(NULL); buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); if (!buf) { pr_warn("Allocation failure, not measuring misaligned performance\n"); @@ -259,23 +263,159 @@ out: kfree(bufs); return 0; } +#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ +static int check_unaligned_access_speed_all_cpus(void) +{ + return 0; +} +#endif -static int check_unaligned_access_all_cpus(void) +#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS +static void check_vector_unaligned_access(struct work_struct *work __always_unused) { - bool all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + int cpu = smp_processor_id(); + u64 start_cycles, end_cycles; + u64 word_cycles; + u64 byte_cycles; + int ratio; + unsigned long start_jiffies, now; + struct page *page; + void *dst; + void *src; + long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW; - if (!all_cpus_emulated) - return check_unaligned_access_speed_all_cpus(); + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) + return; + + page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); + if (!page) { + pr_warn("Allocation failure, not measuring vector misaligned performance\n"); + return; + } + + /* Make an unaligned destination buffer. */ + dst = (void *)((unsigned long)page_address(page) | 0x1); + /* Unalign src as well, but differently (off by 1 + 2 = 3). */ + src = dst + (MISALIGNED_BUFFER_SIZE / 2); + src += 2; + word_cycles = -1ULL; + + /* Do a warmup. */ + kernel_vector_begin(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + /* + * For a fixed amount of time, repeatedly try the function, and take + * the best time in cycles as the measurement. + */ + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < word_cycles) + word_cycles = end_cycles - start_cycles; + } + + byte_cycles = -1ULL; + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + start_jiffies = jiffies; + while ((now = jiffies) == start_jiffies) + cpu_relax(); + + while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) { + start_cycles = get_cycles64(); + /* Ensure the CSR read can't reorder WRT to the copy. */ + mb(); + __riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE); + /* Ensure the copy ends before the end time is snapped. */ + mb(); + end_cycles = get_cycles64(); + if ((end_cycles - start_cycles) < byte_cycles) + byte_cycles = end_cycles - start_cycles; + } + + kernel_vector_end(); + + /* Don't divide by zero. */ + if (!word_cycles || !byte_cycles) { + pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n", + cpu); + + return; + } + + if (word_cycles < byte_cycles) + speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST; + + ratio = div_u64((byte_cycles * 100), word_cycles); + pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n", + cpu, + ratio / 100, + ratio % 100, + (speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow"); + + per_cpu(vector_misaligned_access, cpu) = speed; +} + +static int riscv_online_cpu_vec(unsigned int cpu) +{ + if (!has_vector()) + return 0; + + if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) + return 0; + + check_vector_unaligned_access_emulated(NULL); + check_vector_unaligned_access(NULL); return 0; } -#else /* CONFIG_RISCV_PROBE_UNALIGNED_ACCESS */ -static int check_unaligned_access_all_cpus(void) + +/* Measure unaligned access speed on all CPUs present at boot in parallel. */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) { - check_unaligned_access_emulated_all_cpus(); + schedule_on_each_cpu(check_vector_unaligned_access); + + /* + * Setup hotplug callbacks for any new CPUs that come online or go + * offline. + */ + cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online", + riscv_online_cpu_vec, NULL); return 0; } +#else /* CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS */ +static int vec_check_unaligned_access_speed_all_cpus(void *unused __always_unused) +{ + return 0; +} #endif +static int check_unaligned_access_all_cpus(void) +{ + bool all_cpus_emulated, all_cpus_vec_unsupported; + + all_cpus_emulated = check_unaligned_access_emulated_all_cpus(); + all_cpus_vec_unsupported = check_vector_unaligned_access_emulated_all_cpus(); + + if (!all_cpus_vec_unsupported && + IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) { + kthread_run(vec_check_unaligned_access_speed_all_cpus, + NULL, "vec_check_unaligned_access_speed_all_cpus"); + } + + if (!all_cpus_emulated) + return check_unaligned_access_speed_all_cpus(); + + return 0; +} + arch_initcall(check_unaligned_access_all_cpus); |