#ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS
/*
 * Measure this CPU's misaligned scalar access speed: time repeated
 * word-sized vs byte-sized unaligned copies, keep the best-case cycle
 * count of each, and record the verdict in per-CPU misaligned_access_speed
 * plus the fast_misaligned_access cpumask.
 *
 * @param: struct page * scratch buffer of MISALIGNED_BUFFER_ORDER pages.
 * Returns 0 always; the result is reported via per-CPU state.
 */
static int check_unaligned_access(void *param)
{
	int cpu = smp_processor_id();
	u64 start_cycles, end_cycles;
	u64 word_cycles;
	u64 byte_cycles;
	int ratio;
	unsigned long start_jiffies, now;
	struct page *page = param;
	void *dst;
	void *src;
	long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW;

	/* Nothing to do if this CPU's speed was already determined. */
	if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN)
		return 0;

	/* Make an unaligned destination buffer. */
	dst = (void *)((unsigned long)page_address(page) | 0x1);
	/* Unalign src as well, but differently (off by 1 + 2 = 3). */
	src = dst + (MISALIGNED_BUFFER_SIZE / 2);
	src += 2;
	word_cycles = -1ULL;

	/* Do a warmup. */
	__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	preempt_disable();
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	/*
	 * For a fixed amount of time, repeatedly try the function, and take
	 * the best time in cycles as the measurement.
	 */
	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		/* Ensure the CSR read can't reorder WRT to the copy. */
		mb();
		__riscv_copy_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		/* Ensure the copy ends before the end time is snapped. */
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < word_cycles)
			word_cycles = end_cycles - start_cycles;
	}

	/*
	 * Repeat for the byte-wise copy.  byte_cycles must be initialized and
	 * the jiffies edge re-armed here; without this the comparison below
	 * reads an indeterminate value (undefined behavior) and the timing
	 * window has already expired.
	 */
	byte_cycles = -1ULL;
	__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		/* Ensure the CSR read can't reorder WRT to the copy. */
		mb();
		__riscv_copy_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		/* Ensure the copy ends before the end time is snapped. */
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < byte_cycles)
			byte_cycles = end_cycles - start_cycles;
	}

	/* Re-enable preemption; it was disabled for the whole timing run. */
	preempt_enable();

	/* Don't divide by zero. */
	if (!word_cycles || !byte_cycles) {
		pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned access speed\n",
			cpu);
		return 0;
	}

	if (word_cycles < byte_cycles)
		speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST;

	ratio = div_u64((byte_cycles * 100), word_cycles);
	pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n",
		cpu,
		ratio / 100,
		ratio % 100,
		(speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow");

	per_cpu(misaligned_access_speed, cpu) = speed;

	/*
	 * Set the value of fast_misaligned_access of a CPU. These operations
	 * are atomic to avoid race conditions.
	 */
	if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST)
		cpumask_set_cpu(cpu, &fast_misaligned_access);
	else
		cpumask_clear_cpu(cpu, &fast_misaligned_access);

	return 0;
}

/*
 * Per-CPU worker: every CPU except the boot CPU (which tends jiffies)
 * measures its own speed using the buffer allocated for it.
 */
static void __init check_unaligned_access_nonboot_cpu(void *param)
{
	unsigned int cpu = smp_processor_id();
	struct page **pages = param;

	if (smp_processor_id() != 0)
		check_unaligned_access(pages[cpu]);
}
/* Measure unaligned access speed on all CPUs present at boot in parallel. */
static void __init check_unaligned_access_speed_all_cpus(void)
{
	unsigned int cpu;
	unsigned int cpu_count = num_possible_cpus();
	struct page **bufs = kcalloc(cpu_count, sizeof(*bufs), GFP_KERNEL);

	if (!bufs) {
		pr_warn("Allocation failure, not measuring misaligned performance\n");
		return;
	}

	/*
	 * Allocate separate buffers for each CPU so there's no fighting over
	 * cache lines.
	 */
	for_each_cpu(cpu, cpu_online_mask) {
		bufs[cpu] = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
		if (!bufs[cpu]) {
			pr_warn("Allocation failure, not measuring misaligned performance\n");
			goto out;
		}
	}

	/* Check everybody except 0, who stays behind to tend jiffies. */
	on_each_cpu(check_unaligned_access_nonboot_cpu, bufs, 1);

	/* Check core 0. */
	smp_call_on_cpu(0, check_unaligned_access, bufs[0], true);

	/*
	 * The "out" label is the target of the allocation-failure goto above;
	 * its absence previously left a dangling goto and leaked every page
	 * allocated so far.
	 */
out:
	for_each_cpu(cpu, cpu_online_mask) {
		/* bufs[] may be partially populated on the failure path. */
		if (bufs[cpu])
			__free_pages(bufs[cpu], MISALIGNED_BUFFER_ORDER);
	}

	kfree(bufs);
}
/*
 * Enable the fast-unaligned-access static branch only when all @weight
 * CPUs being considered appear in @mask (i.e. all of them measured fast);
 * otherwise disable it.  Caller must hold cpus_read_lock.
 */
static void modify_unaligned_access_branches(cpumask_t *mask, int weight)
{
	if (cpumask_weight(mask) != weight)
		static_branch_disable_cpuslocked(&fast_unaligned_access_speed_key);
	else
		static_branch_enable_cpuslocked(&fast_unaligned_access_speed_key);
}
staticvoid set_unaligned_access_static_branches_except_cpu(int cpu)
{ /* * Same as set_unaligned_access_static_branches, except excludes the * given CPU from the result. When a CPU is hotplugged into an offline * state, this function is called before the CPU is set to offline in * the cpumask, and thus the CPU needs to be explicitly excluded.
*/
staticvoid set_unaligned_access_static_branches(void)
{ /* * This will be called after check_unaligned_access_all_cpus so the * result of unaligned access speed for all CPUs will be available. * * To avoid the number of online cpus changing between reading * cpu_online_mask and calling num_online_cpus, cpus_read_lock must be * held before calling this function.
*/
staticint riscv_online_cpu(unsignedint cpu)
{ int ret = cpu_online_unaligned_access_init(cpu);
if (ret) return ret;
/* We are already set since the last check */ if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) { gotoexit;
} elseif (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param; gotoexit;
}
#ifdef CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS
/*
 * Measure this CPU's misaligned vector access speed: time word-sized vs
 * byte-sized unaligned vector copies, keep the best-case cycle count of
 * each, and record the verdict in per-CPU vector_misaligned_access.
 * Runs from a workqueue; @work is unused.
 */
static void check_vector_unaligned_access(struct work_struct *work __always_unused)
{
	int cpu = smp_processor_id();
	u64 start_cycles, end_cycles;
	u64 word_cycles;
	u64 byte_cycles;
	int ratio;
	unsigned long start_jiffies, now;
	struct page *page;
	void *dst;
	void *src;
	long speed = RISCV_HWPROBE_MISALIGNED_VECTOR_SLOW;

	/* Nothing to do if this CPU's speed was already determined. */
	if (per_cpu(vector_misaligned_access, cpu) != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN)
		return;

	page = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER);
	if (!page) {
		pr_warn("Allocation failure, not measuring vector misaligned performance\n");
		return;
	}

	/* Make an unaligned destination buffer. */
	dst = (void *)((unsigned long)page_address(page) | 0x1);
	/* Unalign src as well, but differently (off by 1 + 2 = 3). */
	src = dst + (MISALIGNED_BUFFER_SIZE / 2);
	src += 2;
	word_cycles = -1ULL;

	/* Do a warmup. */
	kernel_vector_begin();
	__riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);

	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	/*
	 * For a fixed amount of time, repeatedly try the function, and take
	 * the best time in cycles as the measurement.
	 */
	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		/* Ensure the CSR read can't reorder WRT to the copy. */
		mb();
		__riscv_copy_vec_words_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		/* Ensure the copy ends before the end time is snapped. */
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < word_cycles)
			word_cycles = end_cycles - start_cycles;
	}

	/*
	 * Repeat for the byte-wise vector copy.  byte_cycles must be
	 * initialized and the jiffies edge re-armed before this loop;
	 * otherwise the comparison below reads an indeterminate value and
	 * the timing window has already expired.
	 */
	byte_cycles = -1ULL;
	__riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
	start_jiffies = jiffies;
	while ((now = jiffies) == start_jiffies)
		cpu_relax();

	while (time_before(jiffies, now + (1 << MISALIGNED_ACCESS_JIFFIES_LG2))) {
		start_cycles = get_cycles64();
		/* Ensure the CSR read can't reorder WRT to the copy. */
		mb();
		__riscv_copy_vec_bytes_unaligned(dst, src, MISALIGNED_COPY_SIZE);
		/* Ensure the copy ends before the end time is snapped. */
		mb();
		end_cycles = get_cycles64();
		if ((end_cycles - start_cycles) < byte_cycles)
			byte_cycles = end_cycles - start_cycles;
	}

	kernel_vector_end();

	/* Don't divide by zero. */
	if (!word_cycles || !byte_cycles) {
		pr_warn("cpu%d: rdtime lacks granularity needed to measure unaligned vector access speed\n",
			cpu);
		goto free;
	}

	if (word_cycles < byte_cycles)
		speed = RISCV_HWPROBE_MISALIGNED_VECTOR_FAST;

	ratio = div_u64((byte_cycles * 100), word_cycles);
	pr_info("cpu%d: Ratio of vector byte access time to vector unaligned word access is %d.%02d, unaligned accesses are %s\n",
		cpu,
		ratio / 100,
		ratio % 100,
		(speed == RISCV_HWPROBE_MISALIGNED_VECTOR_FAST) ? "fast" : "slow");

	per_cpu(vector_misaligned_access, cpu) = speed;

	/* Target of the goto above; the buffer is freed on every path. */
free:
	__free_pages(page, MISALIGNED_BUFFER_ORDER);
}
/*
 * Boot-time entry point: determine scalar and vector unaligned access
 * speed for every online CPU, honoring command-line overrides, and install
 * CPU hotplug callbacks so CPUs onlined later are handled too.
 * Returns 0 (initcall convention).
 */
static int __init check_unaligned_access_all_cpus(void)
{
	int cpu;

	unaligned_access_init();

	if (unaligned_scalar_speed_param != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) {
		/* Command-line override: apply to all CPUs, skip measurement. */
		pr_info("scalar unaligned access speed set to '%s' (%lu) by command line\n",
			speed_str[unaligned_scalar_speed_param], unaligned_scalar_speed_param);
		for_each_online_cpu(cpu)
			per_cpu(misaligned_access_speed, cpu) = unaligned_scalar_speed_param;
	} else if (!check_unaligned_access_emulated_all_cpus()) {
		/* Only measure when accesses are not firmware-emulated. */
		check_unaligned_access_speed_all_cpus();
	}

	if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
		if (!has_vector() &&
		    unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED) {
			pr_warn("vector support is not available, ignoring unaligned_vector_speed=%s\n",
				speed_str[unaligned_vector_speed_param]);
		} else {
			pr_info("vector unaligned access speed set to '%s' (%lu) by command line\n",
				speed_str[unaligned_vector_speed_param], unaligned_vector_speed_param);
		}
	}

	/* Without vector support the only valid answer is "unsupported". */
	if (!has_vector())
		unaligned_vector_speed_param = RISCV_HWPROBE_MISALIGNED_VECTOR_UNSUPPORTED;

	if (unaligned_vector_speed_param != RISCV_HWPROBE_MISALIGNED_VECTOR_UNKNOWN) {
		for_each_online_cpu(cpu)
			per_cpu(vector_misaligned_access, cpu) = unaligned_vector_speed_param;
	} else if (!check_vector_unaligned_access_emulated_all_cpus() &&
		   IS_ENABLED(CONFIG_RISCV_PROBE_VECTOR_UNALIGNED_ACCESS)) {
		/* Vector probing is slow; run it asynchronously in a kthread. */
		riscv_hwprobe_register_async_probe();
		if (IS_ERR(kthread_run(vec_check_unaligned_access_speed_all_cpus,
				       NULL, "vec_check_unaligned_access_speed_all_cpus"))) {
			pr_warn("Failed to create vec_unalign_check kthread\n");
			riscv_hwprobe_complete_async_probe();
		}
	}

	/*
	 * Setup hotplug callbacks for any new CPUs that come online or go
	 * offline.
	 */
	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
				  riscv_online_cpu, riscv_offline_cpu);
	cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "riscv:online",
				  riscv_online_cpu_vec, NULL);

	return 0;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.