// SPDX-License-Identifier: GPL-2.0+
//
// Scalability test comparing RCU vs other mechanisms
// for acquiring references on objects.
//
// Copyright (C) Google, 2020.
//
// Author: Joel Fernandes <joel@joelfernandes.org>
// Module parameters (settable at modprobe/boot time via the torture framework).

// Number of seconds to extend warm-up and cool-down for multiple guest OSes.
torture_param(long, guest_os_delay, 0, "Number of seconds to extend warm-up/cool-down for multiple guest OSes.");
// Wait until there are multiple CPUs before starting test.
torture_param(int, holdoff, IS_BUILTIN(CONFIG_RCU_REF_SCALE_TEST) ? 10 : 0, "Holdoff time before test start (s)");
// Number of typesafe_lookup structures, that is, the degree of concurrency.
torture_param(long, lookup_instances, 0, "Number of typesafe_lookup structures.");
// Number of loops per experiment, all readers execute operations concurrently.
torture_param(int, loops, 10000, "Number of loops per experiment.");
// Number of readers, with -1 defaulting to about 75% of the CPUs.
torture_param(int, nreaders, -1, "Number of readers, -1 for 75% of CPUs.");
// Number of runs.
torture_param(int, nruns, 30, "Number of experiments to run.");
// Reader delay in nanoseconds, 0 for no delay.
torture_param(int, readdelay, 0, "Read-side delay in nanoseconds.");
// Definitions for RCU Tasks ref scale testing: Empty read markers.
// These definitions also work for RCU Rude readers.

// Empty read-side marker section: perform nloops + 1 no-op passes
// (the loop counts i from nloops down to 0 inclusive).
static void rcu_tasks_ref_scale_read_section(const int nloops)
{
	int i;

	for (i = nloops; i >= 0; i--)
		continue;
}
// Empty read-side marker section with delay: insert the specified
// microsecond (udl) and nanosecond (ndl) delay on each of the
// nloops + 1 passes.
static void rcu_tasks_ref_scale_delay_section(const int nloops, const int udl, const int ndl)
{
	int i;

	for (i = nloops; i >= 0; i--)
		un_delay(udl, ndl);
}
staticvoid ref_acqrel_section(constint nloops)
{ unsignedlong x; int i;
preempt_disable(); for (i = nloops; i >= 0; i--) {
x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
}
preempt_enable();
}
preempt_disable(); for (i = nloops; i >= 0; i--) {
x = smp_load_acquire(this_cpu_ptr(&test_acqrel));
un_delay(udl, ndl);
smp_store_release(this_cpu_ptr(&test_acqrel), x + 1);
}
preempt_enable();
}
// Item to look up in a typesafe manner.  Array of pointers to these.
struct refscale_typesafe {
	atomic_t rts_refctr;	// Used by all flavors
	spinlock_t rts_lock;
	seqlock_t rts_seqlock;	// Released via typesafe_seqlock_release().
	unsigned int a;		// Readers verify that b == a * a.
	unsigned int b;
};
// Conditionally release an explicit in-structure sequence lock. Return // true if this release was successful, that is, if no retry is required. staticbool typesafe_seqlock_release(struct refscale_typesafe *rtsp, unsignedint start)
{ return !read_seqretry(&rtsp->rts_seqlock, start);
}
// Do a read-side critical section with the specified delay in // microseconds and nanoseconds inserted so as to increase probability // of failure. staticvoid typesafe_delay_section(constint nloops, constint udl, constint ndl)
{ unsignedint a; unsignedint b; int i; long idx; struct refscale_typesafe *rtsp; unsignedint start;
for (i = nloops; i >= 0; i--) {
preempt_disable();
idx = torture_random(this_cpu_ptr(&refscale_rand)) % rtsarray_size;
preempt_enable();
retry:
rcu_read_lock();
rtsp = rcu_dereference(rtsarray[idx]);
a = READ_ONCE(rtsp->a); if (!rts_acquire(rtsp, &start)) {
rcu_read_unlock(); goto retry;
} if (a != READ_ONCE(rtsp->a)) {
(void)rts_release(rtsp, start);
rcu_read_unlock(); goto retry;
}
un_delay(udl, ndl);
b = READ_ONCE(rtsp->a); // Remember, seqlock read-side release can fail. if (!rts_release(rtsp, start)) {
rcu_read_unlock(); goto retry;
}
WARN_ONCE(a != b, "Re-read of ->a changed from %u to %u.\n", a, b);
b = rtsp->b;
rcu_read_unlock();
WARN_ON_ONCE(a * a != b);
}
}
// Because the acquisition and release methods are expensive, there
// is no point in optimizing away the un_delay() function's two checks.
// Thus simply define typesafe_read_section() as a simple wrapper around
// typesafe_delay_section().
static void typesafe_read_section(const int nloops)
{
	typesafe_delay_section(nloops, 0, 0);
}
// NOTE(review): both definitions below are truncated in this chunk —
// typesafe_alloc_one() has no body and refscale_typesafe_ctor() is cut
// off mid-body; the remainder appears to have been lost (likely an
// extraction artifact).  Also, "staticstruct"/"staticvoid" are fused
// tokens that will not compile — reconcile with the original file.
// Allocate and initialize one refscale_typesafe structure. staticstruct refscale_typesafe *typesafe_alloc_one(void)
{ struct refscale_typesafe *rtsp;
// Slab-allocator constructor for refscale_typesafe structures created // out of a new slab of system memory. staticvoid refscale_typesafe_ctor(void *rtsp_in)
{ struct refscale_typesafe *rtsp = rtsp_in;
// NOTE(review): this span is the interior of the reader kthread function
// (presumably ref_scale_reader(); its signature, local declarations, and
// several statements are missing from this chunk).  Leaving the code
// byte-identical; reconcile with the original file before building.
VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: task started", me);
WARN_ON_ONCE(set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids)));
set_user_nice(current, MAX_NICE);
// Let the orchestrator know this reader is initialized, then honor any
// requested startup holdoff.
atomic_inc(&n_init); if (holdoff)
schedule_timeout_interruptible(holdoff * HZ);
repeat:
VERBOSE_SCALEOUT_BATCH("ref_scale_reader %ld: waiting to start next experiment on cpu %d", me, raw_smp_processor_id());
// Wait for signal that this reader can start.
wait_event(rt->wq, (atomic_read(&nreaders_exp) && smp_load_acquire(&rt->start_reader)) ||
torture_must_stop());
if (torture_must_stop()) goto end;
// Make sure that the CPU is affinitized appropriately during testing.
WARN_ON_ONCE(raw_smp_processor_id() != me % nr_cpu_ids);
// Rendezvous: last reader to decrement n_started releases the others.
WRITE_ONCE(rt->start_reader, 0); if (!atomic_dec_return(&n_started)) while (atomic_read_acquire(&n_started))
cpu_relax();
// To reduce noise, do an initial cache-warming invocation, check // in, and then keep warming until everyone has checked in.
rcu_scale_one_reader(); if (!atomic_dec_return(&n_warmedup)) while (atomic_read_acquire(&n_warmedup))
rcu_scale_one_reader(); // Also keep interrupts disabled. This also has the effect // of preventing entries into slow path for rcu_read_unlock().
local_irq_save(flags);
start = ktime_get_mono_fast_ns();
// NOTE(review): the timed measurement loop, the end-timestamp read, the
// "duration" computation, and local_irq_restore() appear to be missing
// between the two statements above/below — confirm against the original.
rt->last_duration_ns = WARN_ON_ONCE(duration < 0) ? 0 : duration; // To reduce runtime-skew noise, do maintain-load invocations until // everyone is done. if (!atomic_dec_return(&n_cooleddown)) while (atomic_read_acquire(&n_cooleddown))
rcu_scale_one_reader();
// Last reader out of the experiment wakes the orchestrator.
if (atomic_dec_and_test(&nreaders_exp))
wake_up(&main_wq);
if (!torture_must_stop()) goto repeat;
end:
torture_kthread_stopping("ref_scale_reader"); return 0;
}
// Reset all readers' last-measured durations to zero in preparation
// for the next experiment.
static void reset_readers(void)
{
	int i;
	struct reader_task *rt;

	for (i = 0; i < nreaders; i++) {
		rt = &(reader_tasks[i]);
		rt->last_duration_ns = 0;
	}
}
// Print the results of each reader and return the sum of all their durations. static u64 process_durations(int n)
{ int i; struct reader_task *rt; struct seq_buf s; char *buf;
u64 sum = 0;
for (i = 0; i < n && !torture_must_stop(); i++) {
rt = &(reader_tasks[i]);
if (i % 5 == 0)
seq_buf_putc(&s, '\n');
if (seq_buf_used(&s) >= 800) {
pr_alert("%s", seq_buf_str(&s));
seq_buf_clear(&s);
}
seq_buf_printf(&s, "%d: %llu\t", i, rt->last_duration_ns);
sum += rt->last_duration_ns;
}
pr_alert("%s\n", seq_buf_str(&s));
kfree(buf); return sum;
}
// NOTE(review): main_func() below is truncated — only its leading
// declarations survive, and the statements that follow them appear to be
// the tail of a *different* function (a cleanup routine calling
// cur_ops->cleanup() and torture_cleanup_end()).  Leaving the code
// byte-identical; reconcile with the original file before building.
// The main_func is the main orchestrator, it performs a bunch of // experiments. For every experiment, it orders all the readers // involved to start and waits for them to finish the experiment. It // then reads their timestamps and starts the next experiment. Each // experiment progresses from 1 concurrent reader to N of them at which // point all the timestamps are printed. staticint main_func(void *arg)
{ int exp, r; char buf1[64]; char *buf;
u64 *result_avg;
// Do scale-type-specific cleanup operations. if (cur_ops->cleanup != NULL)
cur_ops->cleanup();
torture_cleanup_end();
}
// Shutdown kthread. Just waits to be awakened, then shuts down system. staticint
ref_scale_shutdown(void *arg)
{
wait_event_idle(shutdown_wq, shutdown_start);
smp_mb(); // Wake before output.
ref_scale_cleanup();
kernel_power_off();