if (!opt_disable_rseq && thread_data->reg &&
rseq_register_current_thread())
abort();
reps = thread_data->reps; for (i = 0; i < reps; i++) { int cpu = rseq_this_cpu_lock(&data->lock);
data->c[cpu].count++;
rseq_percpu_unlock(&data->lock, cpu); #ifndef BENCHMARK if (i != 0 && !(i % (reps / 10)))
printf_verbose("tid %d: count %lld\n",
(int) rseq_gettid(), i); #endif
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
(int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && thread_data->reg &&
rseq_unregister_current_thread())
abort(); return NULL;
}
/*
 * A simple test which implements a sharded counter using a per-cpu
 * lock. Obviously real applications might prefer to simply use a
 * per-cpu increment; however, this is reasonable for a test and the
 * lock can be extended to synchronize more complicated operations.
 */
void test_percpu_spinlock(void)
{
	const int num_threads = opt_threads;
	int i, ret;
	uint64_t sum;
	pthread_t test_threads[num_threads];
	struct spinlock_test_data data;
	struct spinlock_thread_test_data thread_data[num_threads];

	memset(&data, 0, sizeof(data));
	for (i = 0; i < num_threads; i++) {
		thread_data[i].reps = opt_reps;
		/* Disable rseq registration for every opt_disable_mod-th thread. */
		if (opt_disable_mod <= 0 || (i % opt_disable_mod))
			thread_data[i].reg = 1;
		else
			thread_data[i].reg = 0;
		thread_data[i].data = &data;
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_spinlock_thread,
				     &thread_data[i]);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	/*
	 * Sum the per-cpu counters; every thread increment is accounted
	 * on exactly one cpu, so the total must equal reps * threads.
	 */
	sum = 0;
	for (i = 0; i < CPU_SETSIZE; i++)
		sum += data.c[i].count;

	assert(sum == (uint64_t)opt_reps * num_threads);
}
/*
 * Push @node onto the head of the current CPU's list using an rseq
 * compare-and-store; retries on cpu migration or rseq abort. When
 * @_cpu is non-NULL, reports the cpu the push landed on.
 */
void this_cpu_list_push(struct percpu_list *list,
			struct percpu_list_node *node,
			int *_cpu)
{
	int cpu;

	for (;;) {
		intptr_t *head_ptr, new_head, old_head;
		int ret;

		cpu = get_current_cpu_id();
		/* Load list->c[cpu].head with single-copy atomicity. */
		old_head = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
		new_head = (intptr_t)node;
		head_ptr = (intptr_t *)&list->c[cpu].head;
		node->next = (struct percpu_list_node *)old_head;
		ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
					 head_ptr, old_head, new_head, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
}
/*
 * Unlike a traditional lock-less linked list, the availability of a
 * rseq primitive allows us to implement pop without concerns over
 * ABA-type races.
 *
 * Returns the popped node, or NULL when the current CPU's list is
 * empty. When @_cpu is non-NULL, reports the cpu the pop ran on.
 */
struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
					   int *_cpu)
{
	struct percpu_list_node *popped = NULL;
	int cpu;

	for (;;) {
		struct percpu_list_node *first;
		intptr_t *head_ptr, not_expected, *dst;
		long next_off;
		int ret;

		cpu = get_current_cpu_id();
		head_ptr = (intptr_t *)&list->c[cpu].head;
		not_expected = (intptr_t)NULL;
		next_off = offsetof(struct percpu_list_node, next);
		dst = (intptr_t *)&first;
		ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						 head_ptr, not_expected,
						 next_off, dst, cpu);
		if (rseq_likely(!ret)) {
			popped = first;
			break;
		}
		/* Positive return: head was NULL (empty list), give up. */
		if (ret > 0)
			break;
		/* Negative return: rseq abort, retry. */
	}
	if (_cpu)
		*_cpu = cpu;
	return popped;
}
/* * __percpu_list_pop is not safe against concurrent accesses. Should * only be used on lists that are not concurrently modified.
*/ struct percpu_list_node *__percpu_list_pop(struct percpu_list *list, int cpu)
{ struct percpu_list_node *node;
if (!opt_disable_rseq && rseq_register_current_thread())
abort();
reps = opt_reps; for (i = 0; i < reps; i++) { struct percpu_list_node *node;
node = this_cpu_list_pop(list, NULL); if (opt_yield)
sched_yield(); /* encourage shuffling */ if (node)
this_cpu_list_push(list, node, NULL);
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
(int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
return NULL;
}
/* Simultaneous modification to a per-cpu linked list from many threads. */ void test_percpu_list(void)
{ constint num_threads = opt_threads; int i, j, ret;
uint64_t sum = 0, expected_sum = 0; struct percpu_list list;
pthread_t test_threads[num_threads];
cpu_set_t allowed_cpus;
memset(&list, 0, sizeof(list));
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus); for (i = 0; i < CPU_SETSIZE; i++) { if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue; for (j = 1; j <= 100; j++) { struct percpu_list_node *node;
for (i = 0; i < num_threads; i++) {
ret = pthread_create(&test_threads[i], NULL,
test_percpu_list_thread, &list); if (ret) {
errno = ret;
perror("pthread_create");
abort();
}
}
for (i = 0; i < num_threads; i++) {
ret = pthread_join(test_threads[i], NULL); if (ret) {
errno = ret;
perror("pthread_join");
abort();
}
}
for (i = 0; i < CPU_SETSIZE; i++) { struct percpu_list_node *node;
if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus)) continue;
while ((node = __percpu_list_pop(&list, i))) {
sum += node->data;
free(node);
}
}
/* * All entries should now be accounted for (unless some external * actor is interfering with our allowed affinity while this * test is running).
*/
assert(sum == expected_sum);
}
/*
 * Push @node into the current CPU's buffer with an rseq speculative
 * store (slot) + final store (offset). Returns false when the buffer
 * is full; retries on cpu migration or rseq abort. When @_cpu is
 * non-NULL, reports the cpu the push landed on.
 */
bool this_cpu_buffer_push(struct percpu_buffer *buffer,
			  struct percpu_buffer_node *node,
			  int *_cpu)
{
	bool pushed = false;
	int cpu;

	for (;;) {
		intptr_t pos;
		intptr_t *slot_ptr, slot_val;
		intptr_t *off_ptr, next_off;
		int ret;

		cpu = get_current_cpu_id();
		pos = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (pos == buffer->c[cpu].buflen)
			break;	/* buffer full */
		slot_val = (intptr_t)node;
		slot_ptr = (intptr_t *)&buffer->c[cpu].array[pos];
		next_off = pos + 1;
		off_ptr = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
						   off_ptr, pos, slot_ptr,
						   slot_val, next_off, cpu);
		if (rseq_likely(!ret)) {
			pushed = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return pushed;
}
/*
 * Pop the top node from the current CPU's buffer using a double-compare
 * rseq store: both the offset and the slot content are re-validated
 * before the offset is decremented. Returns NULL when the buffer is
 * empty. When @_cpu is non-NULL, reports the cpu the pop ran on.
 */
struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
					       int *_cpu)
{
	struct percpu_buffer_node *top;
	int cpu;

	for (;;) {
		intptr_t pos, next_off;
		intptr_t *off_ptr;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		pos = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (pos == 0) {
			top = NULL;	/* buffer empty */
			break;
		}
		top = RSEQ_READ_ONCE(buffer->c[cpu].array[pos - 1]);
		next_off = pos - 1;
		off_ptr = (intptr_t *)&buffer->c[cpu].offset;
		ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
				off_ptr, pos,
				(intptr_t *)&buffer->c[cpu].array[pos - 1],
				(intptr_t)top, next_off, cpu);
		if (rseq_likely(!ret))
			break;
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return top;
}
/* * __percpu_buffer_pop is not safe against concurrent accesses. Should * only be used on buffers that are not concurrently modified.
*/ struct percpu_buffer_node *__percpu_buffer_pop(struct percpu_buffer *buffer, int cpu)
{ struct percpu_buffer_node *head;
intptr_t offset;
if (!opt_disable_rseq && rseq_register_current_thread())
abort();
reps = opt_reps; for (i = 0; i < reps; i++) { struct percpu_buffer_node *node;
node = this_cpu_buffer_pop(buffer, NULL); if (opt_yield)
sched_yield(); /* encourage shuffling */ if (node) { if (!this_cpu_buffer_push(buffer, node, NULL)) { /* Should increase buffer size. */
abort();
}
}
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
(int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worse-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= BUFFER_ITEM_PER_CPU; j++) {
			struct percpu_buffer_node *node;

			expected_sum += j;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			node = malloc(sizeof(*node));
			assert(node);
			node->data = j;
			buffer.c[i].array[j - 1] = node;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_buffer_thread, &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	/* Drain every per-cpu buffer and total the node payloads. */
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_buffer_node *node;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while ((node = __percpu_buffer_pop(&buffer, i))) {
			sum += node->data;
			free(node);
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
/*
 * Push @item (by value) into the current CPU's memcpy buffer using an
 * rseq try-memcpy + final offset store. Returns false when the buffer
 * is full; retries on cpu migration or rseq abort. When @_cpu is
 * non-NULL, reports the cpu the push landed on.
 */
bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
				 struct percpu_memcpy_buffer_node item,
				 int *_cpu)
{
	bool pushed = false;
	int cpu;

	for (;;) {
		intptr_t *off_ptr, next_off, pos;
		char *dst, *src;
		size_t nbytes;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		pos = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (pos == buffer->c[cpu].buflen)
			break;	/* buffer full */
		dst = (char *)&buffer->c[cpu].array[pos];
		src = (char *)&item;
		/* copylen must be <= 4kB. */
		nbytes = sizeof(item);
		next_off = pos + 1;
		off_ptr = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(
			opt_mo, RSEQ_PERCPU,
			off_ptr, pos,
			dst, src, nbytes,
			next_off, cpu);
		if (rseq_likely(!ret)) {
			pushed = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return pushed;
}
/*
 * Pop the top entry of the current CPU's memcpy buffer into @item using
 * an rseq try-memcpy + final offset store. Returns false when the
 * buffer is empty; retries on cpu migration or rseq abort. When @_cpu
 * is non-NULL, reports the cpu the pop ran on.
 */
bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
				struct percpu_memcpy_buffer_node *item,
				int *_cpu)
{
	bool popped = false;
	int cpu;

	for (;;) {
		intptr_t *off_ptr, next_off, pos;
		char *dst, *src;
		size_t nbytes;
		int ret;

		cpu = get_current_cpu_id();
		/* Load offset with single-copy atomicity. */
		pos = RSEQ_READ_ONCE(buffer->c[cpu].offset);
		if (pos == 0)
			break;	/* buffer empty */
		dst = (char *)item;
		src = (char *)&buffer->c[cpu].array[pos - 1];
		/* copylen must be <= 4kB. */
		nbytes = sizeof(*item);
		next_off = pos - 1;
		off_ptr = &buffer->c[cpu].offset;
		ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
						   off_ptr, pos, dst, src, nbytes,
						   next_off, cpu);
		if (rseq_likely(!ret)) {
			popped = true;
			break;
		}
		/* Retry if comparison fails or rseq aborts. */
	}
	if (_cpu)
		*_cpu = cpu;
	return popped;
}
/* * __percpu_memcpy_buffer_pop is not safe against concurrent accesses. Should * only be used on buffers that are not concurrently modified.
*/ bool __percpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer, struct percpu_memcpy_buffer_node *item, int cpu)
{
intptr_t offset;
if (!opt_disable_rseq && rseq_register_current_thread())
abort();
reps = opt_reps; for (i = 0; i < reps; i++) { struct percpu_memcpy_buffer_node item; bool result;
result = this_cpu_memcpy_buffer_pop(buffer, &item, NULL); if (opt_yield)
sched_yield(); /* encourage shuffling */ if (result) { if (!this_cpu_memcpy_buffer_push(buffer, item, NULL)) { /* Should increase buffer size. */
abort();
}
}
}
printf_verbose("tid %d: number of rseq abort: %d, signals delivered: %u\n",
(int) rseq_gettid(), nr_abort, signals_delivered); if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
return NULL;
}
/* Simultaneous modification to a per-cpu buffer from many threads. */
void test_percpu_memcpy_buffer(void)
{
	const int num_threads = opt_threads;
	int i, j, ret;
	uint64_t sum = 0, expected_sum = 0;
	struct percpu_memcpy_buffer buffer;
	pthread_t test_threads[num_threads];
	cpu_set_t allowed_cpus;

	memset(&buffer, 0, sizeof(buffer));

	/* Generate list entries for every usable cpu. */
	sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
	for (i = 0; i < CPU_SETSIZE; i++) {
		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;
		/* Worse-case is every item in same CPU. */
		buffer.c[i].array =
			malloc(sizeof(*buffer.c[i].array) * CPU_SETSIZE *
			       MEMCPY_BUFFER_ITEM_PER_CPU);
		assert(buffer.c[i].array);
		buffer.c[i].buflen = CPU_SETSIZE * MEMCPY_BUFFER_ITEM_PER_CPU;
		for (j = 1; j <= MEMCPY_BUFFER_ITEM_PER_CPU; j++) {
			/* Each item contributes data1 + data2 = j + (j + 1). */
			expected_sum += 2 * j + 1;

			/*
			 * We could theoretically put the word-sized
			 * "data" directly in the buffer. However, we
			 * want to model objects that would not fit
			 * within a single word, so allocate an object
			 * for each node.
			 */
			buffer.c[i].array[j - 1].data1 = j;
			buffer.c[i].array[j - 1].data2 = j + 1;
			buffer.c[i].offset++;
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_create(&test_threads[i], NULL,
				     test_percpu_memcpy_buffer_thread,
				     &buffer);
		if (ret) {
			errno = ret;
			perror("pthread_create");
			abort();
		}
	}

	for (i = 0; i < num_threads; i++) {
		ret = pthread_join(test_threads[i], NULL);
		if (ret) {
			errno = ret;
			perror("pthread_join");
			abort();
		}
	}

	/* Drain every per-cpu buffer and total both payload fields. */
	for (i = 0; i < CPU_SETSIZE; i++) {
		struct percpu_memcpy_buffer_node item;

		if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
			continue;

		while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
			sum += item.data1;
			sum += item.data2;
		}
		free(buffer.c[i].array);
	}

	/*
	 * All entries should now be accounted for (unless some external
	 * actor is interfering with our allowed affinity while this
	 * test is running).
	 */
	assert(sum == expected_sum);
}
while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) { /* list_a is "active". */
cpu_a = rand() % CPU_SETSIZE; /* * As list_b is "inactive", we should never see changes * to list_b.
*/ if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
fprintf(stderr, "Membarrier test failed\n");
abort();
}
/* Make list_b "active". */
__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE); if (rseq_membarrier_expedited(cpu_a) &&
errno != ENXIO /* missing CPU */) {
perror("sys_membarrier");
abort();
} /* * Cpu A should now only modify list_b, so the values * in list_a should be stable.
*/
expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);
cpu_b = rand() % CPU_SETSIZE; /* * As list_a is "inactive", we should never see changes * to list_a.
*/ if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
fprintf(stderr, "Membarrier test failed\n");
abort();
}
/* Make list_a "active". */
__atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE); if (rseq_membarrier_expedited(cpu_b) &&
errno != ENXIO /* missing CPU*/) {
perror("sys_membarrier");
abort();
} /* Remember a value from list_b. */
expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
}
for (i = 0; i < num_threads; i++) {
ret = pthread_create(&worker_threads[i], NULL,
test_membarrier_worker_thread, &thread_args); if (ret) {
errno = ret;
perror("pthread_create");
abort();
}
}
for (i = 0; i < num_threads; i++) {
ret = pthread_join(worker_threads[i], NULL); if (ret) {
errno = ret;
perror("pthread_join");
abort();
}
}
__atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
ret = pthread_join(manager_thread, NULL); if (ret) {
errno = ret;
perror("pthread_join");
abort();
}
} #else/* TEST_MEMBARRIER */ void test_membarrier(void)
{
fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. " "Skipping membarrier test.\n");
} #endif
/*
 * Print command-line usage to stdout.
 *
 * @argc: argument count (unused; kept for symmetry with main()).
 * @argv: argument vector; argv[0] supplies the program name.
 */
static void show_usage(int argc, char **argv)
{
	printf("Usage : %s <OPTIONS>\n",
	       argv[0]);
	printf("OPTIONS:\n");
	printf("	[-1 loops] Number of loops for delay injection 1\n");
	printf("	[-2 loops] Number of loops for delay injection 2\n");
	printf("	[-3 loops] Number of loops for delay injection 3\n");
	printf("	[-4 loops] Number of loops for delay injection 4\n");
	printf("	[-5 loops] Number of loops for delay injection 5\n");
	printf("	[-6 loops] Number of loops for delay injection 6\n");
	printf("	[-7 loops] Number of loops for delay injection 7 (-1 to enable -m)\n");
	printf("	[-8 loops] Number of loops for delay injection 8 (-1 to enable -m)\n");
	printf("	[-9 loops] Number of loops for delay injection 9 (-1 to enable -m)\n");
	printf("	[-m N] Yield/sleep/kill every modulo N (default 0: disabled) (>= 0)\n");
	printf("	[-y] Yield\n");
	printf("	[-k] Kill thread with signal\n");
	printf("	[-s S] S: =0: disabled (default), >0: sleep time (ms)\n");
	printf("	[-t N] Number of threads (default 200)\n");
	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
	printf("	[-d] Disable rseq system call (no initialization)\n");
	printf("	[-D M] Disable rseq for each M threads\n");
	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
	printf("	[-v] Verbose output.\n");
	printf("	[-h] Show this help.\n");
	printf("\n");
}
/*
 * NOTE(review): the following text is website boilerplate (a German
 * disclaimer) accidentally appended during extraction; it is not part
 * of this source file. Preserved here inside a comment so the file
 * remains compilable:
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen
 * sorgfaeltig zusammengestellt. Es wird jedoch weder Vollstaendigkeit,
 * noch Richtigkeit, noch Qualitaet der bereitgestellten Informationen
 * zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */