/*
 * 32 MiB is max size that gets well over 100 iterations on 509 slots.
 * Considering that each slot needs to have at least one page up to
 * 8194 slots in use can then be tested (although with slightly
 * limited resolution).
 */
#define MEM_SIZE_MAP		(SZ_32M + MEM_EXTRA_SIZE)
#define MEM_TEST_MAP_SIZE	(MEM_SIZE_MAP - MEM_EXTRA_SIZE)
/*
 * 128 MiB is min size that fills 32k slots with at least one page in each
 * while at the same time gets 100+ iterations in such test
 *
 * 2 MiB chunk size like a typical huge page
 */
#define MEM_TEST_UNMAP_SIZE		SZ_128M
#define MEM_TEST_UNMAP_CHUNK_SIZE	SZ_2M
/*
 * For the move active test the middle of the test area is placed on
 * a memslot boundary: half lies in the memslot being moved, half in
 * other memslot(s).
 *
 * We have different number of memory slots, excluding the reserved
 * memory slot 0, on various architectures and configurations. The
 * memory size in this test is calculated by picking the maximal
 * last memory slot's memory size, with alignment to the largest
 * supported page size (64KB). In this way, the selected memory
 * size for this test is compatible with test_memslot_move_prepare().
 *
 * architecture   slots    memory-per-slot    memory-on-last-slot
 * --------------------------------------------------------------
 * x86-4KB        32763    16KB               160KB
 * arm64-4KB      32766    16KB               112KB
 * arm64-16KB     32766    16KB               112KB
 * arm64-64KB     8192     64KB               128KB
 */
#define MEM_TEST_MOVE_SIZE		(3 * SZ_64K)
#define MEM_TEST_MOVE_GPA_DEST		(MEM_GPA + MEM_SIZE)
static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
	      "invalid move test region size");
/*
 * Technically, we need also for the atomic bool to be address-free, which
 * is recommended, but not strictly required, by C11 for lockless
 * implementations.
 * However, in practice both GCC and Clang fulfill this requirement on
 * all KVM-supported platforms.
 */
/* ATOMIC_BOOL_LOCK_FREE == 2 means "always lock-free" per C11 7.17.5. */
static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
/*
 * noinline so we can easily see how much time the host spends waiting
 * for the guest.
 * For the same reason use alarm() instead of polling clock_gettime()
 * to implement a wait timeout.
 */
static noinline void host_perform_sync(struct sync_area *sync)
{
	/* Abort via SIGALRM if the guest does not respond within 10 s. */
	alarm(10);

	/* Raise the flag, then spin until the guest clears it (release/acquire pairing). */
	atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
	while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
		;

	/*
	 * NOTE(review): the function appears truncated here — the matching
	 * alarm(0) and its closing brace were lost in extraction, and the
	 * loop below belongs to a different (guest-side) function whose
	 * header is also missing. Do not assume this compiles as-is.
	 */
	for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
	     ptr += page_size)
		*(uint64_t *)ptr = MEM_TEST_VAL_1;

	/*
	 * No host sync here since the MMIO exits are so expensive
	 * that the host would spend most of its time waiting for
	 * the guest and so instead of measuring memslot move
	 * performance we would measure the performance and
	 * likelihood of MMIO exits
	 */
}
/*
 * We can afford to access (map) just a small number of pages
 * per host sync as otherwise the host will spend
 * a significant amount of its time waiting for the guest
 * (instead of doing unmap operations), so this will
 * effectively turn this test into a map performance test.
 *
 * Just access a single page to be on the safe side.
 */
/*
 * NOTE(review): guest-side fragment; the enclosing function's header was
 * lost in extraction — presumably a guest unmap-test body. Verify against
 * the upstream selftest before relying on this region.
 */
*(uint64_t *)ptr = MEM_TEST_VAL_1;

/*
 * Unmap the second half of the test area while guest writes to (maps)
 * the first half.
 */
/*
 * NOTE(review): a host-side fragment from a different function begins
 * here; the intervening lines were lost in extraction.
 */
test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2);

/*
 * Wait for the guest to finish writing the first half of the test
 * area, verify the written value on the first and the last page of
 * this area and then unmap it.
 * Meanwhile, the guest is writing to (mapping) the second half of
 * the test area.
 */
host_perform_sync(sync);
test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1);
test_memslot_do_unmap(data, 0, guest_pages / 2);

/*
 * Wait for the guest to finish writing the second half of the test
 * area and verify the written value on the first and the last page
 * of this area.
 * The area will be unmapped at the beginning of the next loop
 * iteration.
 * Meanwhile, the guest is writing to (mapping) the first half of
 * the test area.
 */
host_perform_sync(sync);
test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2);
}
/*
 * Wait for the guest to finish mapping page(s) in the first half
 * of the test area, verify the written value and then perform unmap
 * of this area.
 * Meanwhile, the guest is writing to (mapping) page(s) in the second
 * half of the test area.
 */
/*
 * NOTE(review): host-side fragment of a chunked-unmap loop; the enclosing
 * function's header was lost in extraction.
 */
host_perform_sync(sync);
test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
/* Unmap the first half chunk by chunk (chunk size set by the caller). */
for (ctr = 0; ctr < guest_pages / 2; ctr += chunk)
	test_memslot_do_unmap(data, ctr, chunk);

/* Likewise, but for the opposite host / guest areas */
host_perform_sync(sync);
test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk)
	test_memslot_do_unmap(data, ctr, chunk);
}
/* Command-line configurable parameters for a test invocation. */
struct test_args {
	int tfirst;	/* index of the first test to run (-f) */
	int tlast;	/* index of the last test to run (-e) */
	int nslots;	/* memslot count cap, -1 means no cap (-s) */
	int seconds;	/* length of each test in seconds (-l) */
	int runs;	/* number of runs per test (-r) */
};
staticvoid help(char *name, struct test_args *targs)
{ int ctr;
pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
name);
pr_info(" -h: print this help screen.\n");
pr_info(" -v: enable verbose mode (not for benchmarking).\n");
pr_info(" -d: enable extra debug checks.\n");
pr_info(" -q: Disable memslot zap quirk during memslot move.\n");
pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
targs->nslots);
pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
targs->tfirst, NTESTS - 1);
pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
targs->tlast, NTESTS - 1);
pr_info(" -l: specify the test length in seconds (currently: %i)\n",
targs->seconds);
pr_info(" -r: specify the number of runs per test (currently: %i)\n",
targs->runs);
/*
 * NOTE(review): the enclosing function's header was lost in extraction;
 * this is the option-parsing body of an argument parser returning bool.
 * The extraction also dropped whitespace at several word boundaries
 * ("returnfalse", "case'h'"), which is restored here without any logic
 * change.
 */
while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) {
	switch (opt) {
	case 'h':
	default:
		help(argv[0], targs);
		return false;
	case 'v':
		verbose = true;
		break;
	case 'd':
		map_unmap_verify = true;
		break;
#ifdef __x86_64__
	case 'q':
		disable_slot_zap_quirk = true;
		/* The quirk can only be disabled when KVM advertises it. */
		TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
			     KVM_X86_QUIRK_SLOT_ZAP_ALL);
		break;
#endif
	case 's':
		targs->nslots = atoi_paranoid(optarg);
		if (targs->nslots <= 1 && targs->nslots != -1) {
			pr_info("Slot count cap must be larger than 1 or -1 for no cap\n");
			return false;
		}
		break;
	case 'f':
		targs->tfirst = atoi_non_negative("First test", optarg);
		break;
	case 'e':
		targs->tlast = atoi_non_negative("Last test", optarg);
		if (targs->tlast >= NTESTS) {
			pr_info("Last test to run has to be non-negative and less than %zu\n",
				NTESTS);
			return false;
		}
		break;
	case 'l':
		targs->seconds = atoi_non_negative("Test length", optarg);
		break;
	case 'r':
		targs->runs = atoi_positive("Runs per test", optarg);
		break;
	}
}

/* Reject stray non-option arguments. */
if (optind < argc) {
	help(argv[0], targs);
	return false;
}

if (targs->tfirst > targs->tlast) {
	pr_info("First test to run cannot be greater than the last test to run\n");
	return false;
}

/* Slot 0 is reserved, so at least two slots must be supported. */
max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
if (max_mem_slots <= 1) {
	pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n");
	return false;
}
/*
 * NOTE(review): fragment of the per-run test driver; the function header
 * was lost in extraction. Whitespace dropped at word boundaries
 * ("returnfalse", "returntrue") is restored here without any logic change.
 */
if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
		  &result.nloops,
		  &result.slot_runtime, &result.guest_runtime)) {
	if (maxslots)
		pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
			maxslots);
	else
		pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
	return false;
}

pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
	result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
	result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);

if (!result.nloops) {
	/* Not a failure: the run simply produced no usable measurement. */
	pr_info("No full loops done - too short test time or system too loaded?\n");
	return true;
}

/*
 * Only rank the slot setup time for tests using the whole test memory
 * area so they are comparable
 */
if (!data->mem_size &&
    (!rbestslottime->slottimens ||
     result.slottimens < rbestslottime->slottimens))
	*rbestslottime = result;
if (!rbestruntime->runtimens ||
    result.runtimens < rbestruntime->runtimens)
	*rbestruntime = result;
/*
 * NOTE(review): fragment of the top-level driver loop; the surrounding
 * function header and the opening of the per-test loop (closed by the
 * lone brace below) were lost in extraction.
 */
/* Repeat each test targs.runs times; stop early if a run fails. */
for (runctr = 0; runctr < targs.runs; runctr++)
	if (!test_loop(data, &targs,
		       &rbestslottime, &rbestruntime))
		break;

if (rbestruntime.runtimens)
	pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
		rbestruntime.iter_runtime.tv_sec,
		rbestruntime.iter_runtime.tv_nsec,
		rbestruntime.nloops);
}

if (rbestslottime.slottimens)
	pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
		rbestslottime.slot_runtime.tv_sec,
		rbestslottime.slot_runtime.tv_nsec);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.