/* How many host loops to run (one KVM_GET_DIRTY_LOG for each loop) */
#define TEST_HOST_LOOP_N		32UL

/* Interval for each host loop (ms) */
#define TEST_HOST_LOOP_INTERVAL		10UL
/*
 * Ensure the vCPU is able to perform a reasonable number of writes in each
 * iteration to provide a lower bound on coverage.
 */
#define TEST_MIN_WRITES_PER_ITERATION	0x100
/*
 * Guest/Host shared variables. Ensure addr_gva2hva() and/or
 * sync_global_to/from_guest() are used when accessing from
 * the host. READ/WRITE_ONCE() should also be used with anything
 * that may change.
 */
static uint64_t host_page_size;
static uint64_t guest_page_size;
static uint64_t guest_num_pages;
static uint64_t iteration;
static uint64_t nr_writes;
/* Set by the host to ask the vCPU worker to stop; was fused as "staticbool" */
static bool vcpu_stop;
/*
 * Guest physical memory offset of the testing memory slot.
 * This will be set to the topmost valid physical address minus
 * the test memory size.
 */
static uint64_t guest_test_phys_mem;
/*
 * Guest virtual memory offset of the testing memory slot.
 * Must not conflict with identity mapped test code.
 */
static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
/* * Continuously write to the first 8 bytes of a random pages within * the testing memory region.
*/ staticvoid guest_code(void)
{
uint64_t addr;
#ifdef __s390x__
uint64_t i;
/* * On s390x, all pages of a 1M segment are initially marked as dirty * when a page of the segment is written to for the very first time. * To compensate this specialty in this test, we need to touch all * pages during the first iteration.
*/ for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
nr_writes++;
} #endif
while (true) { while (!READ_ONCE(vcpu_stop)) {
addr = guest_test_virt_mem;
addr += (guest_random_u64(&guest_rng) % guest_num_pages)
* guest_page_size;
addr = align_down(addr, host_page_size);
/* Points to the test VM memory region on which we track dirty logs */
static void *host_test_mem;
static uint64_t host_num_pages;

/* For statistics only */
static uint64_t host_dirty_count;
static uint64_t host_clear_count;

/* Whether dirty ring reset is requested, or finished */
static sem_t sem_vcpu_stop;
static sem_t sem_vcpu_cont;

/*
 * This is updated by the vcpu thread to tell the host whether it's a
 * ring-full event. It should only be read until a sem_wait() of
 * sem_vcpu_stop and before vcpu continues to run.
 */
static bool dirty_ring_vcpu_ring_full;
/*
 * This is only used for verifying the dirty pages.  Dirty ring has a very
 * tricky case when the ring just got full, kvm will do userspace exit due to
 * ring full.  When that happens, the very last PFN is set but actually the
 * data is not changed (the guest WRITE is not really applied yet), because
 * we found that the dirty ring is full, refused to continue the vcpu, and
 * recorded the dirty gfn with the old contents.
 *
 * For this specific case, it's safe to skip checking this pfn for this
 * bit, because it's a redundant bit, and when the write happens later the bit
 * will be set again.  We use this variable to always keep track of the latest
 * dirty gfn we've collected, so that if a mismatch of data found later in the
 * verifying process, we let it pass.
 */
static uint64_t dirty_ring_last_page = -1ULL;
/*
 * In addition to the above, it is possible (especially if this
 * test is run nested) for the above scenario to repeat multiple times:
 *
 * The following can happen:
 *
 * - L1 vCPU:        Memory write is logged to PML but not committed.
 *
 * - L1 test thread: Ignores the write because its last dirty ring entry
 *                   Resets the dirty ring which:
 *                     - Resets the A/D bits in EPT
 *                     - Issues tlb flush (invept), which is intercepted by L0
 *
 * - L0: frees the whole nested ept mmu root as the response to invept,
 *       and thus ensures that when memory write is retried, it will fault again
 *
 * - L1 vCPU:        Same memory write is logged to the PML but not committed again.
 *
 * - L1 test thread: Ignores the write because its last dirty ring entry (again)
 *                   Resets the dirty ring which:
 *                     - Resets the A/D bits in EPT (again)
 *                     - Issues tlb flush (again) which is intercepted by L0
 *
 * ...
 *
 * N times
 *
 * - L1 vCPU:        Memory write is logged in the PML and then committed.
 *                   Lots of other memory writes are logged and committed.
 * ...
 *
 * - L1 test thread: Sees the memory write along with other memory writes
 *                   in the dirty ring, and since the write is usually not
 *                   the last entry in the dirty-ring and has a very outdated
 *                   iteration, the test fails.
 *
 * Note that this is only possible when the write was the last log entry
 * write during iteration N-1, thus remember last iteration last log entry
 * and also don't fail when it is reported in the next iteration, together with
 * an outdated iteration count.
 */
static uint64_t dirty_ring_prev_iteration_last_page;
enum log_mode_t {
	/* Only use KVM_GET_DIRTY_LOG for logging */
	LOG_MODE_DIRTY_LOG = 0,

	/* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
	LOG_MODE_CLEAR_LOG = 1,

	/* Use dirty ring for logging */
	LOG_MODE_DIRTY_RING = 2,

	LOG_MODE_NUM,

	/* Run all supported modes */
	LOG_MODE_ALL = LOG_MODE_NUM,
};
/* Mode of logging to test. Default is to run all supported modes */ staticenum log_mode_t host_log_mode_option = LOG_MODE_ALL; /* Logging mode for current run */ staticenum log_mode_t host_log_mode; static pthread_t vcpu_thread; static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
/*
 * Should only be called after a GUEST_SYNC.  If the host has requested a
 * stop (vcpu_stop set), signal the main thread that this vCPU is parked
 * and then block until the main thread allows it to continue.
 */
static void vcpu_handle_sync_stop(void)
{
	if (READ_ONCE(vcpu_stop)) {
		/* Tell the main thread we have stopped ... */
		sem_post(&sem_vcpu_stop);
		/* ... and wait here until it lets us run again. */
		sem_wait(&sem_vcpu_cont);
	}
}
/* * We rely on vcpu exit due to full dirty ring state. Adjust * the ring buffer size to ensure we're able to reach the * full dirty ring state.
*/
pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
pages = vm_adjust_num_guest_pages(vm->mode, pages); if (vm->page_size < getpagesize())
pages = vm_num_host_pages(vm->mode, pages);
/* * Switch to dirty ring mode after VM creation but before any * of the vcpu creation.
*/
vm_enable_dirty_ring(vm, test_dirty_ring_count * sizeof(struct kvm_dirty_gfn));
}
/* Only have one vcpu */
count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
slot, bitmap, num_pages,
ring_buf_idx);
cleared = kvm_vm_reset_dirty_ring(vcpu->vm);
/* * Cleared pages should be the same as collected, as KVM is supposed to * clear only the entries that have been harvested.
*/
TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch " "with collected (%u)", cleared, count);
}
/* * Ensure both bitmaps are cleared, as a page can be written * multiple times per iteration, i.e. can show up in both * bitmaps, and the dirty ring is additive, i.e. doesn't purge * bitmap entries from previous collections.
*/ if (__test_and_clear_bit_le(page, bmap[1]) || bmap0_dirty) {
nr_dirty_pages++;
/* * If the page is dirty, the value written to memory * should be the current iteration number.
*/ if (val == iteration) continue;
if (host_log_mode == LOG_MODE_DIRTY_RING) { /* * The last page in the ring from previous * iteration can be written with the value * from the previous iteration, as the value to * be written may be cached in a CPU register.
*/ if (page == dirty_ring_prev_iteration_last_page &&
val == iteration - 1) continue;
/* * Any value from a previous iteration is legal * for the last entry, as the write may not yet * have retired, i.e. the page may hold whatever * it had before this iteration started.
*/ if (page == dirty_ring_last_page &&
val < iteration) continue;
} elseif (!val && iteration == 1 && bmap0_dirty) { /* * When testing get+clear, the dirty bitmap * starts with all bits set, and so the first * iteration can observe a "dirty" page that * was never written, but only in the first * bitmap (collecting the bitmap also clears * all dirty pages).
*/ continue;
}
TEST_FAIL("Dirty page %lu value (%lu) != iteration (%lu) " "(last = %lu, prev_last = %lu)",
page, val, iteration, dirty_ring_last_page,
dirty_ring_prev_iteration_last_page);
} else {
nr_clean_pages++; /* * If cleared, the value written can be any * value smaller than the iteration number.
*/
TEST_ASSERT(val < iteration, "Clear page %lu value (%lu) >= iteration (%lu) " "(last = %lu, prev_last = %lu)",
page, val, iteration, dirty_ring_last_page,
dirty_ring_prev_iteration_last_page);
}
}
if (!log_mode_supported()) {
print_skip("Log mode '%s' not supported",
log_modes[host_log_mode].name); return;
}
/* * We reserve page table for 2 times of extra dirty mem which * will definitely cover the original (1G+) test range. Here * we do the calculation with 4K page size which is the * smallest so the page number will be enough for all archs * (e.g., 64K page size guest will need even less memory for * page tables).
*/
vm = create_vm(mode, &vcpu,
2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), guest_code);
guest_page_size = vm->page_size; /* * A little more than 1G of guest page sized pages. Cover the * case where the size is not aligned to 64 pages.
*/
guest_num_pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
/* * The workaround in guest_code() to write all pages prior to the first * iteration isn't compatible with the dirty ring, as the dirty ring * support relies on the vCPU to actually stop when vcpu_stop is set so * that the vCPU doesn't hang waiting for the dirty ring to be emptied.
*/
TEST_ASSERT(host_log_mode != LOG_MODE_DIRTY_RING, "Test needs to be updated to support s390 dirty ring"); #endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
/* * Ensure the previous iteration didn't leave a dangling semaphore, i.e. * that the main task and vCPU worker were synchronized and completed * verification of all iterations.
*/
sem_getvalue(&sem_vcpu_stop, &sem_val);
TEST_ASSERT_EQ(sem_val, 0);
sem_getvalue(&sem_vcpu_cont, &sem_val);
TEST_ASSERT_EQ(sem_val, 0);
/* * Let the vCPU run beyond the configured interval until it has * performed the minimum number of writes. This verifies the * guest is making forward progress, e.g. isn't stuck because * of a KVM bug, and puts a firm floor on test coverage.
*/ for (i = 0; i < p->interval || nr_writes < TEST_MIN_WRITES_PER_ITERATION; i++) { /* * Sleep in 1ms chunks to keep the interval math simple * and so that the test doesn't run too far beyond the * specified interval.
*/
usleep(1000);
sync_global_from_guest(vm, nr_writes);
/* * Reap dirty pages while the guest is running so that * dirty ring full events are resolved, i.e. so that a * larger interval doesn't always end up with a vCPU * that's effectively blocked. Collecting while the * guest is running also verifies KVM doesn't lose any * state. * * For bitmap modes, KVM overwrites the entire bitmap, * i.e. collecting the bitmaps is destructive. Collect * the bitmap only on the first pass, otherwise this * test would lose track of dirty pages.
*/ if (i && host_log_mode != LOG_MODE_DIRTY_RING) continue;
/* * For the dirty ring, empty the ring on subsequent * passes only if the ring was filled at least once, * to verify KVM's handling of a full ring (emptying * the ring on every pass would make it unlikely the * vCPU would ever fill the fing).
*/ if (i && !READ_ONCE(dirty_ring_vcpu_ring_full)) continue;
/* * Stop the vCPU prior to collecting and verifying the dirty * log. If the vCPU is allowed to run during collection, then * pages that are written during this iteration may be missed, * i.e. collected in the next iteration. And if the vCPU is * writing memory during verification, pages that this thread * sees as clean may be written with this iteration's value.
*/
WRITE_ONCE(vcpu_stop, true);
sync_global_to_guest(vm, vcpu_stop);
sem_wait(&sem_vcpu_stop);
/* * Clear vcpu_stop after the vCPU thread has acknowledge the * stop request and is waiting, i.e. is definitely not running!
*/
WRITE_ONCE(vcpu_stop, false);
sync_global_to_guest(vm, vcpu_stop);
/* * Sync the number of writes performed before verification, the * info will be printed along with the dirty/clean page counts.
*/
sync_global_from_guest(vm, nr_writes);
/* * NOTE: for dirty ring, it's possible that we didn't stop at * GUEST_SYNC but instead we stopped because ring is full; * that's okay too because ring full means we're only missing * the flush of the last page, and since we handle the last * page specially verification will succeed anyway.
*/
log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
bmap[1], host_num_pages,
&ring_buf_idx);
vm_dirty_log_verify(mode, bmap);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.