// SPDX-License-Identifier: GPL-2.0 /* * page_fault_test.c - Test stage 2 faults. * * This test tries different combinations of guest accesses (e.g., write, * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on * hugetlbfs with a hole). It checks that the expected handling method is * called (e.g., uffd faults with the right address and write/read flag).
*/ #include <linux/bitmap.h> #include <fcntl.h> #include <test_util.h> #include <kvm_util.h> #include <processor.h> #include <asm/sysreg.h> #include <linux/bitfield.h> #include"guest_modes.h" #include"userfaultfd_util.h"
/* Guest virtual addresses that point to the test page and its PTE. */
#define TEST_GVA		0xc0000000
/* Executable code is placed 8 bytes into the test page (same page as TEST_GVA). */
#define TEST_EXEC_GVA		(TEST_GVA + 0x8)
/* GVA mapped onto the page table page holding TEST_GVA's PTE. */
#define TEST_PTE_GVA		0xb0000000
/* Pattern written/read by the guest accessors. */
#define TEST_DATA		0x0123456789ABCDEF
/*
 * Global event counters checked against each test's expectations after the
 * vcpu run loop finishes.
 */
static struct event_cnt {
	int mmio_exits;		/* bumped by the per-test MMIO exit handlers */
	int fail_vcpu_runs;	/* bumped when _vcpu_run() fails as expected */
	int uffd_faults;
	/* uffd_faults is incremented from multiple threads. */
	pthread_mutex_t uffd_faults_mutex;
} events;
asmvolatile("at s1e1r, %0" :: "r" (guest_test_memory));
isb();
par = read_sysreg(par_el1);
/* Bit 1 indicates whether the AT was successful */
GUEST_ASSERT_EQ(par & 1, 0);
}
/* * The size of the block written by "dc zva" is guaranteed to be between (2 << * 0) and (2 << 9), which is safe in our case as we need the write to happen * for at least a word, and not more than a page.
*/ staticvoid guest_dc_zva(void)
{
uint16_t val;
/* * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0). * And that's special because KVM must take special care with those: they * should still count as accesses for dirty logging or user-faulting, but * should be handled differently on mmio.
/*
 * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
 * And that's special because KVM must take special care with those: they
 * should still count as accesses for dirty logging or user-faulting, but
 * should be handled differently on mmio.
 */
static void guest_ld_preidx(void)
{
	uint64_t val;
	uint64_t addr = TEST_GVA - 8;

	/*
	 * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
	 * in a gap between memslots not backed by anything.
	 */
	asm volatile("ldr %0, [%1, #8]!"
		     : "=r" (val), "+r" (addr));
	/* The page was just (re)populated, so the load must observe zero. */
	GUEST_ASSERT_EQ(val, 0);
	/* Pre-indexing must have written the updated address back to addr. */
	GUEST_ASSERT_EQ(addr, TEST_GVA);
}
/* Returns true to continue the test, and false if it should be skipped. */ staticint uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, struct uffd_args *args)
{
uint64_t addr = msg->arg.pagefault.address;
uint64_t flags = msg->arg.pagefault.flags; struct uffdio_copy copy; int ret;
TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, "The only expected UFFD mode is MISSING");
TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
/* getpage_size() is not always equal to vm->page_size */
bmap = bitmap_zalloc(size / getpagesize());
kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
first_page_dirty = test_bit(host_pg_nr, bmap);
free(bmap); return first_page_dirty;
}
/* Returns true to continue the test, and false if it should be skipped. */ staticbool handle_cmd(struct kvm_vm *vm, int cmd)
{ struct userspace_mem_region *data_region, *pt_region; bool continue_test = true;
uint64_t pte_gpa, pte_pg;
if (cmd & CMD_HOLE_PT)
continue_test = punch_hole_in_backing_store(vm, pt_region); if (cmd & CMD_HOLE_DATA)
continue_test = punch_hole_in_backing_store(vm, data_region); if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0), "Missing write in dirty log"); if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg), "Missing s1ptw write in dirty log"); if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0), "Unexpected write in dirty log"); if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg), "Unexpected s1ptw write in dirty log");
return continue_test;
}
/*
 * Default _vcpu_run() failure handler: no run failure is ever expected for
 * tests using this handler, so any error aborts the test.
 */
void fail_vcpu_run_no_handler(int ret)
{
	(void)ret;	/* the specific error is irrelevant: any failure is fatal */

	TEST_FAIL("Unexpected vcpu run failure");
}
/*
 * _vcpu_run() failure handler for writes with no valid syndrome hitting a
 * read-only memslot: the run is expected to fail with ENOSYS, and the
 * failure is tallied for the post-test event check.
 */
void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
{
	(void)ret;

	TEST_ASSERT(errno == ENOSYS,
		    "The mmio handler should have returned not implemented.");
	events.fail_vcpu_runs++;
}
/*
 * Note that this function runs on the host before the test VM starts: there's
 * no need to sync the D$ and I$ caches.
 */
static void load_exec_code_for_test(struct kvm_vm *vm)
{
	uint64_t *code;
	struct userspace_mem_region *region;
	void *hva;

	region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
	hva = (void *)region->region.userspace_addr;

	/*
	 * NOTE(review): 'code' and 'hva' are set up but never used below, and
	 * 'pte_gpa' is used without a visible declaration. This looks like a
	 * fusion of two routines (the copy of executable code into the
	 * test-data memslot, and the GVA map setup) with the lines in between
	 * lost — TODO restore from the original source.
	 */
	region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
	/* Map TEST_GVA first. This will install a new PTE. */
	virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
	/* Then map TEST_PTE_GVA to the above PTE. */
	pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
	virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
}
/*
 * Create a memslot for code and data at pfn=0, and test-data and PT ones
 * at max_gfn.
 */
static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
{
	uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
	uint64_t guest_page_size = vm->page_size;
	uint64_t max_gfn = vm_compute_max_gfn(vm);
	/* Enough for 2M of code when using 4K guest pages. */
	uint64_t code_npages = 512;
	uint64_t pt_size, data_size, data_gpa;

	/*
	 * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
	 * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13
	 * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
	 * twice that just in case.
	 */
	pt_size = 26 * guest_page_size;

	/* memslot sizes and gpa's must be aligned to the backing page size */
	pt_size = align_up(pt_size, backing_src_pagesz);
	data_size = align_up(guest_page_size, backing_src_pagesz);
	/* Place the test-data slot at the very top of guest physical memory. */
	data_gpa = (max_gfn * guest_page_size) - data_size;
	data_gpa = align_down(data_gpa, backing_src_pagesz);
	/*
	 * NOTE(review): the tail of this function — the memslot-add calls
	 * that 'code_npages', 'pt_size' and 'data_gpa' feed into, and the
	 * closing brace — appears to be missing here; TODO restore from the
	 * original source.
	 */
/*
 * This function either succeeds, skips the test (after setting test->skip), or
 * fails with a TEST_FAIL that aborts all tests.
 */
static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
			  struct test_desc *test)
{
	struct kvm_run *run;
	struct ucall uc;
	int ret;

	run = vcpu->run;

	for (;;) {
		ret = _vcpu_run(vcpu);
		if (ret) {
			/* Run failures are judged by the per-test handler. */
			test->fail_vcpu_run_handler(ret);
			goto done;
		}

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			/* The guest passes a CMD_* bitmask in args[1]. */
			if (!handle_cmd(vm, uc.args[1])) {
				test->skip = true;
				goto done;
			}
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_DONE:
			goto done;
		case UCALL_NONE:
			if (run->exit_reason == KVM_EXIT_MMIO)
				test->mmio_handler(vm, run);
			break;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}
	}

	/*
	 * NOTE(review): the 'done:' label targeted by the gotos above is
	 * missing, and the statements below reference 'p', 'pt_uffd' and
	 * 'data_uffd' which have no visible declaration. The tail of a
	 * separate test-driver function appears to have been fused onto this
	 * one — TODO restore from the original source.
	 */
	/*
	 * Set some code in the data memslot for the guest to execute (only
	 * applicable to the EXEC tests). This has to be done before
	 * setup_uffd() as that function copies the memslot data for the uffd
	 * handler.
	 */
	load_exec_code_for_test(vm);
	setup_uffd(vm, p, &pt_uffd, &data_uffd);
	setup_abort_handlers(vm, vcpu, test);
	setup_default_handlers(test);
	vcpu_args_set(vcpu, 1, test);

	/*
	 * Make sure we check the events after the uffd threads have exited,
	 * which means they updated their respective event counters.
	 */
	if (!test->skip)
		check_event_counts(test);
}
/* Check that HW is setting the Access Flag (AF) (sanity checks). */
TEST_ACCESS(guest_read64, with_af, CMD_NONE),
TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
TEST_ACCESS(guest_cas, with_af, CMD_NONE),
TEST_ACCESS(guest_write64, with_af, CMD_NONE),
TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
TEST_ACCESS(guest_exec, with_af, CMD_NONE),
	/*
	 * Punch a hole in the data backing store, and then try multiple
	 * accesses: reads should return zeroes, and writes should
	 * re-populate the page. Moreover, the test also checks that no
	 * exception was generated in the guest. Note that this
	 * reading/writing behavior is the same as reading/writing a
	 * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
	 * userspace.
*/
TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
/* * Punch holes in the data and PT backing stores and mark them for * userfaultfd handling. This should result in 2 faults: the access * on the data backing store, and its respective S1 page table walk * (S1PTW).
*/
TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2), /* * Can't test guest_at with_af as it's IMPDEF whether the AF is set. * The S1PTW fault should still be marked as a write.
*/
TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_no_handler, uffd_pt_handler, 1),
TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
uffd_data_handler, uffd_pt_handler, 2),
/* * Try accesses when the data and PT memory regions are both * tracked for dirty logging.
*/
TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
guest_check_no_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_ld_preidx, with_af,
guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
guest_check_no_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
/* * Access when the data and PT memory regions are both marked for * dirty logging and UFFD at the same time. The expected result is * that writes should mark the dirty log and trigger a userfaultfd * write fault. Reads/execs should result in a read userfaultfd * fault, and nothing in the dirty log. Any S1PTW should result in * a write in the dirty log and a userfaultfd write.
*/
TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
uffd_data_handler, 2,
guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
uffd_data_handler, 2,
guest_check_no_write_in_dirty_log,
guest_check_no_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
uffd_data_handler,
2, guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
uffd_data_handler, 2,
guest_check_no_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
uffd_data_handler,
2, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
uffd_data_handler, 2,
guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
uffd_data_handler,
2, guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log),
TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
uffd_data_handler, 2,
guest_check_write_in_dirty_log,
guest_check_s1ptw_wr_in_dirty_log), /* * Access when both the PT and data regions are marked read-only * (with KVM_MEM_READONLY). Writes with a syndrome result in an * MMIO exit, writes with no syndrome (e.g., CAS) result in a * failed vcpu run, and reads/execs with and without syndroms do * not fault.
*/
TEST_RO_MEMSLOT(guest_read64, 0, 0),
TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
TEST_RO_MEMSLOT(guest_at, 0, 0),
TEST_RO_MEMSLOT(guest_exec, 0, 0),
TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
/* * The PT and data regions are both read-only and marked * for dirty logging at the same time. The expected result is that * for writes there should be no write in the dirty log. The * readonly handling is the same as if the memslot was not marked * for dirty logging: writes with a syndrome result in an MMIO * exit, and writes with no syndrome result in a failed vcpu run.
*/
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
1, guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
guest_check_no_write_in_dirty_log),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
guest_check_no_write_in_dirty_log),
/* * The PT and data regions are both read-only and punched with * holes tracked with userfaultfd. The expected result is the * union of both userfaultfd and read-only behaviors. For example, * write accesses result in a userfaultfd write fault and an MMIO * exit. Writes with no syndrome result in a failed vcpu run and * no userfaultfd write fault. Reads result in userfaultfd getting * triggered.
*/
TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
uffd_data_handler, 2),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.