/* maximum percentage of steal time for polling. >100 is treated like 100 */ static u8 halt_poll_max_steal = 10;
module_param(halt_poll_max_steal, byte, 0644);
MODULE_PARM_DESC(halt_poll_max_steal, "Maximum percentage of steal time to allow polling");
/* if set to true, the GISA will be initialized and used if available */ staticbool use_gisa = true;
module_param(use_gisa, bool, 0644);
MODULE_PARM_DESC(use_gisa, "Use the GISA if the host supports it.");
/* maximum diag9c forwarding per second */ unsignedint diag9c_forwarding_hz;
module_param(diag9c_forwarding_hz, uint, 0644);
MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
/* * allow asynchronous deinit for protected guests; enable by default since * the feature is opt-in anyway
*/ staticint async_destroy = 1;
module_param(async_destroy, int, 0444);
MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
/* * For now we handle at most 16 double words as this is what the s390 base * kernel handles and stores in the prefix page. If we ever need to go beyond * this, this requires changes to code, but the external uapi can stay.
*/ #define SIZE_INTERNAL 16
/* * Base feature mask that defines default mask for facilities. Consists of the * defines in FACILITIES_KVM and the non-hypervisor managed bits.
*/ staticunsignedlong kvm_s390_fac_base[SIZE_INTERNAL] = { FACILITIES_KVM }; /* * Extended feature mask. Consists of the defines in FACILITIES_KVM_CPUMODEL * and defines the facilities that can be enabled via a cpu model.
*/ staticunsignedlong kvm_s390_fac_ext[SIZE_INTERNAL] = { FACILITIES_KVM_CPUMODEL };
/* available cpu features supported by kvm */ static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS); /* available subfunctions indicated via query / "test bit" */ staticstruct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
/* * The TOD jumps by delta, we have to compensate this by adding * -delta to the epoch.
*/
delta = -delta;
/* sign-extension - we're adding to signed values below */ if ((s64)delta < 0)
delta_idx = -1;
scb->epoch += delta; if (scb->ecd & ECD_MEF) {
scb->epdx += delta_idx; if (scb->epoch < delta)
scb->epdx += 1;
}
}
/* * This callback is executed during stop_machine(). All CPUs are therefore * temporarily stopped. In order not to change guest behavior, we have to * disable preemption whenever we touch the epoch of kvm and the VCPUs, * so a CPU won't be stopped while calculating with the epoch.
*/ staticint kvm_clock_sync(struct notifier_block *notifier, unsignedlong val, void *v)
{ struct kvm *kvm; struct kvm_vcpu *vcpu; unsignedlong i; unsignedlonglong *delta = v;
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
kvm_clock_sync_scb(vcpu->arch.sie_block, *delta); if (i == 0) {
kvm->arch.epoch = vcpu->arch.sie_block->epoch;
kvm->arch.epdx = vcpu->arch.sie_block->epdx;
} if (vcpu->arch.cputm_enabled)
vcpu->arch.cputm_start += *delta; if (vcpu->arch.vsie_block)
kvm_clock_sync_scb(vcpu->arch.vsie_block,
*delta);
}
} return NOTIFY_OK;
}
if (test_facility(146)) /* MSA8 */
__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
kvm_s390_available_subfunc.kma);
if (test_facility(155)) /* MSA9 */
__cpacf_query(CPACF_KDSA, (cpacf_mask_t *)
kvm_s390_available_subfunc.kdsa);
if (test_facility(150)) /* SORTL */
__sortl_query(&kvm_s390_available_subfunc.sortl);
if (test_facility(151)) /* DFLTCC */
__dfltcc_query(&kvm_s390_available_subfunc.dfltcc);
if (test_facility(201)) /* PFCR */
pfcr_query(&kvm_s390_available_subfunc.pfcr);
if (machine_has_esop())
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP); /* * We need SIE support, ESOP (PROT_READ protection for gmap_shadow), * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
*/ if (!sclp.has_sief2 || !machine_has_esop() || !sclp.has_64bscao ||
!test_facility(3) || !nested) return;
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2); if (sclp.has_64bscao)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO); if (sclp.has_siif)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF); if (sclp.has_gpere)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE); if (sclp.has_gsls)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS); if (sclp.has_ib)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB); if (sclp.has_cei)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI); if (sclp.has_ibs)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS); if (sclp.has_kss)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS); /* * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make * all skey handling functions read/set the skey from the PGSTE * instead of the real storage key. * * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will make * pages being detected as preserved although they are resident. * * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY. * * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be * correctly shadowed. We can do that for the PGSTE but not for PTE.I. * * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We * cannot easily shadow the SCA because of the ipte lock.
*/
}
staticint __init __kvm_s390_init(void)
{ int rc = -ENOMEM;
kvm_s390_gib_destroy(); if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM))
kvm_s390_pci_exit();
debug_unregister(kvm_s390_dbf);
debug_unregister(kvm_s390_dbf_uv);
}
/* Section: device related */ long kvm_arch_dev_ioctl(struct file *filp, unsignedint ioctl, unsignedlong arg)
{ if (ioctl == KVM_S390_ENABLE_SIE) return s390_enable_sie(); return -EINVAL;
}
int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
{ int r;
switch (ext) { case KVM_CAP_S390_PSW: case KVM_CAP_S390_GMAP: case KVM_CAP_SYNC_MMU: #ifdef CONFIG_KVM_S390_UCONTROL case KVM_CAP_S390_UCONTROL: #endif case KVM_CAP_ASYNC_PF: case KVM_CAP_SYNC_REGS: case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: case KVM_CAP_IOEVENTFD: case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_S390_INJECT_IRQ: case KVM_CAP_S390_USER_SIGP: case KVM_CAP_S390_USER_STSI: case KVM_CAP_S390_SKEYS: case KVM_CAP_S390_IRQ_STATE: case KVM_CAP_S390_USER_INSTR0: case KVM_CAP_S390_CMMA_MIGRATION: case KVM_CAP_S390_AIS: case KVM_CAP_S390_AIS_MIGRATION: case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: case KVM_CAP_IRQFD_RESAMPLE:
r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2:
r = KVM_GUESTDBG_VALID_MASK; break; case KVM_CAP_S390_HPAGE_1M:
r = 0; if (hpage && !(kvm && kvm_is_ucontrol(kvm)))
r = 1; break; case KVM_CAP_S390_MEM_OP:
r = MEM_OP_MAX_SIZE; break; case KVM_CAP_S390_MEM_OP_EXTENSION: /* * Flag bits indicating which extensions are supported. * If r > 0, the base extension must also be supported/indicated, * in order to maintain backwards compatibility.
*/
r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG; break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID:
r = KVM_S390_BSCA_CPU_SLOTS; if (!kvm_s390_use_sca_entries())
r = KVM_MAX_VCPUS; elseif (sclp.has_esca && sclp.has_64bscao)
r = KVM_S390_ESCA_CPU_SLOTS; if (ext == KVM_CAP_NR_VCPUS)
r = min_t(unsignedint, num_online_cpus(), r); break; case KVM_CAP_S390_COW:
r = machine_has_esop(); break; case KVM_CAP_S390_VECTOR_REGISTERS:
r = test_facility(129); break; case KVM_CAP_S390_RI:
r = test_facility(64); break; case KVM_CAP_S390_GS:
r = test_facility(133); break; case KVM_CAP_S390_BPB:
r = test_facility(82); break; case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
r = async_destroy && is_prot_virt_host(); break; case KVM_CAP_S390_PROTECTED:
r = is_prot_virt_host(); break; case KVM_CAP_S390_PROTECTED_DUMP: {
u64 pv_cmds_dump[] = {
BIT_UVC_CMD_DUMP_INIT,
BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE,
BIT_UVC_CMD_DUMP_CPU,
BIT_UVC_CMD_DUMP_COMPLETE,
}; int i;
r = is_prot_virt_host();
for (i = 0; i < ARRAY_SIZE(pv_cmds_dump); i++) { if (!test_bit_inv(pv_cmds_dump[i],
(unsignedlong *)&uv_info.inst_calls_list)) {
r = 0; break;
}
} break;
} case KVM_CAP_S390_ZPCI_OP:
r = kvm_s390_pci_interp_allowed(); break; case KVM_CAP_S390_CPU_TOPOLOGY:
r = test_facility(11); break; default:
r = 0;
} return r;
}
/* Loop over all guest segments */
cur_gfn = memslot->base_gfn;
last_gfn = memslot->base_gfn + memslot->npages; for (; cur_gfn <= last_gfn; cur_gfn += _PAGE_ENTRIES) {
gaddr = gfn_to_gpa(cur_gfn);
vmaddr = gfn_to_hva_memslot(memslot, cur_gfn); if (kvm_is_error_hva(vmaddr)) continue;
bitmap_zero(bitmap, _PAGE_ENTRIES);
gmap_sync_dirty_log_pmd(gmap, bitmap, gaddr, vmaddr); for (i = 0; i < _PAGE_ENTRIES; i++) { if (test_bit(i, bitmap))
mark_page_dirty(kvm, cur_gfn + i);
}
if (fatal_signal_pending(current)) return;
cond_resched();
}
}
/* Section: vm related */
static void sca_del_vcpu(struct kvm_vcpu *vcpu);

/*
 * Get (and clear) the dirty memory log for a memory slot.
 *
 * @kvm: the VM
 * @log: request describing the slot
 *
 * Returns -EINVAL for ucontrol VMs and for slot numbers at or above
 * KVM_USER_MEM_SLOTS. An error from kvm_get_dirty_log() is passed through
 * unchanged. Returns 0 on success. The slot's dirty bitmap is zeroed only
 * when kvm_get_dirty_log() reported it as dirty. Runs under kvm->slots_lock.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
			       struct kvm_dirty_log *log)
{
	int r;
	unsigned long n;
	struct kvm_memory_slot *memslot;
	int is_dirty;

	if (kvm_is_ucontrol(kvm))
		return -EINVAL;

	mutex_lock(&kvm->slots_lock);

	r = -EINVAL;
	if (log->slot >= KVM_USER_MEM_SLOTS)
		goto out;

	r = kvm_get_dirty_log(kvm, log, &is_dirty, &memslot);
	if (r)
		goto out;

	/* Clear the dirty log */
	if (is_dirty) {
		n = kvm_dirty_bitmap_bytes(memslot);
		memset(memslot->dirty_bitmap, 0, n);
	}
	r = 0;
out:
	mutex_unlock(&kvm->slots_lock);
	return r;
}
int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
{ int r;
if (cap->flags) return -EINVAL;
switch (cap->cap) { case KVM_CAP_S390_IRQCHIP:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
kvm->arch.use_irqchip = 1;
r = 0; break; case KVM_CAP_S390_USER_SIGP:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
kvm->arch.user_sigp = 1;
r = 0; break; case KVM_CAP_S390_VECTOR_REGISTERS:
mutex_lock(&kvm->lock); if (kvm->created_vcpus) {
r = -EBUSY;
} elseif (cpu_has_vx()) {
set_kvm_facility(kvm->arch.model.fac_mask, 129);
set_kvm_facility(kvm->arch.model.fac_list, 129); if (test_facility(134)) {
set_kvm_facility(kvm->arch.model.fac_mask, 134);
set_kvm_facility(kvm->arch.model.fac_list, 134);
} if (test_facility(135)) {
set_kvm_facility(kvm->arch.model.fac_mask, 135);
set_kvm_facility(kvm->arch.model.fac_list, 135);
} if (test_facility(148)) {
set_kvm_facility(kvm->arch.model.fac_mask, 148);
set_kvm_facility(kvm->arch.model.fac_list, 148);
} if (test_facility(152)) {
set_kvm_facility(kvm->arch.model.fac_mask, 152);
set_kvm_facility(kvm->arch.model.fac_list, 152);
} if (test_facility(192)) {
set_kvm_facility(kvm->arch.model.fac_mask, 192);
set_kvm_facility(kvm->arch.model.fac_list, 192);
} if (test_facility(198)) {
set_kvm_facility(kvm->arch.model.fac_mask, 198);
set_kvm_facility(kvm->arch.model.fac_list, 198);
} if (test_facility(199)) {
set_kvm_facility(kvm->arch.model.fac_mask, 199);
set_kvm_facility(kvm->arch.model.fac_list, 199);
}
r = 0;
} else
r = -EINVAL;
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
r ? "(not available)" : "(success)"); break; case KVM_CAP_S390_RI:
r = -EINVAL;
mutex_lock(&kvm->lock); if (kvm->created_vcpus) {
r = -EBUSY;
} elseif (test_facility(64)) {
set_kvm_facility(kvm->arch.model.fac_mask, 64);
set_kvm_facility(kvm->arch.model.fac_list, 64);
r = 0;
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
r ? "(not available)" : "(success)"); break; case KVM_CAP_S390_AIS:
mutex_lock(&kvm->lock); if (kvm->created_vcpus) {
r = -EBUSY;
} else {
set_kvm_facility(kvm->arch.model.fac_mask, 72);
set_kvm_facility(kvm->arch.model.fac_list, 72);
r = 0;
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: AIS %s",
r ? "(not available)" : "(success)"); break; case KVM_CAP_S390_GS:
r = -EINVAL;
mutex_lock(&kvm->lock); if (kvm->created_vcpus) {
r = -EBUSY;
} elseif (test_facility(133)) {
set_kvm_facility(kvm->arch.model.fac_mask, 133);
set_kvm_facility(kvm->arch.model.fac_list, 133);
r = 0;
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
r ? "(not available)" : "(success)"); break; case KVM_CAP_S390_HPAGE_1M:
mutex_lock(&kvm->lock); if (kvm->created_vcpus)
r = -EBUSY; elseif (!hpage || kvm->arch.use_cmma || kvm_is_ucontrol(kvm))
r = -EINVAL; else {
r = 0;
mmap_write_lock(kvm->mm);
kvm->mm->context.allow_gmap_hpage_1m = 1;
mmap_write_unlock(kvm->mm); /* * We might have to create fake 4k page * tables. To avoid that the hardware works on * stale PGSTEs, we emulate these instructions.
*/
kvm->arch.use_skf = 0;
kvm->arch.use_pfmfi = 0;
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_HPAGE %s",
r ? "(not available)" : "(success)"); break; case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
r = 0; break; case KVM_CAP_S390_USER_INSTR0:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
kvm->arch.user_instr0 = 1;
icpt_operexc_on_all_vcpus(kvm);
r = 0; break; case KVM_CAP_S390_CPU_TOPOLOGY:
r = -EINVAL;
mutex_lock(&kvm->lock); if (kvm->created_vcpus) {
r = -EBUSY;
} elseif (test_facility(11)) {
set_kvm_facility(kvm->arch.model.fac_mask, 11);
set_kvm_facility(kvm->arch.model.fac_list, 11);
r = 0;
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_CPU_TOPOLOGY %s",
r ? "(not available)" : "(success)"); break; default:
r = -EINVAL; break;
} return r;
}
/*
 * Read a memory-control attribute of the VM.
 *
 * Only KVM_S390_VM_MEM_LIMIT_SIZE is supported: kvm->arch.mem_limit is
 * written as a u64 to the user address in attr->addr.
 *
 * Returns 0 on success, -EFAULT if the user copy fails and -ENXIO for any
 * other attribute.
 */
static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->attr) {
	case KVM_S390_VM_MEM_LIMIT_SIZE:
		ret = 0;
		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
			 kvm->arch.mem_limit);
		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
			ret = -EFAULT;
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
staticint kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
{ int ret; unsignedint idx; switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA:
ret = -ENXIO; if (!sclp.has_cmma) break;
VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
mutex_lock(&kvm->lock); if (kvm->created_vcpus)
ret = -EBUSY; elseif (kvm->mm->context.allow_gmap_hpage_1m)
ret = -EINVAL; else {
kvm->arch.use_cmma = 1; /* Not compatible with cmma. */
kvm->arch.use_pfmfi = 0;
ret = 0;
}
mutex_unlock(&kvm->lock); break; case KVM_S390_VM_MEM_CLR_CMMA:
ret = -ENXIO; if (!sclp.has_cmma) break;
ret = -EINVAL; if (!kvm->arch.use_cmma) break;
if (get_user(new_limit, (u64 __user *)attr->addr)) return -EFAULT;
if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
new_limit > kvm->arch.mem_limit) return -E2BIG;
if (!new_limit) return -EINVAL;
/* gmap_create takes last usable address */ if (new_limit != KVM_S390_NO_MEM_LIMIT)
new_limit -= 1;
ret = -EBUSY;
mutex_lock(&kvm->lock); if (!kvm->created_vcpus) { /* gmap_create will round the limit up */ struct gmap *new = gmap_create(current->mm, new_limit);
if (!new) {
ret = -ENOMEM;
} else {
gmap_remove(kvm->arch.gmap);
new->private = kvm;
kvm->arch.gmap = new;
ret = 0;
}
}
mutex_unlock(&kvm->lock);
VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
VM_EVENT(kvm, 3, "New guest asce: 0x%p",
(void *) kvm->arch.gmap->asce); break;
} default:
ret = -ENXIO; break;
} return ret;
}
staticvoid kvm_s390_vcpu_pci_setup(struct kvm_vcpu *vcpu)
{ /* Only set the ECB bits after guest requests zPCI interpretation */ if (!vcpu->kvm->arch.use_zpci_interp) return;
/* * If host is configured for PCI and the necessary facilities are * available, turn on interpretation for the life of this guest
*/
kvm->arch.use_zpci_interp = 1;
/*
 * Must be called with kvm->srcu held to avoid races on memslots, and with
 * kvm->slots_lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
 *
 * Returns 0 if migration mode is (or already was) enabled. Returns -EINVAL
 * if there are no memslots, or if CMMA is in use and a slot has no dirty
 * bitmap.
 */
static int kvm_s390_vm_start_migration(struct kvm *kvm)
{
	struct kvm_memory_slot *ms;
	struct kvm_memslots *slots;
	unsigned long ram_pages = 0;
	int bkt;

	/* migration mode already enabled */
	if (kvm->arch.migration_mode)
		return 0;
	slots = kvm_memslots(kvm);
	if (!slots || kvm_memslots_empty(slots))
		return -EINVAL;

	/* without CMMA there is no per-page state to track */
	if (!kvm->arch.use_cmma) {
		kvm->arch.migration_mode = 1;
		return 0;
	}
	/* mark all the pages in active slots as dirty */
	kvm_for_each_memslot(ms, bkt, slots) {
		if (!ms->dirty_bitmap)
			return -EINVAL;
		/*
		 * The second half of the bitmap is only used on x86,
		 * and would be wasted otherwise, so we put it to good
		 * use here to keep track of the state of the storage
		 * attributes.
		 */
		memset(kvm_second_dirty_bitmap(ms), 0xff, kvm_dirty_bitmap_bytes(ms));
		ram_pages += ms->npages;
	}
	atomic64_set(&kvm->arch.cmma_dirty_pages, ram_pages);
	kvm->arch.migration_mode = 1;
	kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
	return 0;
}
/*
 * Must be called with kvm->slots_lock to avoid races with ourselves and
 * kvm_s390_vm_start_migration.
 *
 * Clears migration mode and, if CMMA is in use, broadcasts
 * KVM_REQ_STOP_MIGRATION. Always returns 0.
 */
static int kvm_s390_vm_stop_migration(struct kvm *kvm)
{
	/* migration mode already disabled */
	if (!kvm->arch.migration_mode)
		return 0;
	kvm->arch.migration_mode = 0;
	if (kvm->arch.use_cmma)
		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
	return 0;
}
staticint kvm_s390_vm_set_migration(struct kvm *kvm, struct kvm_device_attr *attr)
{ int res = -ENXIO;
mutex_lock(&kvm->slots_lock); switch (attr->attr) { case KVM_S390_VM_MIGRATION_START:
res = kvm_s390_vm_start_migration(kvm); break; case KVM_S390_VM_MIGRATION_STOP:
res = kvm_s390_vm_stop_migration(kvm); break; default: break;
}
mutex_unlock(&kvm->slots_lock);
staticint kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{ int ret;
if (attr->flags) return -EINVAL;
mutex_lock(&kvm->lock); /* * For protected guests, the TOD is managed by the ultravisor, so trying * to change it will never bring the expected results.
*/ if (kvm_s390_pv_is_protected(kvm)) {
ret = -EOPNOTSUPP; goto out_unlock;
}
switch (attr->attr) { case KVM_S390_VM_TOD_EXT:
ret = kvm_s390_set_tod_ext(kvm, attr); break; case KVM_S390_VM_TOD_HIGH:
ret = kvm_s390_set_tod_high(kvm, attr); break; case KVM_S390_VM_TOD_LOW:
ret = kvm_s390_set_tod_low(kvm, attr); break; default:
ret = -ENXIO; break;
}
gtod = kvm_s390_get_tod_clock_fast(kvm); if (copy_to_user((void __user *)attr->addr, >od, sizeof(gtod))) return -EFAULT;
VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
return 0;
}
/*
 * Dispatch a TOD "get" request to the handler for attr->attr.
 *
 * Returns -EINVAL if attr->flags is non-zero, -ENXIO for an unknown
 * attribute, otherwise the result of the selected handler.
 */
static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	if (attr->flags)
		return -EINVAL;

	switch (attr->attr) {
	case KVM_S390_VM_TOD_EXT:
		ret = kvm_s390_get_tod_ext(kvm, attr);
		break;
	case KVM_S390_VM_TOD_HIGH:
		ret = kvm_s390_get_tod_high(kvm, attr);
		break;
	case KVM_S390_VM_TOD_LOW:
		ret = kvm_s390_get_tod_low(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}
	return ret;
}
staticint kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{ struct kvm_s390_vm_cpu_processor *proc;
u16 lowest_ibc, unblocked_ibc; int ret = 0;
/*
 * Dispatch a CPU-model "set" request to the handler for attr->attr.
 *
 * Returns the handler's result, or -ENXIO when the attribute matches none
 * of the cases.
 */
static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_set_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_set_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_set_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
		ret = kvm_s390_set_uv_feat(kvm, attr);
		break;
	}
	return ret;
}
staticint kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
{ struct kvm_s390_vm_cpu_processor *proc; int ret = 0;
/*
 * Dispatch a CPU-model "get" request to the handler for attr->attr.
 *
 * Returns the handler's result, or -ENXIO when the attribute matches none
 * of the cases.
 */
static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret = -ENXIO;

	switch (attr->attr) {
	case KVM_S390_VM_CPU_PROCESSOR:
		ret = kvm_s390_get_processor(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE:
		ret = kvm_s390_get_machine(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
		ret = kvm_s390_get_processor_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_FEAT:
		ret = kvm_s390_get_machine_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
		ret = kvm_s390_get_processor_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
		ret = kvm_s390_get_machine_subfunc(kvm, attr);
		break;
	case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
		ret = kvm_s390_get_processor_uv_feat(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST:
		ret = kvm_s390_get_machine_uv_feat(kvm, attr);
		break;
	}
	return ret;
}
/**
 * kvm_s390_update_topology_change_report - update CPU topology change report
 * @kvm: guest KVM description
 * @val: set or clear the MTCR bit
 *
 * Updates the Multiprocessor Topology-Change-Report bit to signal
 * the guest with a topology change.
 * This is only relevant if the topology facility is present.
 *
 * The SCA version, bsca or esca, doesn't matter as offset is the same.
 */
static void kvm_s390_update_topology_change_report(struct kvm *kvm, bool val)
{
	union sca_utility new, old;
	struct bsca_block *sca;

	read_lock(&kvm->arch.sca_lock);
	sca = kvm->arch.sca;
	old = READ_ONCE(sca->utility);
	/* retry until the utility word is swapped without interference */
	do {
		new = old;
		new.mtcr = val;
	} while (!try_cmpxchg(&sca->utility.val, &old.val, new.val));
	read_unlock(&kvm->arch.sca_lock);
}
/*
 * Dispatch a VM attribute "set" request to the handler for attr->group.
 *
 * Returns the handler's result, or -ENXIO for an unknown group.
 */
static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_set_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_set_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_set_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_CRYPTO:
		ret = kvm_s390_vm_set_crypto(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_set_migration(kvm, attr);
		break;
	case KVM_S390_VM_CPU_TOPOLOGY:
		ret = kvm_s390_set_topo_change_indication(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
/*
 * Dispatch a VM attribute "get" request to the handler for attr->group.
 *
 * Returns the handler's result, or -ENXIO for an unknown group. Unlike the
 * "set" dispatcher, there is no KVM_S390_VM_CRYPTO case here.
 */
static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{
	int ret;

	switch (attr->group) {
	case KVM_S390_VM_MEM_CTRL:
		ret = kvm_s390_get_mem_control(kvm, attr);
		break;
	case KVM_S390_VM_TOD:
		ret = kvm_s390_get_tod(kvm, attr);
		break;
	case KVM_S390_VM_CPU_MODEL:
		ret = kvm_s390_get_cpu_model(kvm, attr);
		break;
	case KVM_S390_VM_MIGRATION:
		ret = kvm_s390_vm_get_migration(kvm, attr);
		break;
	case KVM_S390_VM_CPU_TOPOLOGY:
		ret = kvm_s390_get_topo_change_indication(kvm, attr);
		break;
	default:
		ret = -ENXIO;
		break;
	}

	return ret;
}
staticint kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
{ int ret;
switch (attr->group) { case KVM_S390_VM_MEM_CTRL: switch (attr->attr) { case KVM_S390_VM_MEM_ENABLE_CMMA: case KVM_S390_VM_MEM_CLR_CMMA:
ret = sclp.has_cmma ? 0 : -ENXIO; break; case KVM_S390_VM_MEM_LIMIT_SIZE:
ret = 0; break; default:
ret = -ENXIO; break;
} break; case KVM_S390_VM_TOD: switch (attr->attr) { case KVM_S390_VM_TOD_LOW: case KVM_S390_VM_TOD_HIGH:
ret = 0; break; default:
ret = -ENXIO; break;
} break; case KVM_S390_VM_CPU_MODEL: switch (attr->attr) { case KVM_S390_VM_CPU_PROCESSOR: case KVM_S390_VM_CPU_MACHINE: case KVM_S390_VM_CPU_PROCESSOR_FEAT: case KVM_S390_VM_CPU_MACHINE_FEAT: case KVM_S390_VM_CPU_MACHINE_SUBFUNC: case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC: case KVM_S390_VM_CPU_MACHINE_UV_FEAT_GUEST: case KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST:
ret = 0; break; default:
ret = -ENXIO; break;
} break; case KVM_S390_VM_CRYPTO: switch (attr->attr) { case KVM_S390_VM_CRYPTO_ENABLE_AES_KW: case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW: case KVM_S390_VM_CRYPTO_DISABLE_AES_KW: case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
ret = 0; break; case KVM_S390_VM_CRYPTO_ENABLE_APIE: case KVM_S390_VM_CRYPTO_DISABLE_APIE:
ret = ap_instructions_available() ? 0 : -ENXIO; break; default:
ret = -ENXIO; break;
} break; case KVM_S390_VM_MIGRATION:
ret = 0; break; case KVM_S390_VM_CPU_TOPOLOGY:
ret = test_kvm_facility(kvm, 11) ? 0 : -ENXIO; break; default:
ret = -ENXIO; break;
}
return ret;
}
/*
 * Read the guest storage keys for a range of guest frames into user memory.
 *
 * @kvm:  the VM
 * @args: request; start_gfn/count select the frames, skeydata_addr is the
 *        user buffer receiving one byte per frame, flags must be 0
 *
 * Returns 0 on success and KVM_S390_GET_SKEYS_NONE if the mm does not use
 * storage keys. Returns -EINVAL for non-zero flags or a count outside
 * 1..KVM_S390_SKEYS_MAX, -ENOMEM if the temporary buffer cannot be
 * allocated, and -EFAULT for an unmapped gfn or a failed user copy. An error
 * from get_guest_storage_key() is passed through unchanged. Nothing is
 * copied to user space unless every key was read successfully.
 */
static int kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;

	if (args->flags != 0)
		return -EINVAL;

	/* Is this guest using storage keys? */
	if (!mm_uses_skeys(current->mm))
		return KVM_S390_GET_SKEYS_NONE;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	for (i = 0; i < args->count; i++) {
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		r = get_guest_storage_key(current->mm, hva, &keys[i]);
		if (r)
			break;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);

	if (!r) {
		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
				 sizeof(uint8_t) * args->count);
		if (r)
			r = -EFAULT;
	}

	kvfree(keys);
	return r;
}
/*
 * Set the guest storage keys for a range of guest frames from user memory.
 *
 * @kvm:  the VM
 * @args: request; start_gfn/count select the frames, skeydata_addr is the
 *        user buffer holding one key byte per frame, flags must be 0
 *
 * Returns 0 on success. Returns -EINVAL for non-zero flags, a count outside
 * 1..KVM_S390_SKEYS_MAX, or a key with the lowest bit set. Returns -ENOMEM
 * on allocation failure and -EFAULT for a failed user copy or an unmapped
 * gfn. Errors from s390_enable_skey() or fixup_user_fault() are passed
 * through unchanged. Keys for frames before the failing one have already
 * been applied when an error is returned from the loop.
 */
static int kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
{
	uint8_t *keys;
	uint64_t hva;
	int srcu_idx, i, r = 0;
	bool unlocked;

	if (args->flags != 0)
		return -EINVAL;

	/* Enforce sane limit on memory allocation */
	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
		return -EINVAL;

	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL_ACCOUNT);
	if (!keys)
		return -ENOMEM;

	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
			   sizeof(uint8_t) * args->count);
	if (r) {
		r = -EFAULT;
		goto out;
	}

	/* Enable storage key handling for the guest */
	r = s390_enable_skey();
	if (r)
		goto out;

	i = 0;
	mmap_read_lock(current->mm);
	srcu_idx = srcu_read_lock(&kvm->srcu);
	while (i < args->count) {
		unlocked = false;
		hva = gfn_to_hva(kvm, args->start_gfn + i);
		if (kvm_is_error_hva(hva)) {
			r = -EFAULT;
			break;
		}

		/* Lowest order bit is reserved */
		if (keys[i] & 0x01) {
			r = -EINVAL;
			break;
		}

		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
		if (r) {
			/*
			 * Try to fault the page in; when that works r is 0 and
			 * the same index is retried because i is not advanced.
			 */
			r = fixup_user_fault(current->mm, hva,
					     FAULT_FLAG_WRITE, &unlocked);
			if (r)
				break;
		}
		if (!r)
			i++;
	}
	srcu_read_unlock(&kvm->srcu, srcu_idx);
	mmap_read_unlock(current->mm);
out:
	kvfree(keys);
	return r;
}
/*
 * Base address and length must be sent at the start of each block, therefore
 * it's cheaper to send some clean data, as long as it's less than the size of
 * two longs.
 */
#define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
/* for consistency */
#define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
args->count = 0; while (args->count < bufsize) {
hva = gfn_to_hva(kvm, cur_gfn); /* * We return an error if the first value was invalid, but we * return successfully if at least one value was copied.
*/ if (kvm_is_error_hva(hva)) return args->count ? 0 : -EFAULT; if (get_pgste(kvm->mm, hva, &pgstev) < 0)
pgstev = 0;
res[args->count++] = (pgstev >> 24) & 0x43;
cur_gfn++;
}
if (ms->base_gfn + ms->npages <= cur_gfn) {
mnode = rb_next(mnode); /* If we are above the highest slot, wrap around */ if (!mnode)
mnode = rb_first(&slots->gfn_tree);
ms = container_of(mnode, struct kvm_memory_slot, gfn_node[slots->node_idx]);
ofs = 0;
}
while (args->count < bufsize) {
hva = gfn_to_hva(kvm, cur_gfn); if (kvm_is_error_hva(hva)) return 0; /* Decrement only if we actually flipped the bit to 0 */ if (test_and_clear_bit(cur_gfn - ms->base_gfn, kvm_second_dirty_bitmap(ms)))
atomic64_dec(&kvm->arch.cmma_dirty_pages); if (get_pgste(kvm->mm, hva, &pgstev) < 0)
pgstev = 0; /* Save the value */
res[args->count++] = (pgstev >> 24) & 0x43; /* If the next bit is too far away, stop. */ if (next_gfn > cur_gfn + KVM_S390_MAX_BIT_DISTANCE) return 0; /* If we reached the previous "next", find the next one */ if (cur_gfn == next_gfn)
next_gfn = kvm_s390_next_dirty_cmma(slots, cur_gfn + 1); /* Reached the end of memory or of the buffer, stop */ if ((next_gfn >= mem_end) ||
(next_gfn - args->start_gfn >= bufsize)) return 0;
cur_gfn++; /* Reached the end of the current memslot, take the next one. */ if (cur_gfn - ms->base_gfn >= ms->npages) {
ms = gfn_to_memslot(kvm, cur_gfn); if (!ms) return 0;
}
} return 0;
}
/* * This function searches for the next page with dirty CMMA attributes, and * saves the attributes in the buffer up to either the end of the buffer or * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found; * no trailing clean bytes are saved. * In case no dirty bits were found, or if CMMA was not enabled or used, the * output buffer will indicate 0 as length.
*/ staticint kvm_s390_get_cmma_bits(struct kvm *kvm, struct kvm_s390_cmma_log *args)
{ unsignedlong bufsize; int srcu_idx, peek, ret;
u8 *values;
if (!kvm->arch.use_cmma) return -ENXIO; /* Invalid/unsupported flags were specified */ if (args->flags & ~KVM_S390_CMMA_PEEK) return -EINVAL; /* Migration mode query, and we are not doing a migration */
peek = !!(args->flags & KVM_S390_CMMA_PEEK); if (!peek && !kvm->arch.migration_mode) return -EINVAL; /* CMMA is disabled or was not used, or the buffer has length zero */
bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX); if (!bufsize || !kvm->mm->context.uses_cmm) {
memset(args, 0, sizeof(*args)); return 0;
} /* We are not peeking, and there are no dirty pages */ if (!peek && !atomic64_read(&kvm->arch.cmma_dirty_pages)) {
memset(args, 0, sizeof(*args)); return 0;
}
values = vmalloc(bufsize); if (!values) return -ENOMEM;
mmap_read_lock(kvm->mm);
srcu_idx = srcu_read_lock(&kvm->srcu); if (peek)
ret = kvm_s390_peek_cmma(kvm, args, values, bufsize); else
ret = kvm_s390_get_cmma(kvm, args, values, bufsize);
srcu_read_unlock(&kvm->srcu, srcu_idx);
mmap_read_unlock(kvm->mm);
if (kvm->arch.migration_mode)
args->remaining = atomic64_read(&kvm->arch.cmma_dirty_pages); else
args->remaining = 0;
if (copy_to_user((void __user *)args->values, values, args->count))
ret = -EFAULT;
vfree(values); return ret;
}
/* * This function sets the CMMA attributes for the given pages. If the input * buffer has zero length, no action is taken, otherwise the attributes are * set and the mm->context.uses_cmm flag is set.
*/ staticint kvm_s390_set_cmma_bits(struct kvm *kvm, conststruct kvm_s390_cmma_log *args)
{ unsignedlong hva, mask, pgstev, i;
uint8_t *bits; int srcu_idx, r = 0;
mask = args->mask;
if (!kvm->arch.use_cmma) return -ENXIO; /* invalid/unsupported flags */ if (args->flags != 0) return -EINVAL; /* Enforce sane limit on memory allocation */ if (args->count > KVM_S390_CMMA_SIZE_MAX) return -EINVAL; /* Nothing to do */ if (args->count == 0) return 0;
bits = vmalloc(array_size(sizeof(*bits), args->count)); if (!bits) return -ENOMEM;
r = copy_from_user(bits, (void __user *)args->values, args->count); if (r) {
r = -EFAULT; goto out;
}
mmap_read_lock(kvm->mm);
srcu_idx = srcu_read_lock(&kvm->srcu); for (i = 0; i < args->count; i++) {
hva = gfn_to_hva(kvm, args->start_gfn + i); if (kvm_is_error_hva(hva)) {
r = -EFAULT; break;
}
/**
 * kvm_s390_cpus_from_pv - Convert all protected vCPUs in a protected VM to
 * non protected.
 * @kvm: the VM whose protected vCPUs are to be converted
 * @rc: return value for the RC field of the UVC (in case of error)
 * @rrc: return value for the RRC field of the UVC (in case of error)
 *
 * Does not stop in case of error, tries to convert as many
 * CPUs as possible. In case of error, the RC and RRC of the first error are
 * returned; @rc and @rrc are left untouched if no error occurs.
 *
 * Return: 0 in case of success, otherwise -EIO
 */
int kvm_s390_cpus_from_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	struct kvm_vcpu *vcpu;
	unsigned long i;
	u16 _rc, _rrc;
	int ret = 0;

	/*
	 * We ignore failures and try to destroy as many CPUs as possible.
	 * At the same time we must not free the assigned resources when
	 * this fails, as the ultravisor has still access to that memory.
	 * So kvm_s390_pv_destroy_cpu can leave a "wanted" memory leak
	 * behind.
	 * We want to return the first failure rc and rrc, though.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_lock(&vcpu->mutex);
		/* "!ret" keeps only the first failure's rc/rrc */
		if (kvm_s390_pv_destroy_cpu(vcpu, &_rc, &_rrc) && !ret) {
			*rc = _rc;
			*rrc = _rrc;
			ret = -EIO;
		}
		mutex_unlock(&vcpu->mutex);
	}
	/* Ensure that we re-enable gisa if the non-PV guest used it but the PV guest did not. */
	if (use_gisa)
		kvm_s390_gisa_enable(kvm);
	return ret;
}
/**
 * kvm_s390_cpus_to_pv - Convert all non-protected vCPUs in a protected VM
 * to protected.
 * @kvm: the VM whose non-protected vCPUs are to be converted
 * @rc: return value for the RC field of the UVC (in case of error)
 * @rrc: return value for the RRC field of the UVC (in case of error)
 *
 * Tries to undo the conversion in case of error. The rc/rrc of the undo
 * step are discarded, so @rc and @rrc keep the values written by the failed
 * create call.
 *
 * Return: 0 in case of success, otherwise the error from
 * kvm_s390_pv_create_cpu() (NOTE(review): presumably -EIO, confirm there)
 */
static int kvm_s390_cpus_to_pv(struct kvm *kvm, u16 *rc, u16 *rrc)
{
	unsigned long i;
	int r = 0;
	u16 dummy;

	struct kvm_vcpu *vcpu;

	/* Disable the GISA if the ultravisor does not support AIV. */
	if (!uv_has_feature(BIT_UV_FEAT_AIV))
		kvm_s390_gisa_disable(kvm);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_lock(&vcpu->mutex);
		r = kvm_s390_pv_create_cpu(vcpu, rc, rrc);
		mutex_unlock(&vcpu->mutex);
		if (r)
			break;
	}
	/* roll back every vCPU on failure */
	if (r)
		kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
	return r;
}
/* * Here we provide user space with a direct interface to query UV * related data like UV maxima and available features as well as * feature specific data. * * To facilitate future extension of the data structures we'll try to * write data up to the maximum requested length.
*/ static ssize_t kvm_s390_handle_pv_info(struct kvm_s390_pv_info *info)
{
ssize_t len_min;
/* It's max cpuid not max cpus, so it's off by one */
info->vm.max_cpus = uv_info.max_guest_cpu_id + 1;
info->vm.max_guests = uv_info.max_num_sec_conf;
info->vm.max_guest_addr = uv_info.max_sec_stor_addr;
info->vm.feature_indication = uv_info.uv_feature_indications;
switch (dmp.subcmd) { case KVM_PV_DUMP_INIT: { if (kvm->arch.pv.dumping) break;
/* * Block SIE entry as concurrent dump UVCs could lead * to validities.
*/
kvm_s390_vcpu_block_all(kvm);
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
UVC_CMD_DUMP_INIT, &cmd->rc, &cmd->rrc);
KVM_UV_EVENT(kvm, 3, "PROTVIRT DUMP INIT: rc %x rrc %x",
cmd->rc, cmd->rrc); if (!r) {
kvm->arch.pv.dumping = true;
} else {
kvm_s390_vcpu_unblock_all(kvm);
r = -EINVAL;
} break;
} case KVM_PV_DUMP_CONFIG_STOR_STATE: { if (!kvm->arch.pv.dumping) break;
/* * gaddr is an output parameter since we might stop * early. As dmp will be copied back in our caller, we * don't need to do it ourselves.
*/
r = kvm_s390_pv_dump_stor_state(kvm, result_buff, &dmp.gaddr, dmp.buff_len,
&cmd->rc, &cmd->rrc); break;
} case KVM_PV_DUMP_COMPLETE: { if (!kvm->arch.pv.dumping) break;
r = -EINVAL; if (dmp.buff_len < uv_info.conf_dump_finalize_len) break;
r = kvm_s390_pv_dump_complete(kvm, result_buff,
&cmd->rc, &cmd->rrc); break;
} default:
r = -ENOTTY; break;
}
switch (cmd->cmd) { case KVM_PV_ENABLE: {
r = -EINVAL; if (kvm_s390_pv_is_protected(kvm)) break;
/* * FMT 4 SIE needs esca. As we never switch back to bsca from * esca, we need no cleanup in the error cases below
*/
r = sca_switch_to_extended(kvm); if (r) break;
mmap_write_lock(kvm->mm);
r = gmap_helper_disable_cow_sharing();
mmap_write_unlock(kvm->mm); if (r) break;
r = kvm_s390_pv_init_vm(kvm, &cmd->rc, &cmd->rrc); if (r) break;
r = kvm_s390_cpus_to_pv(kvm, &cmd->rc, &cmd->rrc); if (r)
kvm_s390_pv_deinit_vm(kvm, &dummy, &dummy);
/* we need to block service interrupts from now on */
set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); break;
} case KVM_PV_ASYNC_CLEANUP_PREPARE:
r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm) || !async_destroy) break;
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); /* * If a CPU could not be destroyed, destroy VM will also fail. * There is no point in trying to destroy it. Instead return * the rc and rrc from the first CPU that failed destroying.
*/ if (r) break;
r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
/* no need to block service interrupts any more */
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); break; case KVM_PV_ASYNC_CLEANUP_PERFORM:
r = -EINVAL; if (!async_destroy) break; /* kvm->lock must not be held; this is asserted inside the function. */
r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc); break; case KVM_PV_DISABLE: {
r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) break;
r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); /* * If a CPU could not be destroyed, destroy VM will also fail. * There is no point in trying to destroy it. Instead return * the rc and rrc from the first CPU that failed destroying.
*/ if (r) break;
r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
/* no need to block service interrupts any more */
clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); break;
} case KVM_PV_SET_SEC_PARMS: { struct kvm_s390_pv_sec_parm parms = {}; void *hdr;
r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) break;
r = -EFAULT; if (copy_from_user(&parms, argp, sizeof(parms))) break;
/* Currently restricted to 8KB */
r = -EINVAL; if (parms.length > PAGE_SIZE * 2) break;
r = -ENOMEM;
hdr = vmalloc(parms.length); if (!hdr) break;
r = -EFAULT; if (!copy_from_user(hdr, (void __user *)parms.origin,
parms.length))
r = kvm_s390_pv_set_sec_parms(kvm, hdr, parms.length,
&cmd->rc, &cmd->rrc);
r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm) || !mm_is_protected(kvm->mm)) break;
r = -EFAULT; if (copy_from_user(&unp, argp, sizeof(unp))) break;
r = kvm_s390_pv_unpack(kvm, unp.addr, unp.size, unp.tweak,
&cmd->rc, &cmd->rrc); break;
} case KVM_PV_VERIFY: {
r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) break;
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
UVC_CMD_VERIFY_IMG, &cmd->rc, &cmd->rrc);
KVM_UV_EVENT(kvm, 3, "PROTVIRT VERIFY: rc %x rrc %x", cmd->rc,
cmd->rrc); break;
} case KVM_PV_PREP_RESET: {
r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) break;
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
UVC_CMD_PREPARE_RESET, &cmd->rc, &cmd->rrc);
KVM_UV_EVENT(kvm, 3, "PROTVIRT PREP RESET: rc %x rrc %x",
cmd->rc, cmd->rrc); break;
} case KVM_PV_UNSHARE_ALL: {
r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) break;
r = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
UVC_CMD_SET_UNSHARE_ALL, &cmd->rc, &cmd->rrc);
KVM_UV_EVENT(kvm, 3, "PROTVIRT UNSHARE: rc %x rrc %x",
cmd->rc, cmd->rrc); break;
} case KVM_PV_INFO: { struct kvm_s390_pv_info info = {};
ssize_t data_len;
/* * No need to check the VM protection here. * * Maybe user space wants to query some of the data * when the VM is still unprotected. If we see the * need to fence a new data command we can still * return an error in the info handler.
*/
r = -EFAULT; if (copy_from_user(&info, argp, sizeof(info.header))) break;
r = -EINVAL; if (info.header.len_max < sizeof(info.header)) break;
data_len = kvm_s390_handle_pv_info(&info); if (data_len < 0) {
r = data_len; break;
} /* * If a data command struct is extended (multiple * times) this can be used to determine how much of it * is valid.
*/
info.header.len_written = data_len;
r = -EFAULT; if (copy_to_user(argp, &info, data_len)) break;
r = 0; break;
} case KVM_PV_DUMP: { struct kvm_s390_pv_dmp dmp;
r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) break;
r = -EFAULT; if (copy_from_user(&dmp, argp, sizeof(dmp))) break;
r = kvm_s390_pv_dmp(kvm, cmd, dmp); if (r) break;
if (copy_to_user(argp, &dmp, sizeof(dmp))) {
r = -EFAULT; break;
}
break;
} default:
r = -ENOTTY;
} if (need_lock)
mutex_unlock(&kvm->lock);
r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION); if (r) return r; /* * This validates off_in_quad. Checking that size is a power * of two is not necessary, as cmpxchg_guest_abs_with_key * takes care of that
*/ if (mop->size > sizeof(new)) return -EINVAL; if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size)) return -EFAULT; if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size)) return -EFAULT;
srcu_idx = srcu_read_lock(&kvm->srcu);
if (!kvm_is_gpa_in_memslot(kvm, mop->gaddr)) {
r = PGM_ADDRESSING; goto out_unlock;
}
r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad, new.quad, mop->key, &success); if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
r = -EFAULT;
/*
 * Dispatch a VM-scoped memory operation to the handler for its op code.
 *
 * Returns -EINVAL if the VM has a protected-virtualization handle or if the
 * op code is not one of the absolute read/write/cmpxchg operations;
 * otherwise returns whatever the selected handler returns.
 */
static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
{
	int rc;

	/*
	 * This is technically a heuristic only, if the kvm->lock is not
	 * taken, it is not guaranteed that the vm is/remains non-protected.
	 * This is ok from a kernel perspective, wrongdoing is detected
	 * on the access, -EFAULT is returned and the vm may crash the
	 * next time it accesses the memory in question.
	 * There is no sane usecase to do switching and a memop on two
	 * different CPUs at the same time.
	 */
	if (kvm_s390_pv_get_handle(kvm))
		return -EINVAL;

	switch (mop->op) {
	case KVM_S390_MEMOP_ABSOLUTE_READ:
	case KVM_S390_MEMOP_ABSOLUTE_WRITE:
		rc = kvm_s390_vm_mem_op_abs(kvm, mop);
		break;
	case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
		rc = kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
		break;
	default:
		rc = -EINVAL;
		break;
	}

	return rc;
}
switch (ioctl) { case KVM_S390_INTERRUPT: { struct kvm_s390_interrupt s390int;
r = -EFAULT; if (copy_from_user(&s390int, argp, sizeof(s390int))) break;
r = kvm_s390_inject_vm(kvm, &s390int); break;
} case KVM_CREATE_IRQCHIP: {
r = -EINVAL; if (kvm->arch.use_irqchip)
r = 0; break;
} case KVM_SET_DEVICE_ATTR: {
r = -EFAULT; if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) break;
r = kvm_s390_vm_set_attr(kvm, &attr); break;
} case KVM_GET_DEVICE_ATTR: {
r = -EFAULT; if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) break;
r = kvm_s390_vm_get_attr(kvm, &attr); break;
} case KVM_HAS_DEVICE_ATTR: {
r = -EFAULT; if (copy_from_user(&attr, (void __user *)arg, sizeof(attr))) break;
r = kvm_s390_vm_has_attr(kvm, &attr); break;
} case KVM_S390_GET_SKEYS: { struct kvm_s390_skeys args;
r = -EFAULT; if (copy_from_user(&args, argp, sizeof(struct kvm_s390_skeys))) break;
r = kvm_s390_get_skeys(kvm, &args); break;
} case KVM_S390_SET_SKEYS: { struct kvm_s390_skeys args;
r = -EFAULT; if (copy_from_user(&args, argp, sizeof(struct kvm_s390_skeys))) break;
r = kvm_s390_set_skeys(kvm, &args); break;
} case KVM_S390_GET_CMMA_BITS: { struct kvm_s390_cmma_log args;
r = -EFAULT; if (copy_from_user(&args, argp, sizeof(args))) break;
mutex_lock(&kvm->slots_lock);
r = kvm_s390_get_cmma_bits(kvm, &args);
mutex_unlock(&kvm->slots_lock); if (!r) {
r = copy_to_user(argp, &args, sizeof(args)); if (r)
r = -EFAULT;
} break;
} case KVM_S390_SET_CMMA_BITS: { struct kvm_s390_cmma_log args;
r = -EFAULT; if (copy_from_user(&args, argp, sizeof(args))) break;
mutex_lock(&kvm->slots_lock);
r = kvm_s390_set_cmma_bits(kvm, &args);
mutex_unlock(&kvm->slots_lock); break;
} case KVM_S390_PV_COMMAND: { struct kvm_pv_cmd args;
/* protvirt means user cpu state */
kvm_s390_set_user_cpu_state_ctrl(kvm);
r = 0; if (!is_prot_virt_host()) {
r = -EINVAL; break;
} if (copy_from_user(&args, argp, sizeof(args))) {
r = -EFAULT; break;
} if (args.flags) {
r = -EINVAL; break;
} /* must be called without kvm->lock */
r = kvm_s390_handle_pv(kvm, &args); if (copy_to_user(argp, &args, sizeof(args))) {
r = -EFAULT; break;
} break;
} case KVM_S390_MEM_OP: { struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
r = kvm_s390_vm_mem_op(kvm, &mem_op); else
r = -EFAULT; break;
} case KVM_S390_ZPCI_OP: { struct kvm_s390_zpci_op args;
r = -EINVAL; if (!IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) break; if (copy_from_user(&args, argp, sizeof(args))) {
r = -EFAULT; break;
}
r = kvm_s390_pci_zpci_op(kvm, &args); break;
} default:
r = -ENOTTY;
}
if (ap_instructions_available()) { if (ap_qci(&info) == 0) return info.apxa;
}
return 0;
}
/*
 * The format of the crypto control block (CRYCB) is specified in the 3 low
 * order bits of the CRYCB designation (CRYCBD) field as follows:
 *	Format 0: Neither the message security assist extension 3 (MSAX3) nor the
 *		  AP extended addressing (APXA) facility are installed.
 *	Format 1: The APXA facility is not installed but the MSAX3 facility is.
 *	Format 2: Both the APXA and MSAX3 facilities are installed
 *
 * Builds the CRYCBD for @kvm from the physical address of its CRYCB and
 * selects the format according to facility 76 and APXA availability.
 */
static void kvm_s390_set_crycb_format(struct kvm *kvm)
{
	kvm->arch.crypto.crycbd = virt_to_phys(kvm->arch.crypto.crycb);

	/* Clear the CRYCB format bits - i.e., set format 0 by default */
	kvm->arch.crypto.crycbd &= ~(CRYCB_FORMAT_MASK);

	/* Only guests with MSAX3 (facility 76) get a non-zero format. */
	if (test_kvm_facility(kvm, 76)) {
		if (kvm_s390_apxa_installed())
			kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
		else
			kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
	}
}
/*
 * kvm_arch_crypto_set_masks
 *
 * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 *	 to be set.
 * @apm: the mask identifying the accessible AP adapters
 * @aqm: the mask identifying the accessible AP domains
 * @adm: the mask identifying the accessible AP control domains
 *
 * Set the masks that identify the adapters, domains and control domains to
 * which the KVM guest is granted access.
 *
 * Note: The kvm->lock mutex must be locked by the caller before invoking this
 *	 function.
 *
 * NOTE(review): in the body as visible here, @apm, @aqm, @adm and the local
 * crycb pointer are never used, and the vCPUs are unblocked without a
 * matching kvm_s390_vcpu_block_all() call. The part that copies the masks
 * appears to be missing - confirm against the complete file before relying
 * on this function.
 */ void kvm_arch_crypto_set_masks(struct kvm *kvm, unsignedlong *apm, unsignedlong *aqm, unsignedlong *adm)
{ struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
/* recreate the shadow crycb for each vcpu */
kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
/* let the vCPUs run again */
kvm_s390_vcpu_unblock_all(kvm);
}
EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
/* * kvm_arch_crypto_clear_masks * * @kvm: pointer to the target guest's KVM struct containing the crypto masks * to be cleared. * * Clear the masks that identify the adapters, domains and control domains to * which the KVM guest is granted access. * * Note: The kvm->lock mutex must be locked by the caller before invoking this * function.
*/ void kvm_arch_crypto_clear_masks(struct kvm *kvm)
{
kvm_s390_vcpu_block_all(kvm);
for (i = 0; i < kvm_s390_fac_size(); i++) {
kvm->arch.model.fac_mask[i] = stfle_fac_list[i] &
(kvm_s390_fac_base[i] |
kvm_s390_fac_ext[i]);
kvm->arch.model.fac_list[i] = stfle_fac_list[i] &
kvm_s390_fac_base[i];
}
kvm->arch.model.subfuncs = kvm_s390_available_subfunc;
/* we are always in czam mode - even on pre z14 machines */
set_kvm_facility(kvm->arch.model.fac_mask, 138);
set_kvm_facility(kvm->arch.model.fac_list, 138); /* we emulate STHYI in kvm */
set_kvm_facility(kvm->arch.model.fac_mask, 74);
set_kvm_facility(kvm->arch.model.fac_list, 74); if (machine_has_tlb_guest()) {
set_kvm_facility(kvm->arch.model.fac_mask, 147);
set_kvm_facility(kvm->arch.model.fac_list, 147);
}
if (css_general_characteristics.aiv && test_facility(65))
set_kvm_facility(kvm->arch.model.fac_mask, 65);
if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV_KVM)) {
mutex_lock(&kvm->lock);
kvm_s390_pci_init_list(kvm);
kvm_s390_vcpu_pci_enable_interp(kvm);
mutex_unlock(&kvm->lock);
}
mutex_init(&kvm->arch.float_int.ais_lock);
spin_lock_init(&kvm->arch.float_int.lock); for (i = 0; i < FIRQ_LIST_COUNT; i++)
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
init_waitqueue_head(&kvm->arch.ipte_wq);
mutex_init(&kvm->arch.ipte_mutex);
debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
VM_EVENT(kvm, 3, "vm created with type %lu", type);
if (kvm_is_ucontrol(vcpu->kvm))
gmap_remove(vcpu->arch.gmap);
if (vcpu->kvm->arch.use_cmma)
kvm_s390_vcpu_unsetup_cmma(vcpu); /* We can not hold the vcpu mutex here, we are already dying */ if (kvm_s390_pv_cpu_get_handle(vcpu))
kvm_s390_pv_destroy_cpu(vcpu, &rc, &rrc);
free_page((unsignedlong)(vcpu->arch.sie_block));
}
kvm_destroy_vcpus(kvm);
sca_dispose(kvm);
kvm_s390_gisa_destroy(kvm); /* * We are already at the end of life and kvm->lock is not taken. * This is ok as the file descriptor is closed by now and nobody * can mess with the pv state.
*/
kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc); /* * Remove the mmu notifier only when the whole KVM VM is torn down, * and only if one was registered to begin with. If the VM is * currently not protected, but has previously been protected, * then it's possible that the notifier is still registered.
*/ if (kvm->arch.pv.mmu_notifier.ops)
mmu_notifier_unregister(&kvm->arch.pv.mmu_notifier, kvm->mm);
/* we still need the basic sca for the ipte control */
vcpu->arch.sie_block->scaoh = sca_phys >> 32;
vcpu->arch.sie_block->scaol = sca_phys; return;
}
read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca;
phys_addr_t sca_phys = virt_to_phys(sca);
staticint sca_can_add_vcpu(struct kvm *kvm, unsignedint id)
{ int rc;
if (!kvm_s390_use_sca_entries()) { if (id < KVM_MAX_VCPUS) returntrue; returnfalse;
} if (id < KVM_S390_BSCA_CPU_SLOTS) returntrue; if (!sclp.has_esca || !sclp.has_64bscao) returnfalse;
/*
 * Open a CPU timer accounting interval for @vcpu: record the current TOD
 * clock value in cputm_start inside a cputm_seqcount write section.
 * Warns once if cputm_start is already non-zero.
 *
 * Needs disabled preemption to protect from TOD sync and vcpu_load/put.
 */
static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_start);

	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	vcpu->arch.cputm_start = get_tod_clock_fast();
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/* needs disabled preemption to protect from TOD sync and vcpu_load/put */ staticvoid __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
vcpu->arch.cputm_start = 0;
raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
}
/*
 * Mark CPU timer accounting as enabled for @vcpu and start an accounting
 * interval. Warns once if accounting was already enabled.
 *
 * Needs disabled preemption to protect from TOD sync and vcpu_load/put.
 */
static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(vcpu->arch.cputm_enabled);

	/* Set the flag first, then start the interval. */
	vcpu->arch.cputm_enabled = true;
	__start_cpu_timer_accounting(vcpu);
}
/*
 * Stop the running accounting interval for @vcpu and mark CPU timer
 * accounting as disabled. Warns once if accounting was not enabled.
 *
 * Needs disabled preemption to protect from TOD sync and vcpu_load/put.
 */
static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);

	/* Stop the interval first, then clear the flag. */
	__stop_cpu_timer_accounting(vcpu);
	vcpu->arch.cputm_enabled = false;
}
/*
 * Preemption-safe wrapper around __enable_cpu_timer_accounting().
 */
static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__enable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/*
 * Preemption-safe wrapper around __disable_cpu_timer_accounting().
 */
static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	__disable_cpu_timer_accounting(vcpu);
	preempt_enable();
}
/*
 * Set the cpu timer - may only be called from the VCPU thread itself.
 *
 * Stores @cputm into the SIE block inside a cputm_seqcount write section.
 * If accounting is enabled, cputm_start is re-based to the current TOD
 * clock in the same section.
 */
void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
{
	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();

	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
	if (vcpu->arch.cputm_enabled) {
		/* restart the running interval from "now" */
		vcpu->arch.cputm_start = get_tod_clock_fast();
	}
	vcpu->arch.sie_block->cputm = cputm;
	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);

	preempt_enable();
}
/*
 * Update and get the cpu timer - can also be called from other VCPU threads.
 *
 * With accounting disabled the SIE block's cputm is returned as is.
 * Otherwise the value is read under the cputm_seqcount read protocol and,
 * if an accounting interval is running, reduced by the TOD time elapsed
 * since cputm_start.
 */
__u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
{
	unsigned int seq;
	__u64 value;

	if (unlikely(!vcpu->arch.cputm_enabled))
		return vcpu->arch.sie_block->cputm;

	/* protect from TOD sync and vcpu_load/put */
	preempt_disable();
	for (;;) {
		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
		/*
		 * If the writer would ever execute a read in the critical
		 * section, e.g. in irq context, we have a deadlock.
		 */
		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);

		value = vcpu->arch.sie_block->cputm;
		/* if cputm_start is 0, accounting is being started/stopped */
		if (likely(vcpu->arch.cputm_start))
			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;

		/* Retry whenever a writer was or became active meanwhile. */
		if (!read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1))
			break;
	}
	preempt_enable();

	return value;
}
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
staticbool kvm_has_pckmo_ecc(struct kvm *kvm)
{ /* At least one ECC subfunction must be present */ return kvm_has_pckmo_subfunc(kvm, 32) ||
kvm_has_pckmo_subfunc(kvm, 33) ||
kvm_has_pckmo_subfunc(kvm, 34) ||
kvm_has_pckmo_subfunc(kvm, 40) ||
kvm_has_pckmo_subfunc(kvm, 41);
}
staticbool kvm_has_pckmo_hmac(struct kvm *kvm)
{ /* At least one HMAC subfunction must be present */ return kvm_has_pckmo_subfunc(kvm, 118) ||
kvm_has_pckmo_subfunc(kvm, 122);
}
staticvoid kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
{ /* * If the AP instructions are not being interpreted and the MSAX3 * facility is not configured for the guest, there is nothing to set up.
*/ if (!vcpu->kvm->arch.crypto.apie && !test_kvm_facility(vcpu->kvm, 76)) return;
if (vcpu->kvm->arch.crypto.apie)
vcpu->arch.sie_block->eca |= ECA_APIE;
/* Set up protected key support */ if (vcpu->kvm->arch.crypto.aes_kw) {
vcpu->arch.sie_block->ecb3 |= ECB3_AES; /* ecc/hmac is also wrapped with AES key */ if (kvm_has_pckmo_ecc(vcpu->kvm))
vcpu->arch.sie_block->ecd |= ECD_ECC; if (kvm_has_pckmo_hmac(vcpu->kvm))
vcpu->arch.sie_block->ecd |= ECD_HMAC;
}
if (vcpu->kvm->arch.crypto.dea_kw)
vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
}
if (test_kvm_facility(vcpu->kvm, 78))
kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED2); elseif (test_kvm_facility(vcpu->kvm, 8))
kvm_s390_set_cpuflags(vcpu, CPUSTAT_GED);
kvm_s390_vcpu_setup_model(vcpu);
/* pgste_set_pte has special handling for !machine_has_esop() */ if (machine_has_esop())
vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT; if (test_kvm_facility(vcpu->kvm, 9))
vcpu->arch.sie_block->ecb |= ECB_SRSI; if (test_kvm_facility(vcpu->kvm, 11))
vcpu->arch.sie_block->ecb |= ECB_PTF; if (test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= ECB_TE; if (!kvm_is_ucontrol(vcpu->kvm))
vcpu->arch.sie_block->ecb |= ECB_SPECI;
if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI; if (test_kvm_facility(vcpu->kvm, 130))
vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI; if (sclp.has_cei)
vcpu->arch.sie_block->eca |= ECA_CEI; if (sclp.has_ib)
vcpu->arch.sie_block->eca |= ECA_IB; if (sclp.has_siif)
vcpu->arch.sie_block->eca |= ECA_SII; if (sclp.has_sigpif)
vcpu->arch.sie_block->eca |= ECA_SIGPI; if (test_kvm_facility(vcpu->kvm, 129)) {
vcpu->arch.sie_block->eca |= ECA_VX;
vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
} if (test_kvm_facility(vcpu->kvm, 139))
vcpu->arch.sie_block->ecd |= ECD_MEF; if (test_kvm_facility(vcpu->kvm, 156))
vcpu->arch.sie_block->ecd |= ECD_ETOKENF; if (vcpu->arch.sie_block->gd) {
vcpu->arch.sie_block->eca |= ECA_AIV;
VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
}
vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
KVM_SYNC_GPRS |
KVM_SYNC_ACRS |
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT |
KVM_SYNC_DIAG318;
vcpu->arch.acrs_loaded = false;
kvm_s390_set_prefix(vcpu, 0); if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB; if (test_kvm_facility(vcpu->kvm, 82))
vcpu->run->kvm_valid_regs |= KVM_SYNC_BPBC; if (test_kvm_facility(vcpu->kvm, 133))
vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB; if (test_kvm_facility(vcpu->kvm, 156))
vcpu->run->kvm_valid_regs |= KVM_SYNC_ETOKEN; /* fprs can be synchronized via vrs, even if the guest has no vx. With * cpu_has_vx(), (load|store)_fpu_regs() will work with vrs format.
*/ if (cpu_has_vx())
vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS; else
vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
if (kvm_is_ucontrol(vcpu->kvm)) {
rc = __kvm_ucontrol_vcpu_init(vcpu); if (rc) goto out_free_sie_block;
}
VM_EVENT(vcpu->kvm, 3, "create cpu %d at 0x%p, sie block at 0x%p",
vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
trace_kvm_s390_create_vcpu(vcpu->vcpu_id, vcpu, vcpu->arch.sie_block);
rc = kvm_s390_vcpu_setup(vcpu); if (rc) goto out_ucontrol_uninit;
/*
 * Kick a guest cpu out of (v)SIE and wait until (v)SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 *
 * Sets CPUSTAT_STOP_INT, kicks the vsie and then busy-waits until
 * PROG_IN_SIE is no longer set in the SIE block's prog0c.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
	kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOP_INT);
	kvm_s390_vsie_kick(vcpu);

	for (;;) {
		if (!(vcpu->arch.sie_block->prog0c & PROG_IN_SIE))
			break;
		cpu_relax();
	}
}
/*
 * Kick a guest cpu out of SIE to process a request synchronously.
 *
 * The request bit @req is set on @vcpu first; only then is the vcpu
 * notified through kvm_s390_vcpu_request().
 */
void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
{
	/* queue the request ... */
	__kvm_make_request(req, vcpu);
	/* ... and make the vcpu notice it */
	kvm_s390_vcpu_request(vcpu);
}
if (gmap_is_shadow(gmap)) return; if (start >= 1UL << 31) /* We are only interested in prefix pages */ return;
kvm_for_each_vcpu(i, vcpu, kvm) { /* match against both prefix pages */
prefix = kvm_s390_get_prefix(vcpu); if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
start, end);
kvm_s390_sync_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu);
}
}
}
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{ /* do not poll with more than halt_poll_max_steal percent of steal time */ if (get_lowcore()->avg_steal_timer * 100 / (TICK_USEC << 12) >=
READ_ONCE(halt_poll_max_steal)) {
vcpu->stat.halt_no_poll_steal++; returntrue;
} returnfalse;
}
/*
 * Placeholder required by kvm common code, which refers to this symbol
 * but never calls it. Reaching it is a bug, hence the BUG().
 */
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
	/* kvm common code refers to this, but never calls it */
	BUG();

	return 0;
}
/*
 * Copy the register selected by @reg->id to the user buffer at @reg->addr.
 *
 * Returns the result of put_user() for a known register id and -EINVAL
 * for an unknown one.
 */
static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
					   struct kvm_one_reg *reg)
{
	switch (reg->id) {
	case KVM_REG_S390_TODPR:
		return put_user(vcpu->arch.sie_block->todpr,
				(u32 __user *)reg->addr);
	case KVM_REG_S390_EPOCHDIFF:
		return put_user(vcpu->arch.sie_block->epoch,
				(u64 __user *)reg->addr);
	case KVM_REG_S390_CPU_TIMER:
		/* read through the accessor rather than the raw SIE field */
		return put_user(kvm_s390_get_cpu_timer(vcpu),
				(u64 __user *)reg->addr);
	case KVM_REG_S390_CLOCK_COMP:
		return put_user(vcpu->arch.sie_block->ckc,
				(u64 __user *)reg->addr);
	case KVM_REG_S390_PFTOKEN:
		return put_user(vcpu->arch.pfault_token,
				(u64 __user *)reg->addr);
	case KVM_REG_S390_PFCOMPARE:
		return put_user(vcpu->arch.pfault_compare,
				(u64 __user *)reg->addr);
	case KVM_REG_S390_PFSELECT:
		return put_user(vcpu->arch.pfault_select,
				(u64 __user *)reg->addr);
	case KVM_REG_S390_PP:
		return put_user(vcpu->arch.sie_block->pp,
				(u64 __user *)reg->addr);
	case KVM_REG_S390_GBEA:
		return put_user(vcpu->arch.sie_block->gbea,
				(u64 __user *)reg->addr);
	default:
		return -EINVAL;
	}
}
staticint kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{ int r = -EINVAL;
__u64 val;
switch (reg->id) { case KVM_REG_S390_TODPR:
r = get_user(vcpu->arch.sie_block->todpr,
(u32 __user *)reg->addr); break; case KVM_REG_S390_EPOCHDIFF:
r = get_user(vcpu->arch.sie_block->epoch,
(u64 __user *)reg->addr); break; case KVM_REG_S390_CPU_TIMER:
r = get_user(val, (u64 __user *)reg->addr); if (!r)
kvm_s390_set_cpu_timer(vcpu, val); break; case KVM_REG_S390_CLOCK_COMP:
r = get_user(vcpu->arch.sie_block->ckc,
(u64 __user *)reg->addr); break; case KVM_REG_S390_PFTOKEN:
r = get_user(vcpu->arch.pfault_token,
(u64 __user *)reg->addr); if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
kvm_clear_async_pf_completion_queue(vcpu); break; case KVM_REG_S390_PFCOMPARE:
r = get_user(vcpu->arch.pfault_compare,
(u64 __user *)reg->addr); break; case KVM_REG_S390_PFSELECT:
r = get_user(vcpu->arch.pfault_select,
(u64 __user *)reg->addr); break; case KVM_REG_S390_PP:
r = get_user(vcpu->arch.sie_block->pp,
(u64 __user *)reg->addr); break; case KVM_REG_S390_GBEA:
r = get_user(vcpu->arch.sie_block->gbea,
(u64 __user *)reg->addr); break; default: break;
}
kvm_clear_async_pf_completion_queue(vcpu); if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
kvm_s390_clear_local_irqs(vcpu);
}
/*
 * Perform an initial CPU reset on @vcpu.
 *
 * Runs the normal reset first, then resets the PSW, prefix, CPU timer,
 * clock comparator and control registers in the SIE block and mirrors the
 * reset values into the sync regs of vcpu->run. A few SIE block fields are
 * only reset when the vcpu is not protected.
 */
static void kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
	/* Initial reset is a superset of the normal reset */
	kvm_arch_vcpu_ioctl_normal_reset(vcpu);

	/*
	 * This equals initial cpu reset in pop, but we don't switch to ESA.
	 * We do not only reset the internal data, but also ...
	 */
	vcpu->arch.sie_block->gpsw.mask = 0;
	vcpu->arch.sie_block->gpsw.addr = 0;
	kvm_s390_set_prefix(vcpu, 0);
	kvm_s390_set_cpu_timer(vcpu, 0);
	vcpu->arch.sie_block->ckc = 0;
	memset(vcpu->arch.sie_block->gcr, 0, sizeof(vcpu->arch.sie_block->gcr));
	vcpu->arch.sie_block->gcr[0] = CR0_INITIAL_MASK;
	vcpu->arch.sie_block->gcr[14] = CR14_INITIAL_MASK;

	/* ... the data in sync regs */
	memset(vcpu->run->s.regs.crs, 0, sizeof(vcpu->run->s.regs.crs));
	vcpu->run->s.regs.crs[0] = CR0_INITIAL_MASK;
	vcpu->run->s.regs.crs[14] = CR14_INITIAL_MASK;
	vcpu->run->psw_addr = 0;
	vcpu->run->psw_mask = 0;
	vcpu->run->s.regs.todpr = 0;
	vcpu->run->s.regs.cputm = 0;
	/* ckc was previously cleared twice; one store is sufficient */
	vcpu->run->s.regs.ckc = 0;
	vcpu->run->s.regs.pp = 0;
	vcpu->run->s.regs.gbea = 1;
	vcpu->run->s.regs.fpc = 0;

	/*
	 * Do not reset these registers in the protected case, as some of
	 * them are overlaid and they are not accessible in this case
	 * anyway.
	 */
	if (!kvm_s390_pv_cpu_is_protected(vcpu)) {
		vcpu->arch.sie_block->gbea = 1;
		vcpu->arch.sie_block->pp = 0;
		vcpu->arch.sie_block->fpf &= ~FPF_BPBC;
		vcpu->arch.sie_block->todpr = 0;
	}
}
/** * __kvm_s390_mprotect_many() - Apply specified protection to guest pages * @gmap: the gmap of the guest * @gpa: the starting guest address * @npages: how many pages to protect * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE * @bits: pgste notification bits to set * * Returns: 0 in case of success, < 0 in case of error - see gmap_protect_one() * * Context: kvm->srcu and gmap->mm need to be held in read mode
*/ int __kvm_s390_mprotect_many(struct gmap *gmap, gpa_t gpa, u8 npages, unsignedint prot, unsignedlong bits)
{ unsignedint fault_flag = (prot & PROT_WRITE) ? FAULT_FLAG_WRITE : 0;
gpa_t end = gpa + npages * PAGE_SIZE; int rc;
staticint kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
retry:
kvm_s390_vcpu_request_handled(vcpu); if (!kvm_request_pending(vcpu)) return 0; /* * If the guest prefix changed, re-arm the ipte notifier for the * guest prefix page. gmap_mprotect_notify will wait on the ptl lock. * This ensures that the ipte instruction for this request has * already finished. We might race against a second unmapper that * wants to set the blocking bit. Lets just retry the request loop.
*/ if (kvm_check_request(KVM_REQ_REFRESH_GUEST_PREFIX, vcpu)) { int rc;
if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
vcpu->arch.sie_block->ihcpu = 0xffff; goto retry;
}
if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) { if (!ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
kvm_s390_set_cpuflags(vcpu, CPUSTAT_IBS);
} goto retry;
}
if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) { if (ibs_enabled(vcpu)) {
trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_IBS);
} goto retry;
}
if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
vcpu->arch.sie_block->ictl |= ICTL_OPEREXC; goto retry;
}
if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) { /* * Disable CMM virtualization; we will emulate the ESSA * instruction manually, in order to provide additional * functionalities needed for live migration.
*/
vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA; goto retry;
}
if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) { /* * Re-enable CMM virtualization if CMMA is available and * CMM has been used.
*/ if ((vcpu->kvm->arch.use_cmma) &&
(vcpu->kvm->mm->context.uses_cmm))
vcpu->arch.sie_block->ecb2 |= ECB2_CMMA; goto retry;
}
/* we left the vsie handler, nothing to do, just clear the request */
kvm_clear_request(KVM_REQ_VSIE_RESTART, vcpu);
/*
 * Async page fault "page ready" hook; intentionally a no-op on s390.
 */
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
			       struct kvm_async_pf *work)
{
	/* s390 will always inject the page directly */
}
/*
 * kvm_arch_can_dequeue_async_page_present() - always allow dequeueing
 * @vcpu: not used
 *
 * s390 will always inject the page directly, but we still want
 * check_async_completion to cleanup, so the answer is unconditionally yes.
 *
 * Return: always true.
 */
bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu)
{
	return true;
}
if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID) returnfalse; if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
vcpu->arch.pfault_compare) returnfalse; if (psw_extint_disabled(vcpu)) returnfalse; if (kvm_s390_vcpu_has_irq(vcpu, 0)) returnfalse; if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) returnfalse; if (!vcpu->arch.gmap->pfault_enabled) returnfalse;
hva = gfn_to_hva(vcpu->kvm, current->thread.gmap_teid.addr); if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8)) returnfalse;
staticint vcpu_pre_run(struct kvm_vcpu *vcpu)
{ int rc, cpuflags;
/* * On s390 notifications for arriving pages will be delivered directly * to the guest but the house keeping for completed pfaults is * handled outside the worker.
*/
kvm_check_async_pf_completion(vcpu);
VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
trace_kvm_s390_sie_fault(vcpu);
/* * We want to inject an addressing exception, which is defined as a * suppressing or terminating exception. However, since we came here * by a DAT access exception, the PSW still points to the faulting * instruction since DAT exceptions are nullifying. So we've got * to look up the current opcode to get the length of the instruction * to be able to forward the PSW.
*/
rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
ilen = insn_length(opcode); if (rc < 0) { return rc;
} elseif (rc) { /* Instruction-Fetching Exceptions - we can't detect the ilen. * Forward by arbitrary ilc, injection will take care of * nullification if necessary.
*/
pgm_info = vcpu->arch.pgm;
ilen = 4;
}
pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
kvm_s390_forward_psw(vcpu, ilen); return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
}
/*
 * __kvm_s390_handle_dat_fault() - handle a dat fault for the gmap of a vcpu
 * @vcpu: the vCPU whose gmap is to be fixed up
 * @gfn: the guest frame number used for memslots (including fake memslots)
 * @gaddr: the gmap address, does not have to match @gfn for ucontrol gmaps
 * @foll: FOLL_* flags
 *
 * Outcomes visible in this body, in the order the pfn is examined:
 *  - no-slot pfn:             whatever vcpu_post_run_addressing_exception()
 *                             returns
 *  - KVM_PFN_ERR_SIGPENDING:  -EAGAIN
 *  - KVM_PFN_ERR_NEEDS_IO:    0 if an async pfault could be set up, otherwise
 *                             FOLL_NOWAIT is dropped and the lookup is retried
 *  - any other error pfn:     -EFAULT
 *  - otherwise:               the result of fixup_user_fault(), or of
 *                             __gmap_link() when the fixup succeeded
 *
 * Return: 0 on success, < 0 in case of error.
 * Context: The mm lock must not be held before calling. May sleep.
 *
 * NOTE(review): this listing appears to be damaged. Nothing visible assigns
 * pfn, vmaddr, fault_flags, page or writable before they are used, slot is
 * never referenced, and the try_again label targeted by the goto below is
 * not present. The statements between the declarations and the first pfn
 * check were presumably lost - restore them from the authoritative source
 * before building. The fused type names (unsignedint, unsignedlong) look like
 * the same kind of extraction damage.
 */ int __kvm_s390_handle_dat_fault(struct kvm_vcpu *vcpu, gfn_t gfn, gpa_t gaddr, unsignedint foll)
{ struct kvm_memory_slot *slot; unsignedint fault_flags; bool writable, unlocked; unsignedlong vmaddr; struct page *page;
kvm_pfn_t pfn; int rc;
/* Access outside memory, inject addressing exception */ if (is_noslot_pfn(pfn)) return vcpu_post_run_addressing_exception(vcpu); /* Signal pending: report -EAGAIN so the caller can try again */ if (pfn == KVM_PFN_ERR_SIGPENDING) return -EAGAIN;
/* Needs I/O, try to setup async pfault (only possible with FOLL_NOWAIT) */ if (pfn == KVM_PFN_ERR_NEEDS_IO) {
trace_kvm_s390_major_guest_pfault(vcpu); if (kvm_arch_setup_async_pf(vcpu)) return 0;
vcpu->stat.pfault_sync++; /* Could not setup async pfault, try again synchronously */
/* Clearing FOLL_NOWAIT is what makes the retry synchronous */
foll &= ~FOLL_NOWAIT; goto try_again;
} /* Any other error */ if (is_error_pfn(pfn)) return -EFAULT;
/* Success */
/* The mm read lock is held from here until just before the return */
mmap_read_lock(vcpu->arch.gmap->mm); /* Mark the userspace PTEs as young and/or dirty, to avoid page fault loops */
/* Only link gaddr to vmaddr in the gmap if the userspace fixup worked */
rc = fixup_user_fault(vcpu->arch.gmap->mm, vmaddr, fault_flags, &unlocked); if (!rc)
rc = __gmap_link(vcpu->arch.gmap, gaddr, vmaddr);
/* The page is released under kvm->mmu_lock regardless of the value of rc */
scoped_guard(spinlock, &vcpu->kvm->mmu_lock) {
kvm_release_faultin_page(vcpu->kvm, page, false, writable);
}
mmap_read_unlock(vcpu->arch.gmap->mm); return rc;
}
gfn = gpa_to_gfn(gaddr); if (kvm_is_ucontrol(vcpu->kvm)) { /* * This translates the per-vCPU guest address into a * fake guest address, which can then be used with the * fake memslots that are identity mapping userspace. * This allows ucontrol VMs to use the normal fault * resolution path, like normal VMs.
*/
mmap_read_lock(vcpu->arch.gmap->mm);
gaddr_tmp = __gmap_translate(vcpu->arch.gmap, gaddr);
mmap_read_unlock(vcpu->arch.gmap->mm); if (gaddr_tmp == -EFAULT) {
vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
vcpu->run->s390_ucontrol.trans_exc_code = gaddr;
vcpu->run->s390_ucontrol.pgm_code = PGM_SEGMENT_TRANSLATION; return -EREMOTE;
}
gfn = gpa_to_gfn(gaddr_tmp);
} return __kvm_s390_handle_dat_fault(vcpu, gfn, gaddr, foll);
}
gaddr = current->thread.gmap_teid.addr * PAGE_SIZE; if (kvm_s390_cur_gmap_fault_is_write())
foll = FOLL_WRITE;
switch (current->thread.gmap_int_code & PGM_INT_CODE_MASK) { case 0:
vcpu->stat.exit_null++; break; case PGM_SECURE_STORAGE_ACCESS: case PGM_SECURE_STORAGE_VIOLATION:
kvm_s390_assert_primary_as(vcpu); /* * This can happen after a reboot with asynchronous teardown; * the new guest (normal or protected) will run on top of the * previous protected guest. The old pages need to be destroyed * so the new guest can use them.
*/ if (kvm_s390_pv_destroy_page(vcpu->kvm, gaddr)) { /* * Either KVM messed up the secure guest mapping or the * same page is mapped into multiple secure guests. * * This exception is only triggered when a guest 2 is * running and can therefore never occur in kernel * context.
*/
pr_warn_ratelimited("Secure storage violation (%x) in task: %s, pid %d\n",
current->thread.gmap_int_code, current->comm,
current->pid);
send_sig(SIGSEGV, current, 0);
} break; case PGM_NON_SECURE_STORAGE_ACCESS:
kvm_s390_assert_primary_as(vcpu); /* * This is normal operation; a page belonging to a protected * guest has not been imported yet. Try to import the page into * the protected guest.
*/
rc = kvm_s390_pv_convert_to_secure(vcpu->kvm, gaddr); if (rc == -EINVAL)
send_sig(SIGSEGV, current, 0); if (rc != -ENXIO) break;
foll = FOLL_WRITE;
fallthrough; case PGM_PROTECTION: case PGM_SEGMENT_TRANSLATION: case PGM_PAGE_TRANSLATION: case PGM_ASCE_TYPE: case PGM_REGION_FIRST_TRANS: case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS:
kvm_s390_assert_primary_as(vcpu); return vcpu_dat_fault_handler(vcpu, gaddr, foll); default:
KVM_BUG(1, vcpu->kvm, "Unexpected program interrupt 0x%x, TEID 0x%016lx",
current->thread.gmap_int_code, current->thread.gmap_teid.val);
send_sig(SIGSEGV, current, 0); break;
} return 0;
}
staticint vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{ struct mcck_volatile_info *mcck_info; struct sie_page *sie_page; int rc;
VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
vcpu->arch.sie_block->icptcode);
trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
if (guestdbg_enabled(vcpu))
kvm_s390_restore_guest_per_regs(vcpu);
int noinstr kvm_s390_enter_exit_sie(struct kvm_s390_sie_block *scb,
u64 *gprs, unsignedlong gasce)
{ int ret;
guest_state_enter_irqoff();
/* * The guest_state_{enter,exit}_irqoff() functions inform lockdep and * tracing that entry to the guest will enable host IRQs, and exit from * the guest will disable host IRQs. * * We must not use lockdep/tracing/RCU in this critical section, so we * use the low-level arch_local_irq_*() helpers to enable/disable IRQs.
*/
arch_local_irq_enable();
ret = sie64a(scb, gprs, gasce);
arch_local_irq_disable();
/* * We try to hold kvm->srcu during most of vcpu_run (except when run- * ning the guest), so that memslots (and other stuff) are protected
*/
kvm_vcpu_srcu_read_lock(vcpu);
do {
rc = vcpu_pre_run(vcpu); if (rc || guestdbg_exit_pending(vcpu)) break;
kvm_vcpu_srcu_read_unlock(vcpu); /* * As PF_VCPU will be used in fault handler, between * guest_timing_enter_irqoff and guest_timing_exit_irqoff * should be no uaccess.
*/ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(sie_page->pv_grregs,
vcpu->run->s.regs.gprs, sizeof(sie_page->pv_grregs));
}
if (kvm_s390_pv_cpu_is_protected(vcpu)) {
memcpy(vcpu->run->s.regs.gprs,
sie_page->pv_grregs, sizeof(sie_page->pv_grregs)); /* * We're not allowed to inject interrupts on intercepts * that leave the guest state in an "in-between" state * where the next SIE entry will do a continuation. * Fence interrupts in our "internal" PSW.
*/ if (vcpu->arch.sie_block->icptcode == ICPT_PV_INSTR ||
vcpu->arch.sie_block->icptcode == ICPT_PV_PREF) {
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK;
}
}
kvm_vcpu_srcu_read_lock(vcpu);
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix); if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128); /* some control register changes require a tlb flush */
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
} if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
}
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
vcpu->arch.acrs_loaded = true;
kvm_s390_fpu_load(vcpu->run); /* Sync fmt2 only data */ if (likely(!kvm_s390_pv_cpu_is_protected(vcpu))) {
sync_regs_fmt2(vcpu);
} else { /* * In several places we have to modify our internal view to * not do things that are disallowed by the ultravisor. For * example we must not inject interrupts after specific exits * (e.g. 112 prefix page not secure). We do this by turning * off the machine check, external and I/O interrupt bits * of our PSW copy. To avoid getting validity intercepts, we * do only accept the condition code from userspace.
*/
vcpu->arch.sie_block->gpsw.mask &= ~PSW_MASK_CC;
vcpu->arch.sie_block->gpsw.mask |= kvm_run->psw_mask &
PSW_MASK_CC;
}
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
{ struct kvm_run *kvm_run = vcpu->run;
DECLARE_KERNEL_FPU_ONSTACK32(fpu); int rc;
/* * Running a VM while dumping always has the potential to * produce inconsistent dump data. But for PV vcpus a SIE * entry while dumping could also lead to a fatal validity * intercept which we absolutely want to avoid.
*/ if (vcpu->kvm->arch.pv.dumping) return -EINVAL;
if (!vcpu->wants_to_run) return -EINTR;
if (kvm_run->kvm_valid_regs & ~KVM_SYNC_S390_VALID_FIELDS ||
kvm_run->kvm_dirty_regs & ~KVM_SYNC_S390_VALID_FIELDS) return -EINVAL;
vcpu_load(vcpu);
if (guestdbg_exit_pending(vcpu)) {
kvm_s390_prepare_debug_exit(vcpu);
rc = 0; goto out;
}
kvm_sigset_activate(vcpu);
/* * no need to check the return value of vcpu_start as it can only have * an error for protvirt, but protvirt means user cpu state
*/ if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} elseif (is_vcpu_stopped(vcpu)) {
pr_err_ratelimited("can't run stopped vcpu %d\n",
vcpu->vcpu_id);
rc = -EINVAL; goto out;
}
/*
 * store status at address
 * we have two special cases:
 *  KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 *  KVM_S390_STORE_STATUS_PREFIXED: -> prefix
*/ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsignedlong gpa)
{ unsignedchar archmode = 1;
freg_t fprs[NUM_FPRS]; unsignedint px;
u64 clkcomp, cputm; int rc;
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsignedlong addr)
{ /* * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy * switch in the run ioctl. Let's update our copies before we save * it into the save area
*/
kvm_s390_fpu_store(vcpu->run);
save_access_regs(vcpu->run->s.regs.acrs);
int kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
{ int i, online_vcpus, r = 0, started_vcpus = 0;
if (!is_vcpu_stopped(vcpu)) return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1); /* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
/* Let's tell the UV that we want to change into the operating state */ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_OPR); if (r) {
spin_unlock(&vcpu->kvm->arch.start_stop_lock); return r;
}
}
for (i = 0; i < online_vcpus; i++) { if (!is_vcpu_stopped(kvm_get_vcpu(vcpu->kvm, i)))
started_vcpus++;
}
if (started_vcpus == 0) { /* we're the only active VCPU -> speed it up */
__enable_ibs_on_vcpu(vcpu);
} elseif (started_vcpus == 1) { /* * As we are starting a second VCPU, we have to disable * the IBS facility on all VCPUs to remove potentially * outstanding ENABLE requests.
*/
__disable_ibs_on_all_vcpus(vcpu->kvm);
}
kvm_s390_clear_cpuflags(vcpu, CPUSTAT_STOPPED); /* * The real PSW might have changed due to a RESTART interpreted by the * ultravisor. We block all interrupts and let the next sie exit * refresh our view.
*/ if (kvm_s390_pv_cpu_is_protected(vcpu))
vcpu->arch.sie_block->gpsw.mask &= ~PSW_INT_MASK; /* * Another VCPU might have used IBS while we were offline. * Let's play safe and flush the VCPU at startup.
*/
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
spin_unlock(&vcpu->kvm->arch.start_stop_lock); return 0;
}
int kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
{ int i, online_vcpus, r = 0, started_vcpus = 0; struct kvm_vcpu *started_vcpu = NULL;
if (is_vcpu_stopped(vcpu)) return 0;
trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0); /* Only one cpu at a time may enter/leave the STOPPED state. */
spin_lock(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
/* Let's tell the UV that we want to change into the stopped state */ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
r = kvm_s390_pv_set_cpu_state(vcpu, PV_CPU_STATE_STP); if (r) {
spin_unlock(&vcpu->kvm->arch.start_stop_lock); return r;
}
}
/* * Set the VCPU to STOPPED and THEN clear the interrupt flag, * now that the SIGP STOP and SIGP STOP AND STORE STATUS orders * have been fully processed. This will ensure that the VCPU * is kept BUSY if another VCPU is inquiring with SIGP SENSE.
*/
kvm_s390_set_cpuflags(vcpu, CPUSTAT_STOPPED);
kvm_s390_clear_stop_irq(vcpu);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) { struct kvm_vcpu *tmp = kvm_get_vcpu(vcpu->kvm, i);
if (!is_vcpu_stopped(tmp)) {
started_vcpus++;
started_vcpu = tmp;
}
}
if (started_vcpus == 1) { /* * As we only have one VCPU left, we want to enable the * IBS facility for that VCPU to speed it up.
*/
__enable_ibs_on_vcpu(started_vcpu);
}
switch (mop->op) { case KVM_S390_MEMOP_LOGICAL_READ: case KVM_S390_MEMOP_LOGICAL_WRITE:
r = kvm_s390_vcpu_mem_op(vcpu, mop); break; case KVM_S390_MEMOP_SIDA_READ: case KVM_S390_MEMOP_SIDA_WRITE: /* we are locked against sida going away by the vcpu->mutex */
r = kvm_s390_vcpu_sida_op(vcpu, mop); break; default:
r = -EINVAL;
}
/* * To simplify single stepping of userspace-emulated instructions, * KVM_EXIT_S390_SIEIC exit sets KVM_GUESTDBG_EXIT_PENDING (see * should_handle_per_ifetch()). However, if userspace emulation injects * an interrupt, it needs to be cleared, so that KVM_EXIT_DEBUG happens * after (and not before) the interrupt delivery.
*/ if (!rc)
vcpu->guest_debug &= ~KVM_GUESTDBG_EXIT_PENDING;
/* On success copy over the dump data */ if (!ret && copy_to_user((__u8 __user *)dmp.buff_addr, data, uv_info.guest_cpu_stor_len))
ret = -EFAULT;
kvfree(data); return ret;
}
long kvm_arch_vcpu_ioctl(struct file *filp, unsignedint ioctl, unsignedlong arg)
{ struct kvm_vcpu *vcpu = filp->private_data; void __user *argp = (void __user *)arg; int idx; long r;
u16 rc, rrc;
vcpu_load(vcpu);
switch (ioctl) { case KVM_S390_STORE_STATUS:
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = kvm_s390_store_status_unloaded(vcpu, arg);
srcu_read_unlock(&vcpu->kvm->srcu, idx); break; case KVM_S390_SET_INITIAL_PSW: {
psw_t psw;
r = -EFAULT; if (copy_from_user(&psw, argp, sizeof(psw))) break;
r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw); break;
} case KVM_S390_CLEAR_RESET:
r = 0;
kvm_arch_vcpu_ioctl_clear_reset(vcpu); if (kvm_s390_pv_cpu_is_protected(vcpu)) {
r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
UVC_CMD_CPU_RESET_CLEAR, &rc, &rrc);
VCPU_EVENT(vcpu, 3, "PROTVIRT RESET CLEAR VCPU: rc %x rrc %x",
rc, rrc);
} break; case KVM_S390_INITIAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_initial_reset(vcpu); if (kvm_s390_pv_cpu_is_protected(vcpu)) {
r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
UVC_CMD_CPU_RESET_INITIAL,
&rc, &rrc);
VCPU_EVENT(vcpu, 3, "PROTVIRT RESET INITIAL VCPU: rc %x rrc %x",
rc, rrc);
} break; case KVM_S390_NORMAL_RESET:
r = 0;
kvm_arch_vcpu_ioctl_normal_reset(vcpu); if (kvm_s390_pv_cpu_is_protected(vcpu)) {
r = uv_cmd_nodata(kvm_s390_pv_cpu_get_handle(vcpu),
UVC_CMD_CPU_RESET, &rc, &rrc);
VCPU_EVENT(vcpu, 3, "PROTVIRT RESET NORMAL VCPU: rc %x rrc %x",
rc, rrc);
} break; case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { struct kvm_one_reg reg;
r = -EINVAL; if (kvm_s390_pv_cpu_is_protected(vcpu)) break;
/*
 * Copy the register descriptor in from userspace, then hand it to the
 * setter or getter depending on which of the two ioctls was issued.
 * r stays -EFAULT when the copy fails.
 */
r = -EFAULT;
if (copy_from_user(&reg, argp, sizeof(reg)))
	break;
if (ioctl == KVM_SET_ONE_REG)
	r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
else
	r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
break;
} #ifdef CONFIG_KVM_S390_UCONTROL case KVM_S390_UCAS_MAP: { struct kvm_s390_ucas_mapping ucasmap;
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
r = -EFAULT; break;
}
if (!kvm_is_ucontrol(vcpu->kvm)) {
r = -EINVAL; break;
}
r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
ucasmap.vcpu_addr, ucasmap.length); break;
} case KVM_S390_UCAS_UNMAP: { struct kvm_s390_ucas_mapping ucasmap;
if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
r = -EFAULT; break;
}
if (!kvm_is_ucontrol(vcpu->kvm)) {
r = -EINVAL; break;
}
r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
ucasmap.length); break;
} #endif case KVM_S390_VCPU_FAULT: {
idx = srcu_read_lock(&vcpu->kvm->srcu);
r = vcpu_dat_fault_handler(vcpu, arg, 0);
srcu_read_unlock(&vcpu->kvm->srcu, idx); break;
} case KVM_ENABLE_CAP:
{ struct kvm_enable_cap cap;
r = -EFAULT; if (copy_from_user(&cap, argp, sizeof(cap))) break;
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break;
} case KVM_S390_MEM_OP: { struct kvm_s390_mem_op mem_op;
if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
r = kvm_s390_vcpu_memsida_op(vcpu, &mem_op); else
r = -EFAULT; break;
} case KVM_S390_SET_IRQ_STATE: { struct kvm_s390_irq_state irq_state;
r = -EFAULT; if (copy_from_user(&irq_state, argp, sizeof(irq_state))) break; if (irq_state.len > VCPU_IRQS_MAX_BUF ||
irq_state.len == 0 ||
irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
r = -EINVAL; break;
} /* do not use irq_state.flags, it will break old QEMUs */
r = kvm_s390_set_irq_state(vcpu,
(void __user *) irq_state.buf,
irq_state.len); break;
} case KVM_S390_GET_IRQ_STATE: { struct kvm_s390_irq_state irq_state;
r = -EFAULT; if (copy_from_user(&irq_state, argp, sizeof(irq_state))) break; if (irq_state.len == 0) {
r = -EINVAL; break;
} /* do not use irq_state.flags, it will break old QEMUs */
r = kvm_s390_get_irq_state(vcpu,
(__u8 __user *) irq_state.buf,
irq_state.len); break;
} case KVM_S390_PV_CPU_COMMAND: { struct kvm_pv_cmd cmd;
r = -EINVAL; if (!is_prot_virt_host()) break;
r = -EFAULT; if (copy_from_user(&cmd, argp, sizeof(cmd))) break;
r = -EINVAL; if (cmd.flags) break;
/* We only handle this cmd right now */ if (cmd.cmd != KVM_PV_DUMP) break;
r = kvm_s390_handle_pv_vcpu_dump(vcpu, &cmd);
/* Always copy over UV rc / rrc data */ if (copy_to_user((__u8 __user *)argp, &cmd.rc, sizeof(cmd.rc) + sizeof(cmd.rrc)))
r = -EFAULT; break;
} default:
r = -ENOTTY;
}
if (kvm_is_ucontrol(kvm) && new->id < KVM_USER_MEM_SLOTS) return -EINVAL;
/* When we are protected, we should not change the memory slots */ if (kvm_s390_pv_get_handle(kvm)) return -EINVAL;
if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) { /* * A few sanity checks. We can have memory slots which have to be * located/ended at a segment boundary (1MB). The memory in userland is * ok to be fragmented into various different vmas. It is okay to mmap() * and munmap() stuff in this slot after doing this call at any time
*/
if (new->userspace_addr & 0xffffful) return -EINVAL;
/* * Turn off migration mode when: * - userspace creates a new memslot with dirty logging off, * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and * dirty logging is turned off. * Migration mode expects dirty page logging being enabled to store * its dirty bitmap.
*/ if (change != KVM_MR_DELETE &&
!(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
WARN(kvm_s390_vm_stop_migration(kvm), "Failed to stop migration mode");
/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 *
 * Two aliases are declared below: one through MODULE_ALIAS_MISCDEV() for
 * KVM_MINOR, and the literal alias "devname:kvm".
 */ #include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
/*
 * Measurement V0.5 in percent
 * Processing time: 0.108 seconds
 * (preprocessed on 2026-04-29)
 *
 * The information on this web page has been carefully compiled to the best
 * of our knowledge. However, no guarantee is given as to the completeness,
 * correctness, or quality of the information provided.
 *
 * Note:
 * The syntax highlighting and the measurement are still experimental.
 */