/*
 * To use atomic bitmap functions, we have to provide a bitmap address
 * that is u64 aligned. However, the ipm might be u32 aligned.
 * Therefore, we logically start the bitmap at the very beginning of the
 * struct and fixup the bit number.
 *
 * IPM_BIT_OFFSET is the byte offset of the ipm member within
 * struct kvm_s390_gisa, converted to a bit count.
 */
#define IPM_BIT_OFFSET (offsetof(struct kvm_s390_gisa, ipm) * BITS_PER_BYTE)
/**
 * gisa_set_iam - change the GISA interruption alert mask
 *
 * @gisa: gisa to operate on
 * @iam: new IAM value to use
 *
 * Change the IAM atomically with the next alert address and the IPM
 * of the GISA if the GISA is not part of the GIB alert list. All three
 * fields are located in the first long word of the GISA.
 *
 * Returns: 0 on success
 *          -EBUSY in case the gisa is part of the alert list
 */
static inline int gisa_set_iam(struct kvm_s390_gisa *gisa, u8 iam)
{
	u64 word, _word;

	word = READ_ONCE(gisa->u64.word[0]);
	do {
		/*
		 * The upper 32 bits of the word are compared against the
		 * GISA's own address; any other value is treated as "queued
		 * in the alert list" and the IAM is left untouched.
		 */
		if ((u64)gisa != word >> 32)
			return -EBUSY;
		/* Swap in the new IAM (low byte), keep the remaining bits. */
		_word = (word & ~0xffUL) | iam;
		/* A failed try_cmpxchg() reloads 'word', so the checks rerun. */
	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));

	return 0;
}
/**
 * gisa_clear_ipm - clear the GISA interruption pending mask
 *
 * @gisa: gisa to operate on
 *
 * Clear the IPM atomically with the next alert address and the IAM
 * of the GISA unconditionally. All three fields are located in the
 * first long word of the GISA.
 */
static inline void gisa_clear_ipm(struct kvm_s390_gisa *gisa)
{
	u64 word, _word;

	word = READ_ONCE(gisa->u64.word[0]);
	do {
		/* Zero the byte at bits 24..31 (the IPM), keep all other bits. */
		_word = word & ~(0xffUL << 24);
		/* A failed try_cmpxchg() reloads 'word' for the next attempt. */
	} while (!try_cmpxchg(&gisa->u64.word[0], &word, _word));
}
/** * gisa_get_ipm_or_restore_iam - return IPM or restore GISA IAM * * @gi: gisa interrupt struct to work on * * Atomically restores the interruption alert mask if none of the * relevant ISCs are pending and return the IPM. * * Returns: the relevant pending ISCs
*/ staticinline u8 gisa_get_ipm_or_restore_iam(struct kvm_s390_gisa_interrupt *gi)
{
u8 pending_mask, alert_mask;
u64 word, _word;
word = READ_ONCE(gi->origin->u64.word[0]); do {
alert_mask = READ_ONCE(gi->alert.mask);
pending_mask = (u8)(word >> 24) & alert_mask; if (pending_mask) return pending_mask;
_word = (word & ~0xffUL) | alert_mask;
} while (!try_cmpxchg(&gi->origin->u64.word[0], &word, _word));
active_mask = pending_irqs(vcpu); if (!active_mask) return 0;
if (psw_extint_disabled(vcpu))
active_mask &= ~IRQ_PEND_EXT_MASK; if (psw_ioint_disabled(vcpu))
active_mask &= ~IRQ_PEND_IO_MASK; else
active_mask = disable_iscs(vcpu, active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_EXTERNAL_CALL_SUBMASK))
__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_EMERGENCY_SIGNAL_SUBMASK))
__clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_CLOCK_COMPARATOR_SUBMASK))
__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_CPU_TIMER_SUBMASK))
__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask); if (!(vcpu->arch.sie_block->gcr[0] & CR0_SERVICE_SIGNAL_SUBMASK)) {
__clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
__clear_bit(IRQ_PEND_EXT_SERVICE_EV, &active_mask);
} if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK; /* PV guest cpus can have a single interruption injected at a time. */ if (kvm_s390_pv_cpu_get_handle(vcpu) &&
vcpu->arch.sie_block->iictl != IICTL_CODE_NONE)
active_mask &= ~(IRQ_PEND_EXT_II_MASK |
IRQ_PEND_IO_MASK |
IRQ_PEND_MCHK_MASK); /* * Check both floating and local interrupt's cr14 because * bit IRQ_PEND_MCHK_REP could be set in both cases.
*/ if (!(vcpu->arch.sie_block->gcr[14] &
(vcpu->kvm->arch.float_int.mchk.cr14 |
vcpu->arch.local_int.irq.mchk.cr14)))
__clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
/* * STOP irqs will never be actively delivered. They are triggered via * intercept requests and cleared when the stop intercept is performed.
*/
__clear_bit(IRQ_PEND_SIGP_STOP, &active_mask);
staticint __write_machine_check(struct kvm_vcpu *vcpu, struct kvm_s390_mchk_info *mchk)
{ unsignedlong ext_sa_addr; unsignedlong lc;
freg_t fprs[NUM_FPRS]; union mci mci; int rc;
/* * All other possible payload for a machine check (e.g. the register * contents in the save area) will be handled by the ultravisor, as * the hypervisor does not have the needed information for * protected guests.
*/ if (kvm_s390_pv_cpu_is_protected(vcpu)) {
vcpu->arch.sie_block->iictl = IICTL_CODE_MCHK;
vcpu->arch.sie_block->mcic = mchk->mcic;
vcpu->arch.sie_block->faddr = mchk->failing_storage_address;
vcpu->arch.sie_block->edc = mchk->ext_damage_code; return 0;
}
mci.val = mchk->mcic; /* take care of lazy register loading */
kvm_s390_fpu_store(vcpu->run);
save_access_regs(vcpu->run->s.regs.acrs); if (cpu_has_gs() && vcpu->arch.gs_enabled)
save_gs_cb(current->thread.gs_cb);
/* Extended save area */
rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr, sizeof(unsignedlong)); /* Only bits 0 through 63-LC are used for address formation */
lc = ext_sa_addr & MCESA_LC_MASK; if (test_kvm_facility(vcpu->kvm, 133)) { switch (lc) { case 0: case 10:
ext_sa_addr &= ~0x3ffUL; break; case 11:
ext_sa_addr &= ~0x7ffUL; break; case 12:
ext_sa_addr &= ~0xfffUL; break; default:
ext_sa_addr = 0; break;
}
} else {
ext_sa_addr &= ~0x3ffUL;
}
spin_lock(&fi->lock);
spin_lock(&li->lock); if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) { /* * If there was an exigent machine check pending, then any * repressible machine checks that might have been pending * are indicated along with it, so always clear bits for * repressible and exigent interrupts
*/
mchk = li->irq.mchk;
clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
memset(&li->irq.mchk, 0, sizeof(mchk));
deliver = 1;
} /* * We indicate floating repressible conditions along with * other pending conditions. Channel Report Pending and Channel * Subsystem damage are the only two and are indicated by * bits in mcic and masked in cr14.
*/ if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
mchk.mcic |= fi->mchk.mcic;
mchk.cr14 |= fi->mchk.cr14;
memset(&fi->mchk, 0, sizeof(mchk));
deliver = 1;
}
spin_unlock(&li->lock);
spin_unlock(&fi->lock);
/* PER is handled by the ultravisor */ if (kvm_s390_pv_cpu_is_protected(vcpu)) return __deliver_prog_pv(vcpu, pgm_info.code & ~PGM_PER);
switch (pgm_info.code & ~PGM_PER) { case PGM_AFX_TRANSLATION: case PGM_ASX_TRANSLATION: case PGM_EX_TRANSLATION: case PGM_LFX_TRANSLATION: case PGM_LSTE_SEQUENCE: case PGM_LSX_TRANSLATION: case PGM_LX_TRANSLATION: case PGM_PRIMARY_AUTHORITY: case PGM_SECONDARY_AUTHORITY:
nullifying = true;
fallthrough; case PGM_SPACE_SWITCH:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE); break; case PGM_ALEN_TRANSLATION: case PGM_ALE_SEQUENCE: case PGM_ASTE_INSTANCE: case PGM_ASTE_SEQUENCE: case PGM_ASTE_VALIDITY: case PGM_EXTENDED_AUTHORITY:
rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID);
nullifying = true; break; case PGM_ASCE_TYPE: case PGM_PAGE_TRANSLATION: case PGM_REGION_FIRST_TRANS: case PGM_REGION_SECOND_TRANS: case PGM_REGION_THIRD_TRANS: case PGM_SEGMENT_TRANSLATION:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID);
rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
(u8 *)__LC_OP_ACCESS_ID);
nullifying = true; break; case PGM_MONITOR:
rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
(u16 *)__LC_MON_CLASS_NR);
rc |= put_guest_lc(vcpu, pgm_info.mon_code,
(u64 *)__LC_MON_CODE); break; case PGM_VECTOR_PROCESSING: case PGM_DATA:
rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
(u32 *)__LC_DATA_EXC_CODE); break; case PGM_PROTECTION:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID); break; case PGM_STACK_FULL: case PGM_STACK_EMPTY: case PGM_STACK_SPECIFICATION: case PGM_STACK_TYPE: case PGM_STACK_OPERATION: case PGM_TRACE_TABEL: case PGM_CRYPTO_OPERATION:
nullifying = true; break;
}
/* * The VCPU might not be sleeping but rather executing VSIE. Let's * kick it, so it leaves the SIE to process the request.
*/
kvm_s390_vsie_kick(vcpu);
}
/* * If the monotonic clock runs faster than the tod clock we might be * woken up too early and have to go back to sleep to avoid deadlocks.
*/ if (sltime && hrtimer_forward_now(timer, ns_to_ktime(sltime))) return HRTIMER_RESTART;
kvm_s390_vcpu_wakeup(vcpu); return HRTIMER_NORESTART;
}
/* pending ckc conditions might have been invalidated */
clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); if (ckc_irq_pending(vcpu))
set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
/* pending cpu timer conditions might have been invalidated */
clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); if (cpu_timer_irq_pending(vcpu))
set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
while ((irqs = deliverable_irqs(vcpu)) && !rc) { /* bits are in the reverse order of interrupt priority */
irq_type = find_last_bit(&irqs, IRQ_PEND_COUNT); switch (irq_type) { case IRQ_PEND_IO_ISC_0: case IRQ_PEND_IO_ISC_1: case IRQ_PEND_IO_ISC_2: case IRQ_PEND_IO_ISC_3: case IRQ_PEND_IO_ISC_4: case IRQ_PEND_IO_ISC_5: case IRQ_PEND_IO_ISC_6: case IRQ_PEND_IO_ISC_7:
rc = __deliver_io(vcpu, irq_type); break; case IRQ_PEND_MCHK_EX: case IRQ_PEND_MCHK_REP:
rc = __deliver_machine_check(vcpu); break; case IRQ_PEND_PROG:
rc = __deliver_prog(vcpu); break; case IRQ_PEND_EXT_EMERGENCY:
rc = __deliver_emergency_signal(vcpu); break; case IRQ_PEND_EXT_EXTERNAL:
rc = __deliver_external_call(vcpu); break; case IRQ_PEND_EXT_CLOCK_COMP:
rc = __deliver_ckc(vcpu); break; case IRQ_PEND_EXT_CPU_TIMER:
rc = __deliver_cpu_timer(vcpu); break; case IRQ_PEND_RESTART:
rc = __deliver_restart(vcpu); break; case IRQ_PEND_SET_PREFIX:
rc = __deliver_set_prefix(vcpu); break; case IRQ_PEND_PFAULT_INIT:
rc = __deliver_pfault_init(vcpu); break; case IRQ_PEND_EXT_SERVICE:
rc = __deliver_service(vcpu); break; case IRQ_PEND_EXT_SERVICE_EV:
rc = __deliver_service_ev(vcpu); break; case IRQ_PEND_PFAULT_DONE:
rc = __deliver_pfault_done(vcpu); break; case IRQ_PEND_VIRTIO:
rc = __deliver_virtio(vcpu); break; default:
WARN_ONCE(1, "Unknown pending irq type %ld", irq_type);
clear_bit(irq_type, &li->pending_irqs);
}
delivered |= !rc;
}
/* * We delivered at least one interrupt and modified the PC. Force a * singlestep event now.
*/ if (delivered && guestdbg_sstep_enabled(vcpu)) { struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch;
if (!(irq->u.pgm.flags & KVM_S390_PGM_FLAGS_ILC_VALID)) { /* auto detection if no valid ILC was given */
irq->u.pgm.flags &= ~KVM_S390_PGM_FLAGS_ILC_MASK;
irq->u.pgm.flags |= kvm_s390_get_ilen(vcpu);
irq->u.pgm.flags |= KVM_S390_PGM_FLAGS_ILC_VALID;
}
/* * Because repressible machine checks can be indicated along with * exigent machine checks (PoP, Chapter 11, Interruption action) * we need to combine cr14, mcic and external damage code. * Failing storage address and the logout area should not be or'ed * together, we just indicate the last occurrence of the corresponding * machine check
*/
mchk->cr14 |= irq->u.mchk.cr14;
mchk->mcic |= irq->u.mchk.mcic;
mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout, sizeof(mchk->fixed_logout)); if (mchk->mcic & MCHK_EX_MASK)
set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs); elseif (mchk->mcic & MCHK_REP_MASK)
set_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs); return 0;
}
/*
 * Dequeue and return an I/O interrupt matching any of the interruption
 * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
 * Take into account the interrupts pending in the interrupt list and in GISA.
 *
 * The classic candidate comes from get_top_io_int(), the adapter candidate
 * (an ISC number, negative if there is none) from get_top_gisa_isc(). When
 * the adapter candidate is not handed out, its ISC is put back into the
 * GISA IPM via gisa_set_ipm_gisc(); when it displaces a classic interrupt,
 * that one is given back through kvm_s390_reinject_io_int().
 *
 * Returns the chosen interrupt; for an adapter interrupt this is a freshly
 * allocated object. The result is NULL when nothing matched, or when only
 * an adapter interrupt was pending and the allocation for it failed.
 *
 * Note that for a guest that does not enable I/O interrupts
 * but relies on TPI, a flood of classic interrupts may starve
 * out adapter interrupts on the same isc. Linux does not do
 * that, and it is possible to work around the issue by configuring
 * different iscs for classic and adapter interrupts in the guest,
 * but we may want to revisit this in the future.
 */
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
						    u64 isc_mask, u32 schid)
{
	struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
	struct kvm_s390_interrupt_info *classic, *adapter;
	int gisa_isc;

	classic = get_top_io_int(kvm, isc_mask, schid);
	gisa_isc = get_top_gisa_isc(kvm, isc_mask, schid);

	/* No adapter interrupt in the GISA: the classic one (if any) it is. */
	if (gisa_isc < 0)
		return classic;

	/*
	 * Both kinds are present and the classic interrupt's ISC is not
	 * numerically above the GISA one: deliver the classic interrupt and
	 * leave the adapter interrupt pending.
	 */
	if (classic && int_word_to_isc(classic->io.io_int_word) <= gisa_isc) {
		gisa_set_ipm_gisc(gi->origin, gisa_isc);
		return classic;
	}

	/* The adapter interrupt goes first; it needs an object of its own. */
	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL_ACCOUNT);
	if (!adapter) {
		/* Out of memory: keep it pending, fall back to classic/NULL. */
		gisa_set_ipm_gisc(gi->origin, gisa_isc);
		return classic;
	}

	adapter->type = KVM_S390_INT_IO(1, 0, 0, 0);
	adapter->io.io_int_word = isc_to_int_word(gisa_isc);
	if (classic)
		kvm_s390_reinject_io_int(kvm, classic);

	return adapter;
}
/* We always allow events, track them separately from the sccb ints */ if (fi->srv_signal.ext_params & SCCB_EVENT_PENDING)
set_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs);
/* * Early versions of the QEMU s390 bios will inject several * service interrupts after another without handling a * condition code indicating busy. * We will silently ignore those superfluous sccb values. * A future version of QEMU will take care of serialization * of servc requests
*/ if (fi->srv_signal.ext_params & SCCB_MASK) goto out;
fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
out:
spin_unlock(&fi->lock);
kfree(inti); return 0;
}
/* * We do not use the lock checking variant as this is just a * performance optimization and we do not hold the lock here. * This is ok as the code will pick interrupts from both "lists" * for delivery.
*/ if (gi->origin && inti->type & KVM_S390_INT_IO_AI_MASK) {
VM_EVENT(kvm, 4, "%s isc %1u", "inject: I/O (AI/gisa)", isc);
gisa_set_ipm_gisc(gi->origin, isc);
kfree(inti); return 0;
}
fi = &kvm->arch.float_int;
spin_lock(&fi->lock); if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
spin_unlock(&fi->lock); return -EBUSY;
}
fi->counters[FIRQ_CNTR_IO] += 1;
/*
 * Find a destination VCPU for a floating irq and kick it.
 *
 * @kvm:  VM the interrupt was queued for
 * @type: interrupt type; selects which cpuflag is raised on the target
 *
 * The first VCPU set in kvm->arch.idle_mask is preferred. If none is set,
 * VCPUs are probed round robin starting at float_int.next_rr_cpu, skipping
 * stopped ones. Nothing is kicked when there are no online VCPUs or when
 * the probing budget is exhausted.
 */
static void __floating_irq_kick(struct kvm *kvm, u64 type)
{
	struct kvm_vcpu *dst_vcpu;
	int sigcpu, online_vcpus, nr_tries = 0;

	online_vcpus = atomic_read(&kvm->online_vcpus);
	if (!online_vcpus)
		return;

	/* find idle VCPUs first, then round robin */
	sigcpu = find_first_bit(kvm->arch.idle_mask, online_vcpus);
	if (sigcpu == online_vcpus) {
		do {
			sigcpu = kvm->arch.float_int.next_rr_cpu++;
			kvm->arch.float_int.next_rr_cpu %= online_vcpus;
			/* avoid endless loops if all vcpus are stopped */
			if (nr_tries++ >= online_vcpus)
				return;
		} while (is_vcpu_stopped(kvm_get_vcpu(kvm, sigcpu)));
	}
	dst_vcpu = kvm_get_vcpu(kvm, sigcpu);

	/* make the VCPU drop out of the SIE, or wake it up if sleeping */
	switch (type) {
	case KVM_S390_MCHK:
		kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_STOP_INT);
		break;
	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
		/*
		 * CPUSTAT_IO_INT is skipped only for adapter interrupts on a
		 * VM with a GISA origin, and even then it is still set if
		 * kvm_s390_pv_cpu_get_handle() is non-zero for the target.
		 */
		if (!(type & KVM_S390_INT_IO_AI_MASK &&
		      kvm->arch.gisa_int.origin) ||
		      kvm_s390_pv_cpu_get_handle(dst_vcpu))
			kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_IO_INT);
		break;
	default:
		kvm_s390_set_cpuflags(dst_vcpu, CPUSTAT_EXT_INT);
		break;
	}
	kvm_s390_vcpu_wakeup(dst_vcpu);
}
/*
 * __inject_vm - queue a floating interrupt and kick a VCPU for it
 * @kvm:  VM to inject into
 * @inti: interrupt to inject; inti->type selects the type-specific helper
 *
 * The type is sampled once with READ_ONCE() so that the dispatch and the
 * subsequent kick operate on the same value.
 *
 * Returns 0 on success, -EINVAL for an unhandled type, or the non-zero
 * result of the type-specific helper (no VCPU is kicked in that case).
 */
static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
{
	u64 type = READ_ONCE(inti->type);
	int rc;

	switch (type) {
	case KVM_S390_MCHK:
		rc = __inject_float_mchk(kvm, inti);
		break;
	case KVM_S390_INT_VIRTIO:
		rc = __inject_virtio(kvm, inti);
		break;
	case KVM_S390_INT_SERVICE:
		rc = __inject_service(kvm, inti);
		break;
	case KVM_S390_INT_PFAULT_DONE:
		rc = __inject_pfault_done(kvm, inti);
		break;
	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
		rc = __inject_io(kvm, inti);
		break;
	default:
		rc = -EINVAL;
	}
	if (rc)
		return rc;

	__floating_irq_kick(kvm, type);
	return 0;
}
int kvm_s390_inject_vm(struct kvm *kvm, struct kvm_s390_interrupt *s390int)
{ struct kvm_s390_interrupt_info *inti; int rc;
inti = kzalloc(sizeof(*inti), GFP_KERNEL_ACCOUNT); if (!inti) return -ENOMEM;
mutex_lock(&kvm->lock); if (!kvm_s390_pv_is_protected(kvm))
fi->masked_irqs = 0;
mutex_unlock(&kvm->lock);
spin_lock(&fi->lock);
fi->pending_irqs = 0;
memset(&fi->srv_signal, 0, sizeof(fi->srv_signal));
memset(&fi->mchk, 0, sizeof(fi->mchk)); for (i = 0; i < FIRQ_LIST_COUNT; i++)
clear_irq_list(&fi->lists[i]); for (i = 0; i < FIRQ_MAX_COUNT; i++)
fi->counters[i] = 0;
spin_unlock(&fi->lock);
kvm_s390_gisa_clear(kvm);
};
staticint get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
{ struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; struct kvm_s390_interrupt_info *inti; struct kvm_s390_float_interrupt *fi; struct kvm_s390_irq *buf; struct kvm_s390_irq *irq; int max_irqs; int ret = 0; int n = 0; int i;
if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0) return -EINVAL;
/* * We are already using -ENOMEM to signal * userspace it may retry with a bigger buffer, * so we need to use something else for this case
*/
buf = vzalloc(len); if (!buf) return -ENOBUFS;
max_irqs = len / sizeof(struct kvm_s390_irq);
if (gi->origin && gisa_get_ipm(gi->origin)) { for (i = 0; i <= MAX_ISC; i++) { if (n == max_irqs) { /* signal userspace to try again */
ret = -ENOMEM; goto out_nolock;
} if (gisa_tac_ipm_gisc(gi->origin, i)) {
irq = (struct kvm_s390_irq *) &buf[n];
irq->type = KVM_S390_INT_IO(1, 0, 0, 0);
irq->u.io.io_int_word = isc_to_int_word(i);
n++;
}
}
}
fi = &kvm->arch.float_int;
spin_lock(&fi->lock); for (i = 0; i < FIRQ_LIST_COUNT; i++) {
list_for_each_entry(inti, &fi->lists[i], list) { if (n == max_irqs) { /* signal userspace to try again */
ret = -ENOMEM; goto out;
}
inti_to_irq(inti, &buf[n]);
n++;
}
} if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs) ||
test_bit(IRQ_PEND_EXT_SERVICE_EV, &fi->pending_irqs)) { if (n == max_irqs) { /* signal userspace to try again */
ret = -ENOMEM; goto out;
}
irq = (struct kvm_s390_irq *) &buf[n];
irq->type = KVM_S390_INT_SERVICE;
irq->u.ext = fi->srv_signal;
n++;
} if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) { if (n == max_irqs) { /* signal userspace to try again */
ret = -ENOMEM; goto out;
}
irq = (struct kvm_s390_irq *) &buf[n];
irq->type = KVM_S390_MCHK;
irq->u.mchk = fi->mchk;
n++;
}
out:
spin_unlock(&fi->lock);
out_nolock: if (!ret && n > 0) { if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
ret = -EFAULT;
}
vfree(buf);
if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT;
adapter = get_io_adapter(dev->kvm, req.id); if (!adapter) return -EINVAL; switch (req.type) { case KVM_S390_IO_ADAPTER_MASK:
ret = kvm_s390_mask_adapter(dev->kvm, req.id, req.mask); if (ret > 0)
ret = 0; break; /* * The following operations are no longer needed and therefore no-ops. * The gpa to hva translation is done when an IRQ route is set up. The * set_irq code uses get_user_pages_remote() to do the actual write.
*/ case KVM_S390_IO_ADAPTER_MAP: case KVM_S390_IO_ADAPTER_UNMAP:
ret = 0; break; default:
ret = -EINVAL;
}
{ const u64 isc_mask = 0xffUL << 24; /* all iscs set */
u32 schid;
if (attr->flags) return -EINVAL; if (attr->attr != sizeof(schid)) return -EINVAL; if (copy_from_user(&schid, (void __user *) attr->addr, sizeof(schid))) return -EFAULT; if (!schid) return -EINVAL;
kfree(kvm_s390_get_io_int(kvm, isc_mask, schid)); /* * If userspace is conforming to the architecture, we can have at most * one pending I/O interrupt per subchannel, so this is effectively a * clear all.
*/ return 0;
}
staticint modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
{ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int; struct kvm_s390_ais_req req; int ret = 0;
if (!test_kvm_facility(kvm, 72)) return -EOPNOTSUPP;
if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req))) return -EFAULT;
staticint flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{ int r = 0; unsignedlong i; struct kvm_vcpu *vcpu;
switch (attr->group) { case KVM_DEV_FLIC_ENQUEUE:
r = enqueue_floating_irq(dev, attr); break; case KVM_DEV_FLIC_CLEAR_IRQS:
kvm_s390_clear_float_irqs(dev->kvm); break; case KVM_DEV_FLIC_APF_ENABLE: if (kvm_is_ucontrol(dev->kvm)) return -EINVAL;
dev->kvm->arch.gmap->pfault_enabled = 1; break; case KVM_DEV_FLIC_APF_DISABLE_WAIT: if (kvm_is_ucontrol(dev->kvm)) return -EINVAL;
dev->kvm->arch.gmap->pfault_enabled = 0; /* * Make sure no async faults are in transition when * clearing the queues. So we don't need to worry * about late coming workers.
*/
synchronize_srcu(&dev->kvm->srcu);
kvm_for_each_vcpu(i, vcpu, dev->kvm)
kvm_clear_async_pf_completion_queue(vcpu); break; case KVM_DEV_FLIC_ADAPTER_REGISTER:
r = register_io_adapter(dev, attr); break; case KVM_DEV_FLIC_ADAPTER_MODIFY:
r = modify_io_adapter(dev, attr); break; case KVM_DEV_FLIC_CLEAR_IO_IRQ:
r = clear_io_irq(dev->kvm, attr); break; case KVM_DEV_FLIC_AISM:
r = modify_ais_mode(dev->kvm, attr); break; case KVM_DEV_FLIC_AIRQ_INJECT:
r = flic_inject_airq(dev->kvm, attr); break; case KVM_DEV_FLIC_AISM_ALL:
r = flic_ais_mode_set_all(dev->kvm, attr); break; default:
r = -EINVAL;
}
return r;
}
staticint flic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{ switch (attr->group) { case KVM_DEV_FLIC_GET_ALL_IRQS: case KVM_DEV_FLIC_ENQUEUE: case KVM_DEV_FLIC_CLEAR_IRQS: case KVM_DEV_FLIC_APF_ENABLE: case KVM_DEV_FLIC_APF_DISABLE_WAIT: case KVM_DEV_FLIC_ADAPTER_REGISTER: case KVM_DEV_FLIC_ADAPTER_MODIFY: case KVM_DEV_FLIC_CLEAR_IO_IRQ: case KVM_DEV_FLIC_AISM: case KVM_DEV_FLIC_AIRQ_INJECT: case KVM_DEV_FLIC_AISM_ALL: return 0;
} return -ENXIO;
}
/*
 * set_adapter_int - irq routing callback for adapter interrupts
 * @e:             routing entry; e->adapter identifies the adapter and is
 *                 passed on to adapter_indicators_set()
 * @kvm:           VM the interrupt is routed to
 * @irq_source_id: not used
 * @level:         line level; a zero level is ignored
 * @line_status:   not used
 *
 * Return value:
 * < 0 - not injected due to error
 * = 0 - coalesced, summary indicator already active
 * > 0 - injected interrupt
 */
static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
			   struct kvm *kvm, int irq_source_id, int level,
			   bool line_status)
{
	int ret;
	struct s390_io_adapter *adapter;

	/* We're only interested in the 0->1 transition. */
	if (!level)
		return 0;

	adapter = get_io_adapter(kvm, e->adapter.adapter_id);
	if (!adapter)
		return -1;

	ret = adapter_indicators_set(kvm, adapter, &e->adapter);
	if ((ret > 0) && !adapter->masked) {
		ret = kvm_s390_inject_airq(kvm, adapter);
		/* Map a successful injection (0) to the "> 0" contract. */
		if (ret == 0)
			ret = 1;
	}
	return ret;
}
/* * Inject the machine check to the guest.
*/ void kvm_s390_reinject_machine_check(struct kvm_vcpu *vcpu, struct mcck_volatile_info *mcck_info)
{ struct kvm_s390_interrupt_info inti; struct kvm_s390_irq irq; struct kvm_s390_mchk_info *mchk; union mci mci;
__u64 cr14 = 0; /* upper bits are not used */ int rc;
mci.val = mcck_info->mcic; if (mci.sr)
cr14 |= CR14_RECOVERY_SUBMASK; if (mci.dg)
cr14 |= CR14_DEGRADATION_SUBMASK; if (mci.w)
cr14 |= CR14_WARNING_SUBMASK;
int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, conststruct kvm_irq_routing_entry *ue)
{
u64 uaddr_s, uaddr_i; int idx;
switch (ue->type) { /* we store the userspace addresses instead of the guest addresses */ case KVM_IRQ_ROUTING_S390_ADAPTER: if (kvm_is_ucontrol(kvm)) return -EINVAL;
e->set = set_adapter_int;
/*
 * kvm_set_msi - MSI routing callback
 *
 * Always fails with -EINVAL; none of the arguments are examined.
 */
int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
		struct kvm *kvm,
		int irq_source_id,
		int level,
		bool line_status)
{
	return -EINVAL;
}
int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
{ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; struct kvm_s390_irq *buf; int r = 0; int n;
buf = vmalloc(len); if (!buf) return -ENOMEM;
if (copy_from_user((void *) buf, irqstate, len)) {
r = -EFAULT; goto out_free;
}
/* * Don't allow setting the interrupt state * when there are already interrupts pending
*/
spin_lock(&li->lock); if (li->pending_irqs) {
r = -EBUSY; goto out_unlock;
}
for (n = 0; n < len / sizeof(*buf); n++) {
r = do_inject_vcpu(vcpu, &buf[n]); if (r) break;
}
do { /* * If the NONE_GISA_ADDR is still stored in the alert list * origin, we will leave the outer loop. No further GISA has * been added to the alert list by millicode while processing * the current alert list.
*/
final = (origin & NONE_GISA_ADDR); /* * Cut off the alert list and store the NONE_GISA_ADDR in the * alert list origin to avoid further GAL interruptions. * A new alert list can be build up by millicode in parallel * for guests not in the yet cut-off alert list. When in the * final loop, store the NULL_GISA_ADDR instead. This will re- * enable GAL interruptions on the host again.
*/
origin = xchg(&gib->alert_list_origin,
(!final) ? NONE_GISA_ADDR : NULL_GISA_ADDR); /* * Loop through the just cut-off alert list and start the * gisa timers to kick idle vcpus to consume the pending * interruptions asap.
*/ while (origin & GISA_ADDR_MASK) {
gisa_phys = origin;
gisa = phys_to_virt(gisa_phys);
origin = gisa->next_alert;
gisa->next_alert = gisa_phys;
kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
gi = &kvm->arch.gisa_int; if (hrtimer_active(&gi->timer))
hrtimer_cancel(&gi->timer);
hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
}
} while (!final);
if (!gi->origin) return;
kvm_for_each_vcpu(i, vcpu, kvm) {
mutex_lock(&vcpu->mutex);
vcpu->arch.sie_block->eca &= ~ECA_AIV;
vcpu->arch.sie_block->gd = 0U;
mutex_unlock(&vcpu->mutex);
VCPU_EVENT(vcpu, 3, "AIV disabled for cpu %03u", vcpu->vcpu_id);
}
kvm_s390_gisa_destroy(kvm);
}
/** * kvm_s390_gisc_register - register a guest ISC * * @kvm: the kernel vm to work with * @gisc: the guest interruption sub class to register * * The function extends the vm specific alert mask to use. * The effective IAM mask in the GISA is updated as well * in case the GISA is not part of the GIB alert list. * It will be updated latest when the IAM gets restored * by gisa_get_ipm_or_restore_iam(). * * Returns: the nonspecific ISC (NISC) the gib alert mechanism * has registered with the channel subsystem. * -ENODEV in case the vm uses no GISA * -ERANGE in case the guest ISC is invalid
*/ int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc)
{ struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int;
if (!gi->origin) return -ENODEV; if (gisc > MAX_ISC) return -ERANGE;
/** * kvm_s390_gisc_unregister - unregister a guest ISC * * @kvm: the kernel vm to work with * @gisc: the guest interruption sub class to unregister * * The function reduces the vm specific alert mask to use. * The effective IAM mask in the GISA is updated as well * in case the GISA is not part of the GIB alert list. * It will be updated at the latest when the IAM gets restored * by gisa_get_ipm_or_restore_iam(). * * Returns: the nonspecific ISC (NISC) the gib alert mechanism * has registered with the channel subsystem. * -ENODEV in case the vm uses no GISA * -ERANGE in case the guest ISC is invalid * -EINVAL in case the guest ISC is not registered
*/ int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc)
{ struct kvm_s390_gisa_interrupt *gi = &kvm->arch.gisa_int; int rc = 0;
if (!gi->origin) return -ENODEV; if (gisc > MAX_ISC) return -ERANGE;
kvm = kvm_s390_pci_si_to_kvm(aift, si); if (!kvm) return;
gi = &kvm->arch.gisa_int;
if (!(gi->origin->g1.simm & AIS_MODE_MASK(gaite->gisc)) ||
!(gi->origin->g1.nimm & AIS_MODE_MASK(gaite->gisc))) {
gisa_set_ipm_gisc(gi->origin, gaite->gisc); if (hrtimer_active(&gi->timer))
hrtimer_cancel(&gi->timer);
hrtimer_start(&gi->timer, 0, HRTIMER_MODE_REL);
kvm->stat.aen_forward++;
}
}
staticvoid aen_process_gait(u8 isc)
{ bool found = false, first = true; union zpci_sic_iib iib = {{0}}; unsignedlong si, flags;
spin_lock_irqsave(&aift->gait_lock, flags);
if (!aift->gait) {
spin_unlock_irqrestore(&aift->gait_lock, flags); return;
}
for (si = 0;;) { /* Scan adapter summary indicator bit vector */
si = airq_iv_scan(aift->sbv, si, airq_iv_end(aift->sbv)); if (si == -1UL) { if (first || found) { /* Re-enable interrupts. */
zpci_set_irq_ctrl(SIC_IRQ_MODE_SINGLE, isc,
&iib);
first = found = false;
} else { /* Interrupts on and all bits processed */ break;
}
found = false;
si = 0; /* Scan again after re-enabling interrupts */ continue;
}
found = true;
aen_host_forward(si);
}
gib_alert_irq.isc = nisc; if (register_adapter_interrupt(&gib_alert_irq)) {
pr_err("Registering the GIB alert interruption handler failed\n");
rc = -EIO; goto out_free_gib;
} /* adapter interrupts used for AP (applicable here) don't use the LSI */
*gib_alert_irq.lsi_ptr = 0xff;
gib->nisc = nisc;
gib_origin = virt_to_phys(gib); if (chsc_sgib(gib_origin)) {
pr_err("Associating the GIB with the AIV facility failed\n");
free_page((unsignedlong)gib);
gib = NULL;
rc = -EIO; goto out_unreg_gal;
}
if (kvm_s390_pci_interp_allowed()) { if (kvm_s390_pci_aen_init(nisc)) {
pr_err("Initializing AEN for PCI failed\n");
rc = -EIO; goto out_unreg_gal;
}
}
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert. 0.157 Bemerkung:
(vorverarbeitet am 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.