/* * The mechanism of our pfault code: if Linux is running as guest, runs a user * space process and the user space process accesses a page that the host has * paged out we get a pfault interrupt. * * This allows us, within the guest, to schedule a different process. Without * this mechanism the host would have to suspend the whole virtual cpu until * the page has been paged in. * * So when we get such an interrupt then we set the state of the current task * to uninterruptible and also set the need_resched flag. Both happens within * interrupt context(!). If we later on want to return to user space we * recognize the need_resched flag and then call schedule(). It's not very * obvious how this works... * * Of course we have a lot of additional fun with the completion interrupt (-> * host signals that a page of a process has been paged in and the process can * continue to run). This interrupt can arrive on any cpu and, since we have * virtual cpus, actually appear before the interrupt that signals that a page * is missing.
*/ staticvoid pfault_interrupt(struct ext_code ext_code, unsignedint param32, unsignedlong param64)
{ struct task_struct *tsk;
__u16 subcode;
pid_t pid;
/* * Get the external interruption subcode & pfault initial/completion * signal bit. VM stores this in the 'cpu address' field associated * with the external interrupt.
*/
subcode = ext_code.subcode; if ((subcode & 0xff00) != __SUBCODE_MASK) return;
inc_irq_stat(IRQEXT_PFL); /* Get the token (= pid of the affected task). */
pid = param64 & LPP_PID_MASK;
rcu_read_lock();
tsk = find_task_by_pid_ns(pid, &init_pid_ns); if (tsk)
get_task_struct(tsk);
rcu_read_unlock(); if (!tsk) return;
spin_lock(&pfault_lock); if (subcode & PF_COMPLETE) { /* signal bit is set -> a page has been swapped in by VM */ if (tsk->thread.pfault_wait == 1) { /* * Initial interrupt was faster than the completion * interrupt. pfault_wait is valid. Set pfault_wait * back to zero and wake up the process. This can * safely be done because the task is still sleeping * and can't produce new pfaults.
*/
tsk->thread.pfault_wait = 0;
list_del(&tsk->thread.list);
wake_up_process(tsk);
put_task_struct(tsk);
} else { /* * Completion interrupt was faster than initial * interrupt. Set pfault_wait to -1 so the initial * interrupt doesn't put the task to sleep. * If the task is not running, ignore the completion * interrupt since it must be a leftover of a PFAULT * CANCEL operation which didn't remove all pending * completion interrupts.
*/ if (task_is_running(tsk))
tsk->thread.pfault_wait = -1;
}
} else { /* signal bit not set -> a real page is missing. */ if (WARN_ON_ONCE(tsk != current)) goto out; if (tsk->thread.pfault_wait == 1) { /* Already on the list with a reference: put to sleep */ goto block;
} elseif (tsk->thread.pfault_wait == -1) { /* * Completion interrupt was faster than the initial * interrupt (pfault_wait == -1). Set pfault_wait * back to zero and exit.
*/
tsk->thread.pfault_wait = 0;
} else { /* * Initial interrupt arrived before completion * interrupt. Let the task sleep. * An extra task reference is needed since a different * cpu may set the task state to TASK_RUNNING again * before the scheduler is reached.
*/
get_task_struct(tsk);
tsk->thread.pfault_wait = 1;
list_add(&tsk->thread.list, &pfault_list);
block: /* * Since this must be a userspace fault, there * is no kernel task state to trample. Rely on the * return to userspace schedule() to block.
*/
__set_current_state(TASK_UNINTERRUPTIBLE);
set_tsk_need_resched(tsk);
set_preempt_need_resched();
}
}
out:
spin_unlock(&pfault_lock);
put_task_struct(tsk);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.