/*
 * Handle IRQ/context state management when entering from kernel mode.
 * Before this function is called it is not safe to call regular kernel code,
 * instrumentable code, or any code which may trigger an exception.
 *
 * This is intended to match the logic in irqentry_enter(), handling the kernel
 * mode transitions only.
*/ static __always_inline void __enter_from_kernel_mode(struct pt_regs *regs)
{
regs->exit_rcu = false;
if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
lockdep_hardirqs_off(CALLER_ADDR0);
ct_irq_enter();
trace_hardirqs_off_finish();
/*
 * IRQ/context state bookkeeping for a return to kernel mode.
 *
 * Once this function has returned it is no longer safe to call regular kernel
 * code, instrumentable code, or any code which may trigger an exception.
 *
 * This mirrors the kernel-mode half of irqentry_exit(); preemption is handled
 * elsewhere.
 */
static __always_inline void __exit_to_kernel_mode(struct pt_regs *regs)
{
	lockdep_assert_irqs_disabled();

	/*
	 * The saved register state reports interrupts as disabled: no IRQ
	 * tracing/lockdep "on" transition is made, only the context-tracking
	 * exit when the entry path flagged exit_rcu.
	 */
	if (!interrupts_enabled(regs)) {
		if (regs->exit_rcu)
			ct_irq_exit();
		return;
	}

	/* Interrupts enabled in the saved state and no exit_rcu flag. */
	if (!regs->exit_rcu) {
		trace_hardirqs_on();
		return;
	}

	/*
	 * Interrupts enabled in the saved state and exit_rcu set: the tracing
	 * and lockdep "prepare" steps run before ct_irq_exit(), and the final
	 * lockdep update runs after it.
	 */
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare();
	ct_irq_exit();
	lockdep_hardirqs_on(CALLER_ADDR0);
}
/*
 * Handle IRQ/context state management when entering from user mode.
 * Before this function is called it is not safe to call regular kernel code,
 * instrumentable code, or any code which may trigger an exception.
 *
 * The calls below are strictly ordered: lockdep is updated first, then the
 * context-tracking state leaves user mode, and only afterwards is the tracing
 * hook invoked.
 */
static __always_inline void __enter_from_user_mode(void)
{
	lockdep_hardirqs_off(CALLER_ADDR0);
	/* Warn if context tracking does not currently report CT_STATE_USER. */
	CT_WARN_ON(ct_state() != CT_STATE_USER);
	user_exit_irqoff();
	/* NOTE(review): presumably must follow user_exit_irqoff() because tracing is instrumentable - confirm. */
	trace_hardirqs_off_finish();
	mte_disable_tco_entry(current);
}
/*
 * Handle IRQ/context state management when exiting to user mode.
 * After this function returns it is not safe to call regular kernel code,
 * instrumentable code, or any code which may trigger an exception.
 *
 * The calls below are strictly ordered: the tracing and lockdep "prepare"
 * steps run before the context-tracking switch to user mode, and the final
 * lockdep update runs after it.
 */
static __always_inline void __exit_to_user_mode(void)
{
	trace_hardirqs_on_prepare();
	lockdep_hardirqs_on_prepare();
	user_enter_irqoff();
	lockdep_hardirqs_on(CALLER_ADDR0);
}
staticvoid do_notify_resume(struct pt_regs *regs, unsignedlong thread_flags)
{ do {
local_irq_enable();
if (thread_flags & (_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY))
schedule();
if (thread_flags & _TIF_UPROBE)
uprobe_notify_resume(regs);
/*
 * Handle IRQ/context state management when entering an NMI from user/kernel
 * mode. Before this function is called it is not safe to call regular kernel
 * code, instrumentable code, or any code which may trigger an exception.
*/ staticvoid noinstr arm64_enter_nmi(struct pt_regs *regs)
{
regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
/*
 * Handle IRQ/context state management when exiting an NMI from user/kernel
 * mode. After this function returns it is not safe to call regular kernel
 * code, instrumentable code, or any code which may trigger an exception.
 *
 * @regs: saved register state; regs->lockdep_hardirqs is read to decide
 *        whether the lockdep/tracing "hardirqs on" state is re-established.
 */
static void noinstr arm64_exit_nmi(struct pt_regs *regs)
{
	/* Value recorded in regs by arm64_enter_nmi(). */
	bool restore = regs->lockdep_hardirqs;

	ftrace_nmi_exit();
	if (restore) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare();
	}

	ct_nmi_exit();
	lockdep_hardirq_exit();
	/* The final lockdep update happens only after ct_nmi_exit(). */
	if (restore)
		lockdep_hardirqs_on(CALLER_ADDR0);
	__nmi_exit();
}
/*
 * Handle IRQ/context state management when entering a debug exception from
 * kernel mode. Before this function is called it is not safe to call regular
 * kernel code, instrumentable code, or any code which may trigger an exception.
*/ staticvoid noinstr arm64_enter_el1_dbg(struct pt_regs *regs)
{
regs->lockdep_hardirqs = lockdep_hardirqs_enabled();
/*
 * Handle IRQ/context state management when exiting a debug exception from
 * kernel mode. After this function returns it is not safe to call regular
 * kernel code, instrumentable code, or any code which may trigger an exception.
 *
 * @regs: saved register state; regs->lockdep_hardirqs is read to decide
 *        whether the lockdep/tracing "hardirqs on" state is re-established.
 */
static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs)
{
	/* Value recorded in regs by arm64_enter_el1_dbg(). */
	bool restore = regs->lockdep_hardirqs;

	if (restore) {
		trace_hardirqs_on_prepare();
		lockdep_hardirqs_on_prepare();
	}

	ct_nmi_exit();
	/* The final lockdep update happens only after ct_nmi_exit(). */
	if (restore)
		lockdep_hardirqs_on(CALLER_ADDR0);
}
/*
 * Call preempt_schedule_irq() if, and only if, every check below passes.
 * Each failed check returns without scheduling.
 */
static void __sched arm64_preempt_schedule_irq(void)
{
	if (!need_irq_preemption())
		return;

	/*
	 * Note: thread_info::preempt_count includes both thread_info::count
	 * and thread_info::need_resched, and is not equivalent to
	 * preempt_count().
	 */
	if (READ_ONCE(current_thread_info()->preempt_count) != 0)
		return;

	/*
	 * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC
	 * priority masking is used the GIC irqchip driver will clear DAIF.IF
	 * using gic_arch_enable_irqs() for normal IRQs. If anything is set in
	 * DAIF we must have handled an NMI, so skip preemption.
	 */
	if (system_uses_irq_prio_masking() && read_sysreg(daif))
		return;

	/*
	 * Preempting a task from an IRQ means we leave copies of PSTATE
	 * on the stack. cpufeature's enable calls may modify PSTATE, but
	 * resuming one of these preempted tasks would undo those changes.
	 *
	 * Only allow a task to be preempted once cpufeatures have been
	 * enabled.
	 */
	if (system_capabilities_finalized())
		preempt_schedule_irq();
}
static __always_inline bool
cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{ if (!__this_cpu_read(__in_cortex_a76_erratum_1463225_wa)) returnfalse;
/* * We've taken a dummy step exception from the kernel to ensure * that interrupts are re-enabled on the syscall path. Return back * to cortex_a76_erratum_1463225_svc_handler() with debug exceptions * masked so that we can safely restore the mdscr and get on with * handling the syscall.
*/
regs->pstate |= PSR_D_BIT; returntrue;
} #else/* CONFIG_ARM64_ERRATUM_1463225 */ staticvoid cortex_a76_erratum_1463225_svc_handler(void) { } staticbool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs)
{ returnfalse;
} #endif/* CONFIG_ARM64_ERRATUM_1463225 */
/*
 * As per the ABI, exit SME streaming mode and clear the SVE state not
 * shared with FPSIMD on syscall entry.
*/ staticinlinevoid fpsimd_syscall_enter(void)
{ /* Ensure PSTATE.SM is clear, but leave PSTATE.ZA as-is. */ if (system_supports_sme())
sme_smstop_sm();
/* * The CPU is not in streaming mode. If non-streaming SVE is not * supported, there is no SVE state that needs to be discarded.
*/ if (!system_supports_sve()) return;
if (test_thread_flag(TIF_SVE)) { unsignedint sve_vq_minus_one;
/* * Any live non-FPSIMD SVE state has been zeroed. Allow * fpsimd_save_user_state() to lazily discard SVE state until either * the live state is unbound or fpsimd_syscall_exit() is called.
*/
__this_cpu_write(fpsimd_last_state.to_save, FP_STATE_FPSIMD);
}
/*
 * Syscall-exit counterpart of fpsimd_syscall_enter(): puts the per-CPU
 * fpsimd_last_state.to_save marker back to FP_STATE_CURRENT. Does nothing
 * when SVE is not supported.
 */
static __always_inline void fpsimd_syscall_exit(void)
{
	/*
	 * The current task's user FPSIMD/SVE/SME state is now bound to this
	 * CPU. The fpsimd_last_state.to_save value is either:
	 *
	 * - FP_STATE_FPSIMD, if the state has not been reloaded on this CPU
	 *   since fpsimd_syscall_enter().
	 *
	 * - FP_STATE_CURRENT, if the state has been reloaded on this CPU at
	 *   any point.
	 *
	 * Reset this to FP_STATE_CURRENT to stop lazy discarding.
	 */
	if (system_supports_sve())
		__this_cpu_write(fpsimd_last_state.to_save, FP_STATE_CURRENT);
}
/*
 * In debug exception context, we explicitly disable preemption despite
 * having interrupts disabled.
 * This serves two purposes: it makes it much less likely that we would
 * accidentally schedule in exception context and it will force a warning
 * if we somehow manage to schedule by accident.
 *
 * @regs: saved register state; not referenced by this function.
 */
static void debug_exception_enter(struct pt_regs *regs)
{
	preempt_disable();

	/* This code is a bit fragile. Test it. */
	RCU_LOCKDEP_WARN(!rcu_is_watching(), "exception_enter didn't work");
}
NOKPROBE_SYMBOL(debug_exception_enter);
/*
 * EL1 software-step handler.
 *
 * Brackets the work with arm64_enter_el1_dbg()/arm64_exit_el1_dbg(). If the
 * Cortex-A76 erratum 1463225 handler claims the exception, nothing else is
 * done in between.
 *
 * @regs: saved register state of the interrupted context.
 * @esr:  syndrome value, passed through to do_el1_softstep().
 */
static void noinstr el1_softstp(struct pt_regs *regs, unsigned long esr)
{
	arm64_enter_el1_dbg(regs);
	if (!cortex_a76_erratum_1463225_debug_handler(regs)) {
		debug_exception_enter(regs);
		/*
		 * After handling a breakpoint, we suspend the breakpoint
		 * and use single-step to move to the next instruction.
		 * If we are stepping a suspended breakpoint there's nothing more to do:
		 * the single-step is complete.
		 */
		if (!try_step_suspended_breakpoints(regs))
			do_el1_softstep(esr, regs);
		debug_exception_exit(regs);
	}
	arm64_exit_el1_dbg(regs);
}
staticvoid noinstr el1_watchpt(struct pt_regs *regs, unsignedlong esr)
{ /* Watchpoints are the only debug exception to write FAR_EL1 */ unsignedlong far = read_sysreg(far_el1);
switch (ESR_ELx_EC(esr)) { case ESR_ELx_EC_DABT_CUR: case ESR_ELx_EC_IABT_CUR:
el1_abort(regs, esr); break; /* * We don't handle ESR_ELx_EC_SP_ALIGN, since we will have hit a * recursive exception when trying to push the initial pt_regs.
*/ case ESR_ELx_EC_PC_ALIGN:
el1_pc(regs, esr); break; case ESR_ELx_EC_SYS64: case ESR_ELx_EC_UNKNOWN:
el1_undef(regs, esr); break; case ESR_ELx_EC_BTI:
el1_bti(regs, esr); break; case ESR_ELx_EC_GCS:
el1_gcs(regs, esr); break; case ESR_ELx_EC_MOPS:
el1_mops(regs, esr); break; case ESR_ELx_EC_BREAKPT_CUR:
el1_breakpt(regs, esr); break; case ESR_ELx_EC_SOFTSTP_CUR:
el1_softstp(regs, esr); break; case ESR_ELx_EC_WATCHPT_CUR:
el1_watchpt(regs, esr); break; case ESR_ELx_EC_BRK64:
el1_brk64(regs, esr); break; case ESR_ELx_EC_FPAC:
el1_fpac(regs, esr); break; default:
__panic_unhandled(regs, "64-bit el1h sync", esr);
}
}
/* * We've taken an instruction abort from userspace and not yet * re-enabled IRQs. If the address is a kernel address, apply * BP hardening prior to enabling IRQs and pre-emption.
*/ if (!is_ttbr0_addr(far))
arm64_apply_bp_hardening();
if (!is_ttbr0_addr(regs->pc))
arm64_apply_bp_hardening();
enter_from_user_mode(regs); /* * After handling a breakpoint, we suspend the breakpoint * and use single-step to move to the next instruction. * If we are stepping a suspended breakpoint there's nothing more to do: * the single-step is complete.
*/
step_done = try_step_suspended_breakpoints(regs);
local_daif_restore(DAIF_PROCCTX); if (!step_done)
do_el0_softstep(esr, regs);
exit_to_user_mode(regs);
}
staticvoid noinstr el0_watchpt(struct pt_regs *regs, unsignedlong esr)
{ /* Watchpoints are the only debug exception to write FAR_EL1 */ unsignedlong far = read_sysreg(far_el1);
switch (ESR_ELx_EC(esr)) { case ESR_ELx_EC_SVC32:
el0_svc_compat(regs); break; case ESR_ELx_EC_DABT_LOW:
el0_da(regs, esr); break; case ESR_ELx_EC_IABT_LOW:
el0_ia(regs, esr); break; case ESR_ELx_EC_FP_ASIMD:
el0_fpsimd_acc(regs, esr); break; case ESR_ELx_EC_FP_EXC32:
el0_fpsimd_exc(regs, esr); break; case ESR_ELx_EC_PC_ALIGN:
el0_pc(regs, esr); break; case ESR_ELx_EC_UNKNOWN: case ESR_ELx_EC_CP14_MR: case ESR_ELx_EC_CP14_LS: case ESR_ELx_EC_CP14_64:
el0_undef(regs, esr); break; case ESR_ELx_EC_CP15_32: case ESR_ELx_EC_CP15_64:
el0_cp15(regs, esr); break; case ESR_ELx_EC_BREAKPT_LOW:
el0_breakpt(regs, esr); break; case ESR_ELx_EC_SOFTSTP_LOW:
el0_softstp(regs, esr); break; case ESR_ELx_EC_WATCHPT_LOW:
el0_watchpt(regs, esr); break; case ESR_ELx_EC_BKPT32:
el0_bkpt32(regs, esr); break; default:
el0_inv(regs, esr);
}
}
/* * We didn't take an exception to get here, so the HW hasn't * set/cleared bits in PSTATE that we may rely on. * * The original SDEI spec (ARM DEN 0054A) can be read ambiguously as to * whether PSTATE bits are inherited unchanged or generated from * scratch, and the TF-A implementation always clears PAN and always * clears UAO. There are no other known implementations. * * Subsequent revisions (ARM DEN 0054B) follow the usual rules for how * PSTATE is modified upon architectural exceptions, and so PAN is * either inherited or set per SCTLR_ELx.SPAN, and UAO is always * cleared. * * We must explicitly reset PAN to the expected state, including * clearing it when the host isn't using it, in case a VM had it set.
*/ if (system_uses_hw_pan())
set_pstate_pan(1); elseif (cpu_has_pan())
set_pstate_pan(0);
arm64_enter_nmi(regs);
ret = do_sdei_event(regs, arg);
arm64_exit_nmi(regs);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.