/*
 * The pointer to the vfpstate structure of the thread which currently
 * owns the context held in the VFP hardware, or NULL if the hardware
 * context is invalid.
 *
 * For UP, this is sufficient to tell which thread owns the VFP context.
 * However, for SMP, we also need to check the CPU number stored in the
 * saved state too to catch migrations.
 */
union vfp_state *vfp_current_hw_state[NR_CPUS];
/* * Claim ownership of the VFP unit. * * The caller may change VFP registers until vfp_state_release() is called. * * local_bh_disable() is used to disable preemption and to disable VFP * processing in softirq context. On PREEMPT_RT kernels local_bh_disable() is * not sufficient because it only serializes soft interrupt related sections * via a local lock, but stays preemptible. Disabling preemption is the right * choice here as bottom half processing is always in thread context on RT * kernels so it implicitly prevents bottom half processing as well.
*/ staticvoid vfp_state_hold(void)
{ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_disable(); else
preempt_disable();
}
staticvoid vfp_state_release(void)
{ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
local_bh_enable(); else
preempt_enable();
}
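/*
 * Illustrative sketch, not part of the original file: a hypothetical helper
 * showing the intended calling pattern. VFP state must only be touched
 * between vfp_state_hold() and vfp_state_release(); the helper name
 * example_read_fpexc() is made up for illustration.
 */
static inline u32 example_read_fpexc(void)
{
        u32 fpexc;

        vfp_state_hold();       /* claim the VFP unit; no preemption/softirqs */
        fpexc = fmrx(FPEXC);    /* safe to inspect or modify VFP state here */
        vfp_state_release();    /* give the unit back */

        return fpexc;
}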
/* * Is 'thread's most up to date state stored in this CPUs hardware? * Must be called from non-preemptible context.
*/ staticbool vfp_state_in_hw(unsignedint cpu, struct thread_info *thread)
{ #ifdef CONFIG_SMP if (thread->vfpstate.hard.cpu != cpu) returnfalse; #endif return vfp_current_hw_state[cpu] == &thread->vfpstate;
}
/* * Force a reload of the VFP context from the thread structure. We do * this by ensuring that access to the VFP hardware is disabled, and * clear vfp_current_hw_state. Must be called from non-preemptible context.
*/ staticvoid vfp_force_reload(unsignedint cpu, struct thread_info *thread)
{ if (vfp_state_in_hw(cpu, thread)) {
fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
vfp_current_hw_state[cpu] = NULL;
} #ifdef CONFIG_SMP
thread->vfpstate.hard.cpu = NR_CPUS; #endif
}
        /*
         * Disable VFP to ensure we initialize it first. We must ensure
         * that the modification of vfp_current_hw_state[] and hardware
         * disable are done for the same CPU and without preemption.
         *
         * Do this first to ensure that preemption won't overwrite our
         * state saving should access to the VFP be enabled at this point.
         */
        cpu = get_cpu();
        if (vfp_current_hw_state[cpu] == vfp)
                vfp_current_hw_state[cpu] = NULL;
        fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
        put_cpu();
/* * When this function is called with the following 'cmd's, the following * is true while this function is being run: * THREAD_NOTIFY_SWITCH: * - the previously running thread will not be scheduled onto another CPU. * - the next thread to be run (v) will not be running on another CPU. * - thread->cpu is the local CPU number * - not preemptible as we're called in the middle of a thread switch * THREAD_NOTIFY_FLUSH: * - the thread (v) will be running on the local CPU, so * v === current_thread_info() * - thread->cpu is the local CPU number at the time it is accessed, * but may change at any time. * - we could be preempted if tree preempt rcu is enabled, so * it is unsafe to use thread->cpu. * THREAD_NOTIFY_EXIT * - we could be preempted if tree preempt rcu is enabled, so * it is unsafe to use thread->cpu.
*/ staticint vfp_notifier(struct notifier_block *self, unsignedlong cmd, void *v)
{ struct thread_info *thread = v;
u32 fpexc; #ifdef CONFIG_SMP unsignedint cpu; #endif
switch (cmd) { case THREAD_NOTIFY_SWITCH:
fpexc = fmrx(FPEXC);
#ifdef CONFIG_SMP
cpu = thread->cpu;
/* * On SMP, if VFP is enabled, save the old state in * case the thread migrates to a different CPU. The * restoring is done lazily.
*/ if ((fpexc & FPEXC_EN) && vfp_current_hw_state[cpu])
vfp_save_state(vfp_current_hw_state[cpu], fpexc); #endif
/* * Always disable VFP so we can lazily save/restore the * old state.
*/
fmxr(FPEXC, fpexc & ~FPEXC_EN); break;
case THREAD_NOTIFY_FLUSH:
vfp_thread_flush(thread); break;
case THREAD_NOTIFY_EXIT:
vfp_thread_exit(thread); break;
case THREAD_NOTIFY_COPY:
vfp_thread_copy(thread); break;
}
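/*
 * Illustrative sketch, not part of the original file: vfp_notifier() only
 * takes effect once it is hooked into the ARM thread-notify chain, which
 * this file does (from vfp_init()) roughly like this:
 *
 *      static struct notifier_block vfp_notifier_block = {
 *              .notifier_call = vfp_notifier,
 *      };
 *      ...
 *      thread_register_notifier(&vfp_notifier_block);
 */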
/*
 * Raise a SIGFPE for the current process.
 * sicode describes the signal being raised.
 */
static void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs)
{
        /*
         * This is the same as NWFPE, because it's not clear what
         * this is used for
         */
        current->thread.error_code = 0;
        current->thread.trap_no = 6;
        /*
         * If any of the status flags are set, update the FPSCR.
         * Comparison instructions always return at least one of
         * these flags set.
         */
        if (exceptions & (FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V))
                fpscr &= ~(FPSCR_N|FPSCR_Z|FPSCR_C|FPSCR_V);

        fpscr |= exceptions;

        fmxr(FPSCR, fpscr);

#define RAISE(stat,en,sig)                              \
        if (exceptions & stat && fpscr & en)            \
                si_code = sig;

        /*
         * These are arranged in priority order, least to highest.
         */
        RAISE(FPSCR_DZC, FPSCR_DZE, FPE_FLTDIV);
        RAISE(FPSCR_IXC, FPSCR_IXE, FPE_FLTRES);
        RAISE(FPSCR_UFC, FPSCR_UFE, FPE_FLTUND);
        RAISE(FPSCR_OFC, FPSCR_OFE, FPE_FLTOVF);
        RAISE(FPSCR_IOC, FPSCR_IOE, FPE_FLTINV);
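        /*
         * Illustrative note, not in the original source: each RAISE()
         * invocation above expands to a plain if-statement, e.g. the last
         * one becomes
         *
         *      if (exceptions & FPSCR_IOC && fpscr & FPSCR_IOE)
         *              si_code = FPE_FLTINV;
         *
         * so a later (higher-priority) match simply overwrites si_code.
         */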
        if (INST_CPRTDO(inst)) {
                if (!INST_CPRT(inst)) {
                        /*
                         * CPDO
                         */
                        if (vfp_single(inst)) {
                                exceptions = vfp_single_cpdo(inst, fpscr);
                        } else {
                                exceptions = vfp_double_cpdo(inst, fpscr);
                        }
                } else {
                        /*
                         * A CPRT instruction can not appear in FPINST2, nor
                         * can it cause an exception. Therefore, we do not
                         * have to emulate it.
                         */
                }
        } else {
                /*
                 * A CPDT instruction can not appear in FPINST2, nor can
                 * it cause an exception. Therefore, we do not have to
                 * emulate it.
                 */
        }

        perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc);

        return exceptions & ~VFP_NAN_FLAG;
}
/*
 * Package up a bounce condition.
 */
static void VFP_bounce(u32 trigger, u32 fpexc, struct pt_regs *regs)
{
        u32 fpscr, orig_fpscr, fpsid, exceptions;
        int si_code2 = 0;
        int si_code = 0;

        /*
         * At this point, FPEXC can have the following configuration:
         *
         *  EX DEX IXE
         *  0   1   x   - synchronous exception
         *  1   x   0   - asynchronous exception
         *  1   x   1   - synchronous on VFP subarch 1 and asynchronous on later
         *  0   0   1   - synchronous on VFP9 (non-standard subarch 1
         *                implementation), undefined otherwise
         *
         * Clear various bits and enable access to the VFP so we can
         * handle the bounce.
         */
        fmxr(FPEXC, fpexc & ~(FPEXC_EX|FPEXC_DEX|FPEXC_FP2V|FPEXC_VV|FPEXC_TRAP_MASK));

        fpsid = fmrx(FPSID);
        orig_fpscr = fpscr = fmrx(FPSCR);
        /*
         * Check for the special VFP subarch 1 and FPSCR.IXE bit case
         */
        if ((fpsid & FPSID_ARCH_MASK) == (1 << FPSID_ARCH_BIT)
            && (fpscr & FPSCR_IXE)) {
                /*
                 * Synchronous exception, emulate the trigger instruction
                 */
                goto emulate;
        }

        if (fpexc & FPEXC_EX) {
                /*
                 * Asynchronous exception. The instruction is read from FPINST
                 * and the interrupted instruction has to be restarted.
                 */
                trigger = fmrx(FPINST);
                regs->ARM_pc -= 4;
        } else if (!(fpexc & FPEXC_DEX)) {
                /*
                 * Illegal combination of bits. It can be caused by an
                 * unallocated VFP instruction but with FPSCR.IXE set and not
                 * on VFP subarch 1.
                 */
                si_code = vfp_raise_exceptions(VFP_EXCEPTION_ERROR, trigger, fpscr);
                goto exit;
        }
        /*
         * Modify fpscr to indicate the number of iterations remaining.
         * If FPEXC.EX is 0, FPEXC.DEX is 1 and the FPEXC.VV bit indicates
         * whether FPEXC.VECITR or FPSCR.LEN is used.
         */
        if (fpexc & (FPEXC_EX | FPEXC_VV)) {
                u32 len;

                len = fpexc + (1 << FPEXC_LENGTH_BIT);

                fpscr &= ~FPSCR_LENGTH_MASK;
                fpscr |= (len & FPEXC_LENGTH_MASK) << (FPSCR_LENGTH_BIT - FPEXC_LENGTH_BIT);
        }

        /*
         * Handle the first FP instruction. We used to take note of the
         * FPEXC bounce reason, but this appears to be unreliable.
         * Emulate the bounced instruction instead.
         */
        exceptions = vfp_emulate_instruction(trigger, fpscr, regs);
        if (exceptions)
                si_code2 = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
        /*
         * If there isn't a second FP instruction, exit now. Note that
         * the FPEXC.FP2V bit is valid only if FPEXC.EX is 1.
         */
        if ((fpexc & (FPEXC_EX | FPEXC_FP2V)) != (FPEXC_EX | FPEXC_FP2V))
                goto exit;

        /*
         * The barrier() here prevents fpinst2 being read
         * before the condition above.
         */
        barrier();
        trigger = fmrx(FPINST2);

emulate:
        exceptions = vfp_emulate_instruction(trigger, orig_fpscr, regs);
        if (exceptions)
                si_code = vfp_raise_exceptions(exceptions, trigger, orig_fpscr);
exit:
        vfp_state_release();
        if (si_code2)
                vfp_raise_sigfpe(si_code2, regs);
        if (si_code)
                vfp_raise_sigfpe(si_code, regs);
}
        /*
         * Enable full access to VFP (cp10 and cp11)
         */
        set_copro_access(access | CPACC_FULL(10) | CPACC_FULL(11));
}
/*
 * Called by platforms on which we want to disable VFP because it may not be
 * present on all CPUs within an SMP complex. Needs to be called prior to
 * vfp_init().
 */
void __init vfp_disable(void)
{
        if (VFP_arch) {
                pr_debug("%s: should be called prior to vfp_init\n", __func__);
                return;
        }
        VFP_arch = 1;
}
        /* if vfp is on, then save state for resumption */
        if (fpexc & FPEXC_EN) {
                pr_debug("%s: saving vfp state\n", __func__);
                vfp_save_state(&ti->vfpstate, fpexc);

                /* disable, just in case */
                fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
        } else if (vfp_current_hw_state[ti->cpu]) {
#ifndef CONFIG_SMP
                fmxr(FPEXC, fpexc | FPEXC_EN);
                vfp_save_state(vfp_current_hw_state[ti->cpu], fpexc);
                fmxr(FPEXC, fpexc);
#endif
        }

        /* clear any information we had about last context state */
        vfp_current_hw_state[ti->cpu] = NULL;

        return 0;
}
static void vfp_pm_resume(void)
{
        /* ensure we have access to the vfp */
        vfp_enable(NULL);

        /* and disable it to ensure the next usage restores the state */
        fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
}

static int vfp_cpu_pm_notifier(struct notifier_block *self, unsigned long cmd,
                               void *v)
{
        switch (cmd) {
        case CPU_PM_ENTER:
                vfp_pm_suspend();
                break;
        case CPU_PM_ENTER_FAILED:
        case CPU_PM_EXIT:
                vfp_pm_resume();
                break;
        }
        return NOTIFY_OK;
}
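/*
 * Illustrative sketch, not part of the original file: this CPU PM callback is
 * typically wired up through a notifier block registered with the CPU PM
 * framework, roughly along these lines:
 *
 *      static struct notifier_block vfp_cpu_pm_notifier_block = {
 *              .notifier_call = vfp_cpu_pm_notifier,
 *      };
 *      ...
 *      cpu_pm_register_notifier(&vfp_cpu_pm_notifier_block);
 */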
/*
 * Ensure that the VFP state stored in 'thread->vfpstate' is up to date
 * with the hardware state.
 */
void vfp_sync_hwstate(struct thread_info *thread)
{
        vfp_state_hold();

        if (vfp_state_in_hw(raw_smp_processor_id(), thread)) {
                u32 fpexc = fmrx(FPEXC);

                /*
                 * Save the last VFP state on this CPU.
                 */
                fmxr(FPEXC, fpexc | FPEXC_EN);
                vfp_save_state(&thread->vfpstate, fpexc | FPEXC_EN);
                fmxr(FPEXC, fpexc);
        }

        vfp_state_release();
}
/* Ensure that the thread reloads the hardware VFP state on the next use. */
void vfp_flush_hwstate(struct thread_info *thread)
{
        unsigned int cpu = get_cpu();

        vfp_force_reload(cpu, thread);

        put_cpu();
}
/*
 * Save the current VFP state into the provided structures and prepare
 * for entry into a new function (signal handler).
 */
int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp,
                                    struct user_vfp_exc *ufp_exc)
{
        struct thread_info *thread = current_thread_info();
        struct vfp_hard_struct *hwstate = &thread->vfpstate.hard;

        /* Ensure that the saved hwstate is up-to-date. */
        vfp_sync_hwstate(thread);

        /*
         * Copy the floating point registers. There can be unused
         * registers; see asm/hwcap.h for details.
         */
        memcpy(&ufp->fpregs, &hwstate->fpregs, sizeof(hwstate->fpregs));

        /*
         * Copy the status and control register.
         */
        ufp->fpscr = hwstate->fpscr;

        /*
         * Copy the exception registers.
         */
        ufp_exc->fpexc = hwstate->fpexc;
        ufp_exc->fpinst = hwstate->fpinst;
        ufp_exc->fpinst2 = hwstate->fpinst2;

        /* Ensure that VFP is disabled. */
        vfp_flush_hwstate(thread);

        /*
         * As per the PCS, clear the length and stride bits for function
         * entry.
         */
        hwstate->fpscr &= ~(FPSCR_LENGTH_MASK | FPSCR_STRIDE_MASK);

        return 0;
}
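/*
 * Illustrative note, not in the original source: FPSCR_LENGTH_MASK and
 * FPSCR_STRIDE_MASK cover the FPSCR LEN (bits [18:16]) and STRIDE
 * (bits [21:20]) short-vector fields. The AAPCS requires both to read as
 * zero on entry to a public interface, which is why they are cleared above
 * before the signal handler starts executing.
 */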
/* Sanitise and restore the current VFP state from the provided structures. */
int vfp_restore_user_hwstate(struct user_vfp *ufp, struct user_vfp_exc *ufp_exc)
{
        struct thread_info *thread = current_thread_info();
        struct vfp_hard_struct *hwstate = &thread->vfpstate.hard;
        unsigned long fpexc;

        /* Disable VFP to avoid corrupting the new thread state. */
        vfp_flush_hwstate(thread);

        /*
         * Copy the floating point registers. There can be unused
         * registers; see asm/hwcap.h for details.
         */
        memcpy(&hwstate->fpregs, &ufp->fpregs, sizeof(hwstate->fpregs));

        /*
         * Copy the status and control register.
         */
        hwstate->fpscr = ufp->fpscr;

        /*
         * Sanitise and restore the exception registers.
         */
        fpexc = ufp_exc->fpexc;

        /* Ensure the VFP is enabled. */
        fpexc |= FPEXC_EN;

        /* Ensure FPINST2 is invalid and the exception flag is cleared. */
        fpexc &= ~(FPEXC_EX | FPEXC_FP2V);
        hwstate->fpexc = fpexc;

        hwstate->fpinst = ufp_exc->fpinst;
        hwstate->fpinst2 = ufp_exc->fpinst2;

        return 0;
}
/*
 * VFP hardware can lose all context when a CPU goes offline.
 * As we will be running in SMP mode with CPU hotplug, we will save the
 * hardware state at every thread switch. We clear our held state when
 * a CPU has been killed, indicating that the VFP hardware doesn't contain
 * a thread's VFP state. When a CPU starts up, we re-enable access to the
 * VFP hardware. The callbacks below are called on the CPU which
 * is being offlined/onlined.
 */
static int vfp_dying_cpu(unsigned int cpu)
{
        vfp_current_hw_state[cpu] = NULL;
        return 0;
}
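/*
 * Illustrative sketch, not part of the original file: vfp_dying_cpu() (and
 * its vfp_starting_cpu() counterpart, not shown in this excerpt) are hooked
 * into the CPU hotplug state machine roughly like this:
 *
 *      cpuhp_setup_state_nocalls(CPUHP_AP_ARM_VFP_STARTING,
 *                                "arm/vfp:starting", vfp_starting_cpu,
 *                                vfp_dying_cpu);
 */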
static int vfp_kmode_exception(struct pt_regs *regs, unsigned int instr)
{
        /*
         * If we reach this point, a floating point exception has been raised
         * while running in kernel mode. If the NEON/VFP unit was enabled at the
         * time, it means a VFP instruction has been issued that requires
         * software assistance to complete, something which is not currently
         * supported in kernel mode.
         * If the NEON/VFP unit was disabled, and the location pointed to below
         * is properly preceded by a call to kernel_neon_begin(), something has
         * caused the task to be scheduled out and back in again. In this case,
         * rebuilding and running with CONFIG_DEBUG_ATOMIC_SLEEP enabled should
         * be helpful in localizing the problem.
         */
        if (fmrx(FPEXC) & FPEXC_EN)
                pr_crit("BUG: unsupported FP instruction in kernel mode\n");
        else
                pr_crit("BUG: FP instruction issued in kernel mode with FP unit disabled\n");
        pr_crit("FPEXC == 0x%08x\n", fmrx(FPEXC));
        return 1;
}
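/*
 * Illustrative sketch, not part of the original file: the bracketing the
 * comment above relies on looks roughly like this in a kernel-mode NEON
 * user (do_neon_work() and do_scalar_work() are hypothetical helpers):
 *
 *      if (may_use_simd()) {
 *              kernel_neon_begin();
 *              do_neon_work();         (NEON/VFP allowed, must not sleep)
 *              kernel_neon_end();
 *      } else {
 *              do_scalar_work();       (fallback without NEON/VFP)
 *      }
 *
 * Sleeping between kernel_neon_begin() and kernel_neon_end() is what leads
 * to the second pr_crit() message above.
 */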
/*
 * vfp_support_entry - Handle VFP exception
 *
 * @regs:	pt_regs structure holding the register state at exception entry
 * @trigger:	The opcode of the instruction that triggered the exception
 *
 * Returns 0 if the exception was handled, or an error code otherwise.
 */
static int vfp_support_entry(struct pt_regs *regs, u32 trigger)
{
        struct thread_info *ti = current_thread_info();
        u32 fpexc;

        if (unlikely(!have_vfp))
                return -ENODEV;

        if (!user_mode(regs))
                return vfp_kmode_exception(regs, trigger);

        vfp_state_hold();
        fpexc = fmrx(FPEXC);

        /*
         * If the VFP unit was not enabled yet, we have to check whether the
         * VFP state in the CPU's registers is the most recent VFP state
         * associated with the process. On UP systems, we don't save the VFP
         * state eagerly on a context switch, so we may need to save the
         * VFP state to memory first, as it may belong to another process.
         */
        if (!(fpexc & FPEXC_EN)) {
                /*
                 * Enable the VFP unit but mask the FP exception flag for the
                 * time being, so we can access all the registers.
                 */
                fpexc |= FPEXC_EN;
                fmxr(FPEXC, fpexc & ~FPEXC_EX);

                /*
                 * Check whether or not the VFP state in the CPU's registers is
                 * the most recent VFP state associated with this task. On SMP,
                 * migration may result in multiple CPUs holding VFP states
                 * that belong to the same task, but only the most recent one
                 * is valid.
                 */
                if (!vfp_state_in_hw(ti->cpu, ti)) {
                        if (!IS_ENABLED(CONFIG_SMP) &&
                            vfp_current_hw_state[ti->cpu] != NULL) {
                                /*
                                 * This CPU is currently holding the most
                                 * recent VFP state associated with another
                                 * task, and we must save that to memory first.
                                 */
                                vfp_save_state(vfp_current_hw_state[ti->cpu],
                                               fpexc);
                        }

                        /*
                         * We can now proceed with loading the task's VFP state
                         * from memory into the CPU registers.
                         */
                        fpexc = vfp_load_state(&ti->vfpstate);
                        vfp_current_hw_state[ti->cpu] = &ti->vfpstate;
#ifdef CONFIG_SMP
                        /*
                         * Record that this CPU is now the one holding the most
                         * recent VFP state of the task.
                         */
                        ti->vfpstate.hard.cpu = ti->cpu;
#endif
                }

                if (fpexc & FPEXC_EX)
                        /*
                         * Might as well handle the pending exception before
                         * retrying: branch out before setting an FPEXC that
                         * stops us reading stuff.
                         */
                        goto bounce;

                /*
                 * No FP exception is pending: just enable the VFP and
                 * replay the instruction that trapped.
                 */
                fmxr(FPEXC, fpexc);
                vfp_state_release();
        } else {
                /* Check for synchronous or asynchronous exceptions */
                if (!(fpexc & (FPEXC_EX | FPEXC_DEX))) {
                        u32 fpscr = fmrx(FPSCR);

                        /*
                         * On some implementations of the VFP subarch 1,
                         * setting FPSCR.IXE causes all the CDP instructions to
                         * be bounced synchronously without setting the
                         * FPEXC.EX bit
                         */
                        if (!(fpscr & FPSCR_IXE)) {
                                if (!(fpscr & FPSCR_LENGTH_MASK)) {
                                        pr_debug("not VFP\n");
                                        vfp_state_release();
                                        return -ENOEXEC;
                                }
                                fpexc |= FPEXC_DEX;
                        }
                }

bounce:         regs->ARM_pc += 4;
                /* VFP_bounce() will invoke vfp_state_release() */
                VFP_bounce(trigger, fpexc, regs);
        }

        return 0;
}
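/*
 * Illustrative note, not in the original source: vfp_support_entry() is not
 * called directly from other C code; it is reached through the
 * undefined-instruction hooks that this file registers for VFP/NEON opcode
 * ranges (see the register_undef_hook() calls further down), so a trapped
 * VFP instruction in user space ends up here with its opcode in 'trigger'.
 */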
        /*
         * Kernel mode NEON is only allowed outside of hardirq context with
         * preemption and softirq processing disabled. This will make sure that
         * the kernel mode NEON register contents never need to be preserved.
         */
        BUG_ON(in_hardirq());
        BUG_ON(irqs_disabled());

        cpu = __smp_processor_id();

        /*
         * Save the userland NEON/VFP state. Under UP,
         * the owner could be a task other than 'current'
         */
        if (vfp_state_in_hw(cpu, thread))
                vfp_save_state(&thread->vfpstate, fpexc);
#ifndef CONFIG_SMP
        else if (vfp_current_hw_state[cpu] != NULL)
                vfp_save_state(vfp_current_hw_state[cpu], fpexc);
#endif
        vfp_current_hw_state[cpu] = NULL;
}
EXPORT_SYMBOL(kernel_neon_begin);
void kernel_neon_end(void)
{
        /* Disable the NEON/VFP unit. */
        fmxr(FPEXC, fmrx(FPEXC) & ~FPEXC_EN);
        vfp_state_release();
}
EXPORT_SYMBOL(kernel_neon_end);

#endif /* CONFIG_KERNEL_MODE_NEON */
static int __init vfp_detect(struct pt_regs *regs, unsigned int instr)
{
        VFP_arch = UINT_MAX;    /* mark as not present */
        regs->ARM_pc += 4;
        return 0;
}
        /*
         * Enable the access to the VFP on all online CPUs so the
         * following test on FPSID will succeed.
         */
        if (cpu_arch >= CPU_ARCH_ARMv6)
                on_each_cpu(vfp_enable, NULL, 1);

        /*
         * First check that there is a VFP that we can use.
         * The handler is already set up to just log calls, so
         * we just need to read the VFPSID register.
         */
        register_undef_hook(&vfp_detect_hook);
        barrier();
        vfpsid = fmrx(FPSID);
        barrier();
        unregister_undef_hook(&vfp_detect_hook);
pr_info("VFP support v0.3: "); if (VFP_arch) {
pr_cont("not present\n"); return 0; /* Extract the architecture on CPUID scheme */
} elseif ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
VFP_arch = vfpsid & FPSID_CPUID_ARCH_MASK;
VFP_arch >>= FPSID_ARCH_BIT; /* * Check for the presence of the Advanced SIMD * load/store instructions, integer and single * precision floating point operations. Only check * for NEON if the hardware has the MVFR registers.
*/ if (IS_ENABLED(CONFIG_NEON) &&
(fmrx(MVFR1) & 0x000fff00) == 0x00011100) {
elf_hwcap |= HWCAP_NEON; for (int i = 0; i < ARRAY_SIZE(neon_support_hook); i++)
register_undef_hook(&neon_support_hook[i]);
}
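                /*
                 * Illustrative note, not in the original source: user space
                 * typically discovers the capability bits set here through
                 * the ELF auxiliary vector, e.g. with glibc:
                 *
                 *      #include <sys/auxv.h>
                 *      #include <asm/hwcap.h>
                 *
                 *      if (getauxval(AT_HWCAP) & HWCAP_NEON)
                 *              use_neon_path();
                 *
                 * where use_neon_path() is a hypothetical application function.
                 */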
                if (IS_ENABLED(CONFIG_VFPv3)) {
                        u32 mvfr0 = fmrx(MVFR0);
                        if (((mvfr0 & MVFR0_DP_MASK) >> MVFR0_DP_BIT) == 0x2 ||
                            ((mvfr0 & MVFR0_SP_MASK) >> MVFR0_SP_BIT) == 0x2) {
                                elf_hwcap |= HWCAP_VFPv3;

                                /*
                                 * Check for VFPv3 D16 and VFPv4 D16. CPUs in
                                 * this configuration only have 16 x 64bit
                                 * registers.
                                 */
                                if ((mvfr0 & MVFR0_A_SIMD_MASK) == 1)
                                        /* also v4-D16 */
                                        elf_hwcap |= HWCAP_VFPv3D16;
                                else
                                        elf_hwcap |= HWCAP_VFPD32;
                        }
                        /*
                         * Check for the presence of Advanced SIMD Dot Product
                         * instructions.
                         */
                        isar6 = read_cpuid_ext(CPUID_EXT_ISAR6);
                        if (cpuid_feature_extract_field(isar6, 4) == 0x1)
                                elf_hwcap |= HWCAP_ASIMDDP;

                        /*
                         * Check for the presence of Advanced SIMD Floating point
                         * half-precision multiplication instructions.
                         */
                        if (cpuid_feature_extract_field(isar6, 8) == 0x1)
                                elf_hwcap |= HWCAP_ASIMDFHM;

                        /*
                         * Check for the presence of Advanced SIMD Bfloat16
                         * floating point instructions.
                         */
                        if (cpuid_feature_extract_field(isar6, 20) == 0x1)
                                elf_hwcap |= HWCAP_ASIMDBF16;

                        /*
                         * Check for the presence of Advanced SIMD and floating point
                         * Int8 matrix multiplication instructions.
                         */
                        if (cpuid_feature_extract_field(isar6, 24) == 0x1)
                                elf_hwcap |= HWCAP_I8MM;
                }
        /* Extract the architecture version on pre-cpuid scheme */
        } else {
                if (vfpsid & FPSID_NODOUBLE) {
                        pr_cont("no double precision support\n");
                        return 0;
                }