/* This tick register synchronization scheme is taken entirely from
 * the ia64 port, see arch/ia64/kernel/smpboot.c for details and credit.
 *
 * The only change I've made is to rework it so that the master
 * initiates the synchronization instead of the slave. -DaveM
 */
/* average best_t0 and best_t1 without overflow: */
tcenter = (best_t0/2 + best_t1/2); if (best_t0 % 2 + best_t1 % 2 == 2)
tcenter++; return tcenter - best_tm;
}
void smp_synchronize_tick_client(void)
{ long i, delta, adj, adjust_latency = 0, done = 0; unsignedlong flags, rt, master_time_stamp; #if DEBUG_TICK_SYNC struct { long rt; /* roundtrip time */ long master; /* master's timestamp */ long diff; /* difference between midpoint and master's timestamp */ long lat; /* estimate of itc adjustment latency */
} t[NUM_ROUNDS]; #endif
go[MASTER] = 1;
while (go[MASTER])
rmb();
local_irq_save(flags);
{ for (i = 0; i < NUM_ROUNDS; i++) {
delta = get_delta(&rt, &master_time_stamp); if (delta == 0)
done = 1; /* let's lock on to this... */
if (!done) { if (i > 0) {
adjust_latency += -delta;
adj = -delta + adjust_latency/4;
} else
adj = -delta;
/* The OBP cpu startup callback truncates the 3rd arg cookie to
 * 32-bits (I think) so to be safe we have it read the pointer
 * contained here so we work on >4GB machines. -DaveM
 */
static struct thread_info *cpu_new_thread = NULL;
if (this_is_starfire) { /* map to real upaid */
cpu = (((cpu & 0x3c) << 1) |
((cpu & 0x40) >> 4) |
(cpu & 0x3));
}
target = (cpu << 14) | 0x70;
again: /* Ok, this is the real Spitfire Errata #54. * One must read back from a UDB internal register * after writes to the UDB interrupt dispatch, but * before the membar Sync for that write. * So we use the high UDB control register (ASI 0x7f, * ADDR 0x20) for the dummy read. -DaveM
*/
tmp = 0x40;
__asm__ __volatile__( "wrpr %1, %2, %%pstate\n\t" "stxa %4, [%0] %3\n\t" "stxa %5, [%0+%8] %3\n\t" "add %0, %8, %0\n\t" "stxa %6, [%0+%8] %3\n\t" "membar #Sync\n\t" "stxa %%g0, [%7] %3\n\t" "membar #Sync\n\t" "mov 0x20, %%g1\n\t" "ldxa [%%g1] 0x7f, %%g0\n\t" "membar #Sync"
: "=r" (tmp)
: "r" (pstate), "i" (PSTATE_IE), "i" (ASI_INTR_W), "r" (data0), "r" (data1), "r" (data2), "r" (target), "r" (0x10), "0" (tmp)
: "g1");
/* NOTE: PSTATE_IE is still clear. */
stuck = 100000; do {
__asm__ __volatile__("ldxa [%%g0] %1, %0"
: "=r" (result)
: "i" (ASI_INTR_DISPATCH_STAT)); if (result == 0) {
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate)); return;
}
stuck -= 1; if (stuck == 0) break;
} while (result & 0x1);
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
: : "r" (pstate)); if (stuck == 0) {
printk("CPU[%d]: mondo stuckage result[%016llx]\n",
smp_processor_id(), result);
} else {
udelay(2); goto again;
}
}
/* Deliver the mondo stored in this trap block to each of the 'cnt' cpus
 * named in the trap block's cpu list, one target at a time.
 *
 * @tb:  trap block holding the physical addresses of the cpu list and
 *       of the mondo data block.
 * @cnt: number of entries of the cpu list to dispatch to.
 */
static void spitfire_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	u16 *targets = __va(tb->cpu_list_pa);
	u64 *payload = __va(tb->cpu_mondo_block_pa);
	u64 word0, word1, word2;
	u64 saved_pstate;
	int idx = 0;

	/* Sample %pstate once; the same value is handed to every dispatch. */
	__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (saved_pstate));

	/* Snapshot the three mondo words before the first dispatch. */
	word0 = payload[0];
	word1 = payload[1];
	word2 = payload[2];

	while (idx < cnt) {
		spitfire_xcall_helper(word0, word1, word2, saved_pstate,
				      targets[idx]);
		idx++;
	}
}
/* Cheetah now allows sending the whole 64 bytes of data in the interrupt
 * packet, but we have no use for that.  However we do take advantage of
 * the new pipelining feature (i.e. dispatch to multiple cpus simultaneously).
*/ staticvoid cheetah_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{ int nack_busy_id, is_jbus, need_more;
u64 *mondo, pstate, ver, busy_mask;
u16 *cpu_list;
cpu_list = __va(tb->cpu_list_pa);
mondo = __va(tb->cpu_mondo_block_pa);
/* Unfortunately, someone at Sun had the brilliant idea to make the * busy/nack fields hard-coded by ITID number for this Ultra-III * derivative processor.
*/
__asm__ ("rdpr %%ver, %0" : "=r" (ver));
is_jbus = ((ver >> 32) == __JALAPENO_ID ||
(ver >> 32) == __SERRANO_ID);
if (dispatch_stat & busy_mask) { /* Busy bits will not clear, continue instead * of freezing up on this cpu.
*/
printk("CPU[%d]: mondo stuckage result[%016llx]\n",
smp_processor_id(), dispatch_stat);
} else { int i, this_busy_nack = 0;
/* Delay some random time with interrupts enabled * to prevent deadlock.
*/
udelay(2 * nack_busy_id);
/* Clear out the mask bits for cpus which did not * NACK us.
*/ for (i = 0; i < cnt; i++) {
u64 check_mask, nr;
/* Multi-cpu list version.
 *
 * Deliver xcalls to 'cnt' number of cpus in 'cpu_list'.
 * Sometimes not all cpus receive the mondo, requiring us to re-send
 * the mondo until all cpus have received it, or cpus are truly stuck
 * unable to receive the mondo, and we time out.
 * Occasionally a target cpu strand is borrowed briefly by the hypervisor
 * to perform guest service, such as PCIe error handling. Considering the
 * service time, a 1 second overall wait is reasonable for 1 cpu.
 * Two in-between mondo check wait times are defined here: 2 usec for
 * single cpu quick turn around and up to 100 usec for a large cpu count.
 * Delivering a mondo to a large number of cpus could take longer, so we
 * adjust the retry count as long as target cpus are making forward
 * progress.
 *
 * @tb:  trap block holding the physical addresses of the cpu list and
 *       of the mondo data block handed to the hypervisor.
 * @cnt: number of entries in the cpu list to deliver to.
 *
 * Returns normally once no cpu remains to be retried; panics on an
 * unexpected hypervisor status or when the retry limit is exceeded
 * without forward progress.
 */
static void hypervisor_xcall_deliver(struct trap_per_cpu *tb, int cnt)
{
	/* Wait bounds (usec) between mondo checks, per the comment above.
	 * NOTE(review): if this file already provides named constants for
	 * these two values, use those instead of the local ones.
	 */
	enum { XCALL_USEC_WAIT_MIN = 2, XCALL_USEC_WAIT_MAX = 100 };
	int this_cpu, tot_cpus, prev_sent, i, rem;
	int usec_wait, retries, tot_retries;
	u16 first_cpu = 0xffff;
	unsigned long xc_rcvd = 0;
	unsigned long status;
	int ecpuerror_id = 0;
	int enocpu_id = 0;
	u16 *cpu_list;
	u16 cpu;

	/* Everything below is read inside the loop or in the diagnostics,
	 * so it must be set up before the first send.
	 */
	this_cpu = smp_processor_id();
	cpu_list = __va(tb->cpu_list_pa);

	/* Scale the per-retry delay with the number of targets, capped. */
	usec_wait = cnt * XCALL_USEC_WAIT_MIN;
	if (usec_wait > XCALL_USEC_WAIT_MAX)
		usec_wait = XCALL_USEC_WAIT_MAX;

	retries = 0;
	tot_retries = 0;
	tot_cpus = cnt;		/* original count, kept for the messages */
	prev_sent = 0;

	do {
		int n_sent, mondo_delivered, target_cpu_busy;

		status = sun4v_cpu_mondo_send(cnt,
					      tb->cpu_list_pa,
					      tb->cpu_mondo_block_pa);

		/* HV_EOK means all cpus received the xcall, we're done. */
		if (likely(status == HV_EOK))
			goto xcall_done;

		/* If not these non-fatal errors, panic */
		if (unlikely((status != HV_EWOULDBLOCK) &&
			     (status != HV_ECPUERROR) &&
			     (status != HV_ENOCPU)))
			goto fatal_errors;

		/* First, see if we made any forward progress.
		 *
		 * Go through the cpu_list, count the target cpus that have
		 * received our mondo (n_sent), and those that did not (rem).
		 * Re-pack cpu_list with the cpus that remain to be retried
		 * in the front - this simplifies tracking the truly stalled
		 * cpus.
		 *
		 * The hypervisor indicates successful sends by setting
		 * cpu list entries to the value 0xffff.
		 *
		 * EWOULDBLOCK means some target cpus did not receive the
		 * mondo and retry usually helps.
		 *
		 * ECPUERROR means at least one target cpu is in error state,
		 * it's usually safe to skip the faulty cpu and retry.
		 *
		 * ENOCPU means one of the target cpus doesn't belong to the
		 * domain, perhaps offlined which is unexpected, but not
		 * fatal and it's okay to skip the offlined cpu.
		 */
		rem = 0;
		n_sent = 0;
		for (i = 0; i < cnt; i++) {
			cpu = cpu_list[i];
			if (likely(cpu == 0xffff)) {
				n_sent++;
			} else if ((status == HV_ECPUERROR) &&
				   (sun4v_cpu_state(cpu) == HV_CPU_STATE_ERROR)) {
				/* Stored +1 so that 0 means "none seen". */
				ecpuerror_id = cpu + 1;
			} else if (status == HV_ENOCPU && !cpu_online(cpu)) {
				/* Stored +1 so that 0 means "none seen". */
				enocpu_id = cpu + 1;
			} else {
				cpu_list[rem++] = cpu;
			}
		}

		/* No cpu remained, we're done. */
		if (rem == 0)
			break;

		/* Otherwise, update the cpu count for retry. */
		cnt = rem;

		/* Record the overall number of mondos received by the
		 * first of the remaining cpus.
		 */
		if (first_cpu != cpu_list[0]) {
			first_cpu = cpu_list[0];
			xc_rcvd = CPU_MONDO_COUNTER(first_cpu);
		}

		/* Was any mondo delivered successfully? */
		mondo_delivered = (n_sent > prev_sent);
		prev_sent = n_sent;

		/* or, was any target cpu busy processing other mondos? */
		target_cpu_busy = (xc_rcvd < CPU_MONDO_COUNTER(first_cpu));
		xc_rcvd = CPU_MONDO_COUNTER(first_cpu);

		/* Retry count is for no progress. If we're making progress,
		 * reset the retry count.
		 */
		if (likely(mondo_delivered || target_cpu_busy)) {
			tot_retries += retries;
			retries = 0;
		} else if (unlikely(retries > MONDO_RETRY_LIMIT)) {
			goto fatal_mondo_timeout;
		}

		/* Delay a little bit to let other cpus catch up on
		 * their cpu mondo queue work.
		 */
		if (!mondo_delivered)
			udelay(usec_wait);

		retries++;
	} while (1);

xcall_done:
	if (unlikely(ecpuerror_id > 0)) {
		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) was in error state\n",
			this_cpu, ecpuerror_id - 1);
	} else if (unlikely(enocpu_id > 0)) {
		pr_crit("CPU[%d]: SUN4V mondo cpu error, target cpu(%d) does not belong to the domain\n",
			this_cpu, enocpu_id - 1);
	}
	return;

fatal_errors:
	/* fatal errors include bad alignment, etc */
	pr_crit("CPU[%d]: Args were cnt(%d) cpulist_pa(%lx) mondo_block_pa(%lx)\n",
		this_cpu, tot_cpus, tb->cpu_list_pa, tb->cpu_mondo_block_pa);
	panic("Unexpected SUN4V mondo error %lu\n", status);

fatal_mondo_timeout:
	/* some cpus being non-responsive to the cpu mondo */
	pr_crit("CPU[%d]: SUN4V mondo timeout, cpu(%d) made no forward progress after %d retries. Total target cpus(%d).\n",
		this_cpu, first_cpu, (tot_retries + retries), tot_cpus);
	panic("SUN4V mondo timeout panic\n");
}
/* We have to do this whole thing with interrupts fully disabled. * Otherwise if we send an xcall from interrupt context it will * corrupt both our mondo block and cpu list state. * * One consequence of this is that we cannot use timeout mechanisms * that depend upon interrupts being delivered locally. So, for * example, we cannot sample jiffies and expect it to advance. * * Fortunately, udelay() uses %stick/%tick so we can use that.
*/
local_irq_save(flags);
/* It is not valid to test "current->active_mm == mm" here. * * The value of "current" is not changed atomically with * switch_mm(). But that's OK, we just need to check the * current cpu's trap block PGD physical address.
*/ if (tp->pgd_paddr == __pa(mm->pgd))
tsb_context_switch(mm);
}
/* Cross-call tsb_sync(), with 'mm' as its argument, on the cpus in
 * mm_cpumask(mm).
 */
void smp_tsb_sync(struct mm_struct *mm)
{
	/* Last argument of smp_call_function_many(); kept at 1. */
	const int wait_flag = 1;

	smp_call_function_many(mm_cpumask(mm), tsb_sync, mm, wait_flag);
}
/* We know that the window frames of the user have been flushed
 * to the stack before we get here because all of our callers
 * are flush_tlb_*() routines, and these run after flush_cache_*()
 * which performs the flushw.
 *
 * mm->cpu_vm_mask is a bit mask of which cpus an address
 * space has (potentially) executed on, this is the heuristic
 * we use to limit cross calls.
*/
/* This currently is only used by the hugetlb arch pre-fault * hook on UltraSPARC-III+ and later when changing the pagesize * bits of the context register for an address space.
*/ void smp_flush_tlb_mm(struct mm_struct *mm)
{
u32 ctx = CTX_HWBITS(mm->context);
int __cpu_up(unsignedint cpu, struct task_struct *tidle)
{ int ret = smp_boot_one_cpu(cpu, tidle);
if (!ret) {
cpumask_set_cpu(cpu, &smp_commenced_mask); while (!cpu_online(cpu))
mb(); if (!cpu_online(cpu)) {
ret = -ENODEV;
} else { /* On SUN4V, writes to %tick and %stick are * not allowed.
*/ if (tlb_type != hypervisor)
smp_synchronize_one_tick(cpu);
}
} return ret;
}
#ifdef CONFIG_HOTPLUG_CPU void cpu_play_dead(void)
{ int cpu = smp_processor_id(); unsignedlong pstate;
idle_task_exit();
if (tlb_type == hypervisor) { struct trap_per_cpu *tb = &trap_block[cpu];
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.