struct qnode {
        struct qnode    *next;
        struct qspinlock *lock;
        int             cpu;
        u8              sleepy; /* 1 if the previous vCPU was preempted or
                                 * if the previous node was sleepy */
        u8              locked; /* 1 if lock acquired */
};
struct qnodes {
        int             count;
        struct qnode    nodes[MAX_NODES];
};
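The queue tail is published in the lock word as an encoded CPU number; the encode_tail_cpu()/decode_tail_cpu() helpers used below are defined outside this excerpt. The following is a minimal sketch of the presumed encoding, with placeholder offset/mask values, not the kernel's definitions:

/*
 * Sketch only (not from this file): CPU numbers are stored +1 so that a
 * tail field of zero can mean "no queue". SKETCH_* values are assumptions.
 */
#define SKETCH_TAIL_CPU_OFFSET  16
#define SKETCH_TAIL_CPU_MASK    (0xffffU << SKETCH_TAIL_CPU_OFFSET)

static inline u32 sketch_encode_tail_cpu(int cpu)
{
        return (cpu + 1) << SKETCH_TAIL_CPU_OFFSET;
}

static inline int sketch_decode_tail_cpu(u32 val)
{
        return ((val & SKETCH_TAIL_CPU_MASK) >> SKETCH_TAIL_CPU_OFFSET) - 1;
}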
#if _Q_SPIN_SPEC_BARRIER == 1
#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
#else
#define spec_barrier() do { } while (0)
#endif
static __always_inline bool recently_sleepy(void)
{
        /* pv_sleepy_lock is true when this is called */
        if (pv_sleepy_lock_interval_ns) {
                u64 seen = this_cpu_read(sleepy_lock_seen_clock);

                if (seen) {
                        u64 delta = sched_clock() - seen;

                        if (delta < pv_sleepy_lock_interval_ns)
                                return true;
                        this_cpu_write(sleepy_lock_seen_clock, 0);
                }
        }

        return false;
}
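The seen_sleepy_lock() helper called from the steal and queue loops below is not part of this excerpt. In the same style as seen_sleepy_node() further down, it presumably just records the timestamp; a minimal sketch, assuming the same per-CPU clock:

/* Sketch of the companion helper used below (not shown in this excerpt). */
static __always_inline void sketch_seen_sleepy_lock(void)
{
        if (pv_sleepy_lock_interval_ns)
                this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}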
static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
{
        if (paravirt && sleepy)
                return steal_spins * pv_sleepy_lock_factor;
        else
                return steal_spins;
}

static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
{
        if (paravirt && sleepy)
                return remote_steal_spins * pv_sleepy_lock_factor;
        else
                return remote_steal_spins;
}

static __always_inline int get_head_spins(bool paravirt, bool sleepy)
{
        if (paravirt && sleepy)
                return head_spins * pv_sleepy_lock_factor;
        else
                return head_spins;
}
/*
 * Try to acquire the lock if it was not already locked. If the tail matches
 * mytail then clear it, otherwise leave it unchanged. Return previous value.
 *
 * This is used by the head of the queue to acquire the lock and clean up
 * its tail if it was the last one queued.
 */
static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
{
        u32 newval = queued_spin_encode_locked_val();
        u32 prev, tmp;

        asm volatile(
"1:     lwarx   %0,0,%2,%7      # trylock_clean_tail            \n"
        /* This test is necessary if there could be stealers */
"       andi.   %1,%0,%5                                        \n"
"       bne     3f                                              \n"
        /* Test whether the lock tail == mytail */
"       and     %1,%0,%6                                        \n"
"       cmpw    0,%1,%3                                         \n"
        /* Merge the new locked value */
"       or      %1,%1,%4                                        \n"
"       bne     2f                                              \n"
        /* If the lock tail matched, then clear it, otherwise leave it. */
"       andc    %1,%1,%6                                        \n"
"2:     stwcx.  %1,0,%2                                         \n"
"       bne-    1b                                              \n"
"\t"    PPC_ACQUIRE_BARRIER "                                   \n"
"3:                                                             \n"
        : "=&r" (prev), "=&r" (tmp)
        : "r" (&lock->val), "r" (tail), "r" (newval),
          "i" (_Q_LOCKED_VAL),
          "r" (_Q_TAIL_CPU_MASK),
          "i" (_Q_SPIN_EH_HINT)
        : "cr0", "memory");

        return prev;
}
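For readers not fluent in Power assembly, here is a C11 sketch of the atomic sequence the lwarx/stwcx. loop above implements. It is illustrative only: the constants stand in for _Q_LOCKED_VAL and _Q_TAIL_CPU_MASK, their values are assumptions, and the real code needs the inline asm for the acquire barrier and EH hint.

#include <stdatomic.h>
#include <stdint.h>

#define LOCKED_VAL      0x1U            /* stands in for _Q_LOCKED_VAL */
#define TAIL_CPU_MASK   0xffff0000U     /* stands in for _Q_TAIL_CPU_MASK */

static uint32_t sketch_trylock_clean_tail(_Atomic uint32_t *val, uint32_t tail,
                                          uint32_t newval)
{
        uint32_t prev = atomic_load_explicit(val, memory_order_relaxed);

        while (!(prev & LOCKED_VAL)) {
                uint32_t next = (prev & TAIL_CPU_MASK) | newval;

                if ((prev & TAIL_CPU_MASK) == tail)
                        next &= ~TAIL_CPU_MASK; /* we were last queued: clear tail */

                /* acquire ordering on success mirrors PPC_ACQUIRE_BARRIER */
                if (atomic_compare_exchange_weak_explicit(val, &prev, next,
                                                          memory_order_acquire,
                                                          memory_order_relaxed))
                        break;
        }

        return prev;    /* caller checks the locked bit to detect failure */
}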
/*
 * Publish our tail, replacing the previous tail. Return previous value.
 *
 * This provides a release barrier for publishing the node; it pairs with
 * the acquire barrier in get_tail_qnode() when the next CPU finds this
 * tail value.
 */
static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
{
        u32 prev, tmp;

        asm volatile(
"\t"    PPC_RELEASE_BARRIER "                                   \n"
"1:     lwarx   %0,0,%2         # publish_tail_cpu              \n"
"       andc    %1,%0,%4                                        \n"
"       or      %1,%1,%3                                        \n"
"       stwcx.  %1,0,%2                                         \n"
"       bne-    1b                                              \n"
        : "=&r" (prev), "=&r" (tmp)
        : "r" (&lock->val), "r" (tail), "r" (_Q_TAIL_CPU_MASK)
        : "cr0", "memory");

        return prev;
}
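Semantically this is an atomic read-modify-write that swaps the tail field while leaving the other lock-word bits alone. A rough C11 sketch, reusing the placeholder constants from the previous sketch:

static uint32_t sketch_publish_tail_cpu(_Atomic uint32_t *val, uint32_t tail)
{
        uint32_t prev = atomic_load_explicit(val, memory_order_relaxed);

        /* release ordering on success mirrors PPC_RELEASE_BARRIER */
        while (!atomic_compare_exchange_weak_explicit(val, &prev,
                                                      (prev & ~TAIL_CPU_MASK) | tail,
                                                      memory_order_release,
                                                      memory_order_relaxed))
                ;

        return prev;
}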
static __always_inline void seen_sleepy_node(void)
{
        if (pv_sleepy_lock) {
                if (pv_sleepy_lock_interval_ns)
                        this_cpu_write(sleepy_lock_seen_clock, sched_clock());
                /* Don't set sleepy because we likely have a stale val */
        }
}
static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
{
        struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
        int idx;

        /*
         * After publishing the new tail and finding a previous tail in the
         * previous val (which is the control dependency), this barrier
         * orders the release barrier in publish_tail_cpu performed by the
         * last CPU, with subsequently looking at its qnode structures
         * after the barrier.
         */
        smp_acquire__after_ctrl_dep();

        for (idx = 0; idx < MAX_NODES; idx++) {
                struct qnode *qnode = &qnodesp->nodes[idx];

                if (qnode->lock == lock)
                        return qnode;
        }

        BUG();
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val,
                                                    bool paravirt, bool mustq)
{
        int owner;
        u32 yield_count;
        bool preempted = false;

        if (!paravirt)
                goto relax;

        owner = get_owner_cpu(val);
        yield_count = yield_count_of(owner);

        if ((yield_count & 1) == 0)
                goto relax; /* owner vcpu is running */
        spin_end();

        seen_sleepy_owner(lock, val);
        preempted = true;

        /*
         * Read the lock word after sampling the yield count. On the other
         * side there may be a wmb, because the yield count update is done
         * by the hypervisor preemption and the value update by the OS;
         * however, this ordering might reduce the chance of out-of-order
         * accesses and improve the heuristic.
         */
        smp_rmb();

        if (READ_ONCE(lock->val) == val) {
                if (mustq)
                        clear_mustq(lock);
                yield_to_preempted(owner, yield_count);
                if (mustq)
                        set_mustq(lock);
                spin_begin();

                /* Don't relax if we yielded. Maybe we should? */
                return preempted;
        }
        spin_begin();

relax:
        spin_cpu_relax();

        return preempted;
}
/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val,
                                                  bool paravirt)
{
        return __yield_to_locked_owner(lock, val, paravirt, false);
}
/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val,
                                                       bool paravirt)
{
        bool mustq = false;

        if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
                mustq = true;

        return __yield_to_locked_owner(lock, val, paravirt, mustq);
}

/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node,
                                          int prev_cpu, bool paravirt)
{
        u32 yield_count;
        bool preempted = false;

        if (!paravirt)
                goto relax;

        /*
         * If the previous waiter was preempted it might not be able to
         * propagate sleepy to us, so check the lock in that case too.
         */
        if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
                u32 val = READ_ONCE(lock->val);

                if (val & _Q_LOCKED_VAL) {
                        if (node->next && !node->next->sleepy) {
                                /*
                                 * Propagate sleepy to next waiter. Only if
                                 * owner is preempted, which allows the queue
                                 * to become "non-sleepy" if vCPU preemption
                                 * ceases to occur, even if the lock remains
                                 * highly contended.
                                 */
                                if (vcpu_is_preempted(get_owner_cpu(val)))
                                        node->next->sleepy = 1;
                        }

                        preempted = yield_to_locked_owner(lock, val, paravirt);
                        if (preempted)
                                return preempted;
                }
                node->sleepy = false;
        }

        yield_count = yield_count_of(prev_cpu);
        if ((yield_count & 1) == 0)
                goto relax; /* owner vcpu is running */
        spin_end();

        preempted = true;
        seen_sleepy_node();

        smp_rmb(); /* See __yield_to_locked_owner comment */

        if (!READ_ONCE(node->locked)) {
                yield_to_preempted(prev_cpu, yield_count);
                spin_begin();
                return preempted;
        }
        spin_begin();

relax:
        spin_cpu_relax();

        return preempted;
}
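The (yield_count & 1) tests above rely on a convention of the hypervisor's per-vCPU yield count that this excerpt does not define: the count is assumed to advance on each preempt/dispatch transition, so an odd value means the vCPU is currently preempted. A hypothetical helper spelling out the test the code open-codes:

/* Hypothetical helper, for illustration; the real code open-codes this. */
static inline bool sketch_vcpu_preempted_by_yield_count(u32 yield_count)
{
        return (yield_count & 1) != 0; /* even means the vCPU is running */
}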
static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
{
        if (iters >= get_steal_spins(paravirt, sleepy))
                return true;

        if (IS_ENABLED(CONFIG_NUMA) &&
            (iters >= get_remote_steal_spins(paravirt, sleepy))) {
                int cpu = get_owner_cpu(val);

                if (numa_node_id() != cpu_to_node(cpu))
                        return true;
        }

        return false;
}
static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
{
        bool seen_preempted = false;
        bool sleepy = false;
        int iters = 0;
        u32 val;

        if (!steal_spins) {
                /* XXX: should spin_on_preempted_owner do anything here? */
                return false;
        }

        /* Attempt to steal the lock */
        spin_begin();
        do {
                bool preempted = false;

                val = READ_ONCE(lock->val);
                if (val & _Q_MUST_Q_VAL)
                        break;
                spec_barrier();

                if (unlikely(!(val & _Q_LOCKED_VAL))) {
                        spin_end();
                        if (__queued_spin_trylock_steal(lock))
                                return true;
                        spin_begin();
                } else {
                        preempted = yield_to_locked_owner(lock, val, paravirt);
                }

                if (paravirt && pv_sleepy_lock) {
                        if (!sleepy) {
                                if (val & _Q_SLEEPY_VAL) {
                                        seen_sleepy_lock();
                                        sleepy = true;
                                } else if (recently_sleepy()) {
                                        sleepy = true;
                                }
                        }
                        if (pv_sleepy_lock_sticky && seen_preempted &&
                            !(val & _Q_SLEEPY_VAL)) {
                                if (try_set_sleepy(lock, val))
                                        val |= _Q_SLEEPY_VAL;
                        }
                }

                if (preempted) {
                        seen_preempted = true;
                        sleepy = true;
                        if (!pv_spin_on_preempted_owner)
                                iters++;
                        /*
                         * pv_spin_on_preempted_owner doesn't increase iters
                         * while the owner is preempted -- we won't interfere
                         * with it by definition. This could introduce some
                         * latency issue if we continually observe preempted
                         * owners, but hopefully that's a rare corner case of
                         * a badly oversubscribed system.
                         */
                } else {
                        iters++;
                }
        } while (!steal_break(val, iters, paravirt, sleepy));

        spin_end();

        return false;
}
static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
{
        struct qnodes *qnodesp;
        struct qnode *next, *node;
        u32 val, old, tail;
        bool seen_preempted = false;
        bool sleepy = false;
        bool mustq = false;
        int idx;
        int iters = 0;

        qnodesp = this_cpu_ptr(&qnodes);
        if (unlikely(qnodesp->count >= MAX_NODES)) {
                spec_barrier();
                while (!queued_spin_trylock(lock))
                        cpu_relax();
                return;
        }

        idx = qnodesp->count++;
        /*
         * Ensure that we increment the head node->count before initialising
         * the actual node. If the compiler is kind enough to reorder these
         * stores, then an IRQ could overwrite our assignments.
         */
        barrier();
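        /*
         * Illustration (not in the original source) of the interleaving
         * the barrier() above guards against, with this CPU taking an IRQ
         * whose handler also acquires a qspinlock:
         *
         *   idx = qnodesp->count++;       count is now 1
         *   <IRQ>                         nested slowpath runs
         *     idx = qnodesp->count++;     nested waiter gets nodes[1]
         *   </IRQ>
         *   node = &qnodesp->nodes[idx];  safe: the IRQ used a different slot
         *
         * If the compiler instead sank the increment below the node
         * initialisation stores, the nested acquisition would compute the
         * same idx and overwrite this node's fields.
         */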
node = &qnodesp->nodes[idx];
node->next = NULL;
node->lock = lock;
node->cpu = smp_processor_id();
node->sleepy = 0;
node->locked = 0;
tail = encode_tail_cpu(node->cpu);
        /*
         * Assign all attributes of a node before it can be published.
         * Issues an lwsync, serving as a release barrier, as well as a
         * compiler barrier.
         */
old = publish_tail_cpu(lock, tail);
        /*
         * If there was a previous node, link it and wait until reaching the
         * head of the waitqueue.
         */
        if (old & _Q_TAIL_CPU_MASK) {
                int prev_cpu = decode_tail_cpu(old);
                struct qnode *prev = get_tail_qnode(lock, prev_cpu);
/* Link @node into the waitqueue. */
WRITE_ONCE(prev->next, node);
/* Wait for mcs node lock to be released */
                spin_begin();
                while (!READ_ONCE(node->locked)) {
                        spec_barrier();

                        if (yield_to_prev(lock, node, prev_cpu, paravirt))
                                seen_preempted = true;
                }
                spec_barrier();
                spin_end();

                smp_rmb(); /* acquire barrier for the mcs lock */

                /*
                 * Generic qspinlocks have this prefetch here, but it seems
                 * like it could cause additional line transitions because
                 * the waiter will keep loading from it.
                 */
                if (_Q_SPIN_PREFETCH_NEXT) {
                        next = READ_ONCE(node->next);
                        if (next)
                                prefetchw(next);
                }
        }
/* We're at the head of the waitqueue, wait for the lock. */
again:
        spin_begin();
        for (;;) {
                bool preempted;

                val = READ_ONCE(lock->val);
                if (!(val & _Q_LOCKED_VAL))
                        break;
spec_barrier();
                if (paravirt && pv_sleepy_lock && maybe_stealers) {
                        if (!sleepy) {
                                if (val & _Q_SLEEPY_VAL) {
                                        seen_sleepy_lock();
                                        sleepy = true;
                                } else if (recently_sleepy()) {
                                        sleepy = true;
                                }
                        }
                        if (pv_sleepy_lock_sticky && seen_preempted &&
                            !(val & _Q_SLEEPY_VAL)) {
                                if (try_set_sleepy(lock, val))
                                        val |= _Q_SLEEPY_VAL;
                        }
                }
                preempted = yield_head_to_locked_owner(lock, val, paravirt);
                if (!maybe_stealers)
                        continue;

                if (preempted)
                        seen_preempted = true;

                if (paravirt && preempted) {
                        sleepy = true;

                        if (!pv_spin_on_preempted_owner)
                                iters++;
                } else {
                        iters++;
                }
if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
mustq = true;
set_mustq(lock);
val |= _Q_MUST_Q_VAL;
}
}
spec_barrier();
spin_end();
/* If we're the last queued, must clean up the tail. */
        old = trylock_clean_tail(lock, tail);
        if (unlikely(old & _Q_LOCKED_VAL)) {
                BUG_ON(!maybe_stealers);
                goto again; /* Can only be true if maybe_stealers. */
        }

        if ((old & _Q_TAIL_CPU_MASK) == tail)
                goto release; /* We were the tail, no next. */
/* There is a next, must wait for node->next != NULL (MCS protocol) */
        next = READ_ONCE(node->next);
        if (!next) {
                spin_begin();
                while (!(next = READ_ONCE(node->next)))
                        cpu_relax();
                spin_end();
        }
spec_barrier();
        /*
         * Unlock the next mcs waiter node. Release barrier is not required
         * here because the acquirer is only accessing the lock word, and
         * the acquire barrier we took the lock with orders that update vs
         * this store to locked. The corresponding barrier is the smp_rmb()
         * acquire barrier for mcs lock, above.
         */
        if (paravirt && pv_prod_head) {
                int next_cpu = next->cpu;

                WRITE_ONCE(next->locked, 1);
                if (_Q_SPIN_MISO)
                        asm volatile("miso" ::: "memory");
                if (vcpu_is_preempted(next_cpu))
                        prod_cpu(next_cpu);
        } else {
                WRITE_ONCE(next->locked, 1);
                if (_Q_SPIN_MISO)
                        asm volatile("miso" ::: "memory");
        }
release:
        /*
         * Clear the lock before releasing the node, as another CPU might see stale
         * values if an interrupt occurs after we increment qnodesp->count
         * but before node->lock is initialized. The barrier ensures that
         * there are no further stores to the node after it has been released.
         */
node->lock = NULL;
barrier();
qnodesp->count--;
}
void queued_spin_lock_slowpath(struct qspinlock *lock)
{
        /*
         * This looks funny, but it induces the compiler to inline both
         * sides of the branch rather than share code as when the condition
         * is passed as the paravirt argument to the functions.
         */
        if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
                if (try_to_steal_lock(lock, true)) {
                        spec_barrier();
                        return;
                }
                queued_spin_lock_mcs_queue(lock, true);
        } else {
                if (try_to_steal_lock(lock, false)) {
                        spec_barrier();
                        return;
                }
                queued_spin_lock_mcs_queue(lock, false);
        }
}
EXPORT_SYMBOL(queued_spin_lock_slowpath);
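For context, this slow path is only entered when an inlined fast path fails to take an uncontended lock word. The real fast path lives in the powerpc qspinlock header and is not part of this excerpt; a sketch of its assumed shape, built on the queued_spin_trylock() already used above:

/* Sketch of a fast-path caller (assumed shape, not the kernel's header). */
static inline void sketch_queued_spin_lock(struct qspinlock *lock)
{
        if (likely(queued_spin_trylock(lock)))
                return;
        queued_spin_lock_slowpath(lock);
}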
static int steal_spins_set(void *data, u64 val)
{
        /*
         * The lock slow path has a !maybe_stealers case that can assume
         * the head of queue will not see concurrent waiters. That waiter
         * is unsafe in the presence of stealers, so we must keep them away
         * from one another.
         */

        mutex_lock(&lock);
        if (val && !steal_spins) {
                maybe_stealers = true;
                /* wait for queue head waiter to go away */
                synchronize_rcu();
                steal_spins = val;
        } else if (!val && steal_spins) {
                steal_spins = val;
                /* wait for all possible stealers to go away */
                synchronize_rcu();
                maybe_stealers = false;
        } else {
                steal_spins = val;
        }
        mutex_unlock(&lock);

        return 0;
}