/*
 * Derive the length of a context's time slice from its priority.
 *
 * User-nice values [ -20 ... 0 ... 19 ] are scaled to time slices of
 * [800ms ... 100ms ... 5ms].
 *
 * A higher-priority thread receives a larger slice per round of
 * execution, yet even the lowest-priority thread is still granted
 * MIN_TIMESLICE worth of execution time.
 */
void spu_set_timeslice(struct spu_context *ctx)
{
	/* Priorities at or above NORMAL_PRIO scale the plain default slice. */
	if (ctx->prio >= NORMAL_PRIO) {
		ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE, ctx->prio);
		return;
	}

	/* Numerically smaller priorities scale four times the default. */
	ctx->time_slice = SCALE_PRIO(DEF_SPU_TIMESLICE * 4, ctx->prio);
}
/*
 * Refresh a context's scheduling parameters from the calling thread.
 *
 * Copies the pid, priority, policy and allowed-cpu mask of `current`
 * into @ctx and records the cpu the caller is executing on.  The context
 * must not be linked on the runqueue when this is called.
 */
void __spu_update_sched_info(struct spu_context *ctx)
{
	/*
	 * Scheduling parameters may only be changed while the context
	 * is off the runqueue; anything else is a bug.
	 */
	BUG_ON(!list_empty(&ctx->rq));

	/*
	 * A 32-bit store is atomic on powerpc.  Memory ordering is of no
	 * concern here, since looking up the controlling thread is racy
	 * by definition.
	 */
	ctx->tid = current->pid;

	/*
	 * Priorities are computed locally, so ->static_prio is the usual
	 * starting point.  For threads with a realtime policy that field
	 * holds junk, hence ->prio is used for them instead.
	 */
	ctx->prio = rt_prio(current->prio) ? current->prio
					   : current->static_prio;
	ctx->policy = current->policy;

	/*
	 * TODO: the context may already be loaded, in which case it might
	 * have to be activated again on another node.  Updating the mask is
	 * harmless regardless: the context is not on the runqueue, so the
	 * scheduler is not actively inspecting this field.  Should the
	 * context be timesliced or preempted it gets rescheduled on the
	 * proper node.
	 */
	cpumask_copy(&ctx->cpus_allowed, current->cpus_ptr);

	/* Remember the current cpu id for spu interrupt routing. */
	ctx->last_ran = raw_smp_processor_id();
}
/*
 * Update the scheduling information of @ctx from the calling thread.
 *
 * A context in SPU_STATE_RUNNABLE is updated under the list_mutex of the
 * node its spu belongs to; in any other state the update is done without
 * taking that mutex.
 */
void spu_update_sched_info(struct spu_context *ctx)
{
	int node;

	/* Not runnable: no node list_mutex is taken for the update. */
	if (ctx->state != SPU_STATE_RUNNABLE) {
		__spu_update_sched_info(ctx);
		return;
	}

	node = ctx->spu->node;

	/* Hold list_mutex so the update is synchronized with find_victim(). */
	mutex_lock(&cbe_spu_info[node].list_mutex);
	__spu_update_sched_info(ctx);
	mutex_unlock(&cbe_spu_info[node].list_mutex);
}
/*
 * Check whether @ctx may run on @node.
 *
 * Returns 1 when the node has cpus (nr_cpus_node() is non-zero) and the
 * node's cpu mask intersects the context's cpus_allowed mask, 0 otherwise.
 */
static int __node_allowed(struct spu_context *ctx, int node)
{
	if (nr_cpus_node(node)) {
		const struct cpumask *mask = cpumask_of_node(node);

		if (cpumask_intersects(mask, &ctx->cpus_allowed))
			return 1;
	}

	return 0;
}
staticint node_allowed(struct spu_context *ctx, int node)
{ int rval;
/* * Must be used with the list_mutex held.
*/ staticinlineint sched_spu(struct spu *spu)
{
BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex));
staticstruct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, int group_size, int lowest_offset)
{ struct spu *spu; int node, n;
/* * TODO: A better algorithm could be used to find a good spu to be * used as reference location for the ctxs chain.
*/
node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { /* * "available_spus" counts how many spus are not potentially * going to be used by other affinity gangs whose reference * context is already in place. Although this code seeks to * avoid having affinity gangs with a summed amount of * contexts bigger than the amount of spus in the node, * this may happen sporadically. In this case, available_spus * becomes negative, which is harmless.
*/ int available_spus;
staticstruct spu *ctx_location(struct spu *ref, int offset, int node)
{ struct spu *spu;
spu = NULL; if (offset >= 0) {
list_for_each_entry(spu, ref->aff_list.prev, aff_list) {
BUG_ON(spu->node != node); if (offset == 0) break; if (sched_spu(spu))
offset--;
}
} else {
list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) {
BUG_ON(spu->node != node); if (offset == 0) break; if (sched_spu(spu))
offset++;
}
}
return spu;
}
/*
 * has_affinity - tell whether affinity placement applies to a context
 * @ctx: context that is about to be scheduled
 *
 * Called each time a context is going to be scheduled.  A context whose
 * aff_list is empty takes no part in affinity scheduling.  Otherwise the
 * gang's reference spu is dropped when no context of the gang is counted
 * in aff_sched_count, and recomputed (merging remaining contexts and
 * setting offsets first if that has not happened yet) when it is unset.
 *
 * Returns non-zero if the gang has a reference spu afterwards, 0 if not.
 */
static int has_affinity(struct spu_context *ctx)
{
	struct spu_gang *gang = ctx->gang;

	if (list_empty(&ctx->aff_list))
		return 0;

	/* Nobody from the gang is affinity-scheduled: forget the old reference. */
	if (atomic_read(&gang->aff_sched_count) == 0)
		gang->aff_ref_spu = NULL;

	if (!gang->aff_ref_spu) {
		if (!(gang->aff_flags & AFF_MERGED))
			aff_merge_remaining_ctxs(gang);
		if (!(gang->aff_flags & AFF_OFFSETS_SET))
			aff_set_offsets(gang);
		aff_set_ref_point_location(gang);
	}

	return gang->aff_ref_spu != NULL;
}
/**
 * spu_unbind_context - unbind spu context from physical spu
 * @spu:	physical spu to unbind from
 * @ctx:	context to unbind
 *
 * Drops the node's reserved_spus count when the context bound to @spu was
 * created with SPU_CREATE_NOSCHED, drops the gang's affinity scheduling
 * count if it is positive, switches the context's utilization state to
 * SPU_UTIL_IDLE_LOADED and clears ctx->spu.  Waiters on ctx->stop_wq are
 * woken when spu_stopped() reports the context as stopped.
 */
static void spu_unbind_context(struct spu *spu, struct spu_context *ctx)
{
	u32 status;

	if (spu->ctx->flags & SPU_CREATE_NOSCHED)
		atomic_dec(&cbe_spu_info[spu->node].reserved_spus);

	/*
	 * If ctx->gang->aff_sched_count is positive, SPU affinity is
	 * being considered in this gang.  Using atomic_dec_if_positive
	 * allows us to skip an explicit check for affinity in this gang.
	 */
	if (ctx->gang)
		atomic_dec_if_positive(&ctx->gang->aff_sched_count);

	/* This maps the underlying spu state to idle */
	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
	ctx->spu = NULL;

	if (spu_stopped(ctx, &status))
		wake_up_all(&ctx->stop_wq);
}
/**
 * __spu_add_to_rq - add a context to the runqueue
 * @ctx:	context to add
 *
 * Appends @ctx to the run list of its priority, marks that priority in
 * the bitmap and arms the scheduler timer when this is the first waiting
 * context.  Does nothing if @ctx is already linked on a run list.
 *
 * NOTE(review): presumably the caller must hold spu_prio->runq_lock;
 * this function takes no lock itself - confirm against callers.
 */
static void __spu_add_to_rq(struct spu_context *ctx)
{
	/*
	 * Unfortunately this code path can be called from multiple threads
	 * on behalf of a single context due to the way the problem state
	 * mmap support works.
	 *
	 * Fortunately we need to wake up all these threads at the same time
	 * and can simply skip the runqueue addition for every but the first
	 * thread getting into this codepath.
	 *
	 * It's still quite hacky, and long-term we should proxy all other
	 * threads through the owner thread so that spu_run is in control
	 * of all the scheduling activity for a given context.
	 */
	if (list_empty(&ctx->rq)) {
		list_add_tail(&ctx->rq, &spu_prio->runq[ctx->prio]);
		set_bit(ctx->prio, spu_prio->bitmap);
		/* First waiter: (re)arm the scheduler tick timer. */
		if (!spu_prio->nr_waiting++)
			mod_timer(&spusched_timer, jiffies + SPUSCHED_TICK);
	}
}
/* * The caller must explicitly wait for a context to be loaded * if the nosched flag is set. If NOSCHED is not set, the caller * queues the context and waits for an spu event or error.
*/
BUG_ON(!(ctx->flags & SPU_CREATE_NOSCHED));
/** * find_victim - find a lower priority context to preempt * @ctx: candidate context for running * * Returns the freed physical spu to run the new context on.
*/ staticstruct spu *find_victim(struct spu_context *ctx)
{ struct spu_context *victim = NULL; struct spu *spu; int node, n;
/* * Look for a possible preemption candidate on the local node first. * If there is no candidate look at the other nodes. This isn't * exactly fair, but so far the whole spu scheduler tries to keep * a strong node affinity. We might want to fine-tune this in * the future.
*/
restart:
node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) {
node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue;
if (victim) { /* * This nests ctx->state_mutex, but we always lock * higher priority contexts before lower priority * ones, so this is safe until we introduce * priority inheritance schemes. * * XXX if the highest priority context is locked, * this can loop a long time. Might be better to * look at another context or give up after X retries.
*/ if (!mutex_trylock(&victim->state_mutex)) {
put_spu_context(victim);
victim = NULL; goto restart;
}
spu = victim->spu; if (!spu || victim->prio <= ctx->prio) { /* * This race can happen because we've dropped * the active list mutex. Not a problem, just * restart the search.
*/
mutex_unlock(&victim->state_mutex);
put_spu_context(victim);
victim = NULL; goto restart;
}
if (success)
wake_up_all(&ctx->run_wq); else
spu_add_to_rq(ctx);
}
/*
 * Schedule @ctx onto @spu if the context is still in SPU_STATE_SAVED.
 *
 * Takes ctx->state_mutex for the check and the scheduling step and
 * finishes with spu_release(ctx).
 * NOTE(review): spu_release() presumably drops state_mutex again - confirm.
 */
static void spu_schedule(struct spu *spu, struct spu_context *ctx)
{
	/*
	 * Not a candidate for an interruptible lock because it's called
	 * either from the scheduler thread or from spu_deactivate.
	 */
	mutex_lock(&ctx->state_mutex);
	if (ctx->state == SPU_STATE_SAVED)
		__spu_schedule(spu, ctx);
	spu_release(ctx);
}
/**
 * spu_unschedule - remove a context from a spu, and possibly release it.
 * @spu:	The SPU to unschedule from
 * @ctx:	The context currently scheduled on the SPU
 * @free_spu:	Whether to free the SPU for other contexts
 *
 * Unbinds the context @ctx from the SPU @spu. If @free_spu is non-zero, the
 * SPU is made available for other contexts (i.e., may be returned by
 * spu_get_idle). If this is zero, the caller is expected to schedule another
 * context to this spu.
 *
 * Should be called with ctx->state_mutex held.
*/ staticvoid spu_unschedule(struct spu *spu, struct spu_context *ctx, int free_spu)
{ int node = spu->node;
/** * spu_activate - find a free spu for a context and execute it * @ctx: spu context to schedule * @flags: flags (currently ignored) * * Tries to find a free spu to run @ctx. If no free spu is available * add the context to the runqueue so it gets woken up once an spu * is available.
*/ int spu_activate(struct spu_context *ctx, unsignedlong flags)
{ struct spu *spu;
/* * If there are multiple threads waiting for a single context * only one actually binds the context while the others will * only be able to acquire the state_mutex once the context * already is in runnable state.
*/ if (ctx->spu) return 0;
spu_activate_top: if (signal_pending(current)) return -ERESTARTSYS;
spu = spu_get_idle(ctx); /* * If this is a realtime thread we try to get it running by * preempting a lower priority thread.
*/ if (!spu && rt_prio(ctx->prio))
spu = find_victim(ctx); if (spu) { unsignedlong runcntl;
if (ctx->flags & SPU_CREATE_NOSCHED) {
spu_prio_wait(ctx); goto spu_activate_top;
}
spu_add_to_rq(ctx);
return 0;
}
/** * grab_runnable_context - try to find a runnable context * * Remove the highest priority context on the runqueue and return it * to the caller. Returns %NULL if no runnable context was found.
*/ staticstruct spu_context *grab_runnable_context(int prio, int node)
{ struct spu_context *ctx; int best;
spin_lock(&spu_prio->runq_lock);
best = find_first_bit(spu_prio->bitmap, prio); while (best < prio) { struct list_head *rq = &spu_prio->runq[best];
list_for_each_entry(ctx, rq, rq) { /* XXX(hch): check for affinity here as well */ if (__node_allowed(ctx, node)) {
__spu_del_from_rq(ctx); goto found;
}
}
best++;
}
ctx = NULL;
found:
spin_unlock(&spu_prio->runq_lock); return ctx;
}
/*
 * Take @ctx off its spu in favour of a waiting context, or unconditionally.
 *
 * @ctx:      context to deactivate; nothing happens if it has no spu
 * @force:    non-zero to unschedule @ctx even when no replacement is found
 * @max_prio: exclusive priority bound handed to grab_runnable_context()
 *
 * When a replacement is found it either gets its stop_wq woken (for a
 * SPU_CREATE_NOSCHED context) or is scheduled onto the freed spu.  In the
 * latter case @ctx is released around the scheduling call and its
 * state_mutex is taken again before returning.
 * NOTE(review): this implies the caller holds ctx->state_mutex on entry
 * and that spu_release() drops it - confirm.
 *
 * Returns non-zero if a replacement context was grabbed, 0 otherwise.
 */
static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio)
{
	struct spu *spu = ctx->spu;
	struct spu_context *new = NULL;

	if (spu) {
		new = grab_runnable_context(max_prio, spu->node);
		if (new || force) {
			/* Only free the spu for others if nothing takes it over. */
			spu_unschedule(spu, ctx, new == NULL);
			if (new) {
				if (new->flags & SPU_CREATE_NOSCHED)
					wake_up(&new->stop_wq);
				else {
					spu_release(ctx);
					spu_schedule(spu, new);
					/*
					 * this one can't easily be made
					 * interruptible
					 */
					mutex_lock(&ctx->state_mutex);
				}
			}
		}
	}

	return new != NULL;
}
/**
 * spu_deactivate - unbind a context from its physical spu
 * @ctx:	spu context to unbind
 *
 * Takes @ctx off the physical spu it is running on and hands the freed
 * spu to the highest priority context waiting to run.
 */
void spu_deactivate(struct spu_context *ctx)
{
	/* Unbind even when no other context is waiting for the spu. */
	const int force = 1;

	spu_context_nospu_trace(spu_deactivate__enter, ctx);
	__spu_deactivate(ctx, force, MAX_PRIO);
}
/**
 * spu_yield - yield a physical spu if others are waiting
 * @ctx:	spu context to yield
 *
 * If another context is waiting to run, take @ctx off its physical spu
 * and schedule the highest priority waiter on the freed spu instead.
 * Contexts created with SPU_CREATE_NOSCHED never yield.
 */
void spu_yield(struct spu_context *ctx)
{
	spu_context_nospu_trace(spu_yield__enter, ctx);

	if (ctx->flags & SPU_CREATE_NOSCHED)
		return;

	/* Non-forced deactivation: only gives way if a waiter exists. */
	mutex_lock(&ctx->state_mutex);
	__spu_deactivate(ctx, 0, MAX_PRIO);
	mutex_unlock(&ctx->state_mutex);
}
new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) {
spu_unschedule(spu, ctx, 0); if (test_bit(SPU_SCHED_SPU_RUN, &ctx->sched_flags))
spu_add_to_rq(ctx);
} else {
spu_context_nospu_trace(spusched_tick__newslice, ctx); if (!ctx->time_slice)
ctx->time_slice++;
}
out:
spu_release(ctx);
if (new)
spu_schedule(spu, new);
}
/** * count_active_contexts - count nr of active tasks * * Return the number of tasks currently running or waiting to run. * * Note that we don't take runq_lock / list_mutex here. Reading * a single 32bit value is atomic on powerpc, and we don't care * about memory ordering issues here.
*/ staticunsignedlong count_active_contexts(void)
{ int nr_active = 0, node;
/** * spu_calc_load - update the avenrun load estimates. * * No locking against reading these values from userspace, as for * the CPU loadavg code.
*/ staticvoid spu_calc_load(void)
{ unsignedlong active_tasks; /* fixed-point */
/* * Update the physical SPU utilization statistics.
*/ if (spu) {
ctx->stats.times[old_state] += delta;
spu->stats.times[old_state] += delta;
spu->stats.util_state = new_state;
spu->stats.tstamp = curtime;
node = spu->node; if (old_state == SPU_UTIL_USER)
atomic_dec(&cbe_spu_info[node].busy_spus); if (new_state == SPU_UTIL_USER)
atomic_inc(&cbe_spu_info[node].busy_spus);
}
}
#ifdef CONFIG_PROC_FS
/*
 * seq_file show routine printing the SPU load averages.
 *
 * Emits the three spu_avenrun values as fixed-point numbers with two
 * decimals, followed by "<active contexts>/<total contexts>" and a pid
 * cursor value.  Always returns 0.
 */
static int show_spu_loadavg(struct seq_file *s, void *private)
{
	int a, b, c;

	/* NOTE(review): the FIXED_1/200 bias presumably rounds to the nearest hundredth. */
	a = spu_avenrun[0] + (FIXED_1/200);
	b = spu_avenrun[1] + (FIXED_1/200);
	c = spu_avenrun[2] + (FIXED_1/200);

	/*
	 * Note that last_pid doesn't really make much sense for the
	 * SPU loadavg (it even seems very odd on the CPU side...),
	 * but we include it here to have a 100% compatible interface.
	 */
	seq_printf(s, "%d.%02d %d.%02d %d.%02d %ld/%d %d\n",
		LOAD_INT(a), LOAD_FRAC(a),
		LOAD_INT(b), LOAD_FRAC(b),
		LOAD_INT(c), LOAD_FRAC(c),
		count_active_contexts(),
		atomic_read(&nr_spu_contexts),
		idr_get_cursor(&task_active_pid_ns(current)->idr) - 1);
	return 0;
}
#endif
int __init spu_sched_init(void)
{ struct proc_dir_entry *entry; int err = -ENOMEM, i;
spu_prio = kzalloc(sizeof(struct spu_prio_array), GFP_KERNEL); if (!spu_prio) goto out;
for (i = 0; i < MAX_PRIO; i++) {
INIT_LIST_HEAD(&spu_prio->runq[i]);
__clear_bit(i, spu_prio->bitmap);
}
spin_lock_init(&spu_prio->runq_lock);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.