/*
 * Frequency meter - How fast is some event occurring?
 *
 * These routines manage a digitally filtered, constant time based,
 * event frequency meter.  There are four routines:
 *    fmeter_init() - initialize a frequency meter.
 *    fmeter_markevent() - called each time the event happens.
 *    fmeter_getrate() - returns the recent rate of such events.
 *    fmeter_update() - internal routine used to update fmeter.
 *
 * A common data structure is passed to each of these routines,
 * which is used to keep track of the state required to manage the
 * frequency meter and its digital filter.
 *
 * The filter works on the number of events marked per unit time.
 * The filter is single-pole low-pass recursive (IIR).  The time unit
 * is 1 second.  Arithmetic is done using 32-bit integers scaled to
 * simulate 3 decimal digits of precision (multiplied by 1000).
 *
 * With an FM_COEF of 933, and a time base of 1 second, the filter
 * has a half-life of 10 seconds, meaning that if the events quit
 * happening, then the rate returned from the fmeter_getrate()
 * will be cut in half each 10 seconds, until it converges to zero.
 *
 * It is not worth doing a real infinitely recursive filter.  If more
 * than FM_MAXTICKS ticks have elapsed since the last filter event,
 * just compute FM_MAXTICKS ticks worth, by which point the level
 * will be stable.
 *
 * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
 * arithmetic overflow in the fmeter_update() routine.
 *
 * Given the simple 32 bit integer arithmetic used, this meter works
 * best for reporting rates between one per millisecond (msec) and
 * one per 32 (approx) seconds.  At constant rates faster than one
 * per msec it maxes out at values just under 1,000,000.  At constant
 * rates between one per msec, and one per second it will stabilize
 * to a value N*1000, where N is the rate of events per second.
 * At constant rates between one per second and one per 32 seconds,
 * it will be choppy, moving up on the seconds that have an event,
 * and then decaying until the next event.  At rates slower than
 * about one in 32 seconds, it decays all the way back to zero between
 * each event.
 */

#define FM_COEF 933             /* coefficient for half-life of 10 secs */
#define FM_MAXTICKS ((u32)99)   /* useless computing more ticks than this */
#define FM_MAXCNT 1000000       /* limit cnt to avoid overflow */
#define FM_SCALE 1000           /* faux fixed point scale */
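
/*
 * Worked example (illustration only, not part of the original source):
 * each 1-second tick applies
 *
 *      val = (FM_COEF * val) / FM_SCALE;
 *
 * i.e. the level is multiplied by 0.933.  Ten ticks multiply it by
 * 0.933^10 ~= 0.50, which is where the 10 second half-life quoted above
 * comes from.  Starting from the saturation level of 1,000,000, ten
 * integer decay steps land near 499,800.  The intermediate products also
 * stay below 2^32 (933 * 1,000,000 = 933,000,000), which is why limiting
 * cnt to FM_MAXCNT keeps the arithmetic overflow-free.
 */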

/* Initialize a frequency meter */
void fmeter_init(struct fmeter *fmp)
{
        fmp->cnt = 0;
        fmp->val = 0;
        fmp->time = 0;
        spin_lock_init(&fmp->lock);
}

/* Internal meter update - process cnt events and update value */
static void fmeter_update(struct fmeter *fmp)
{
        time64_t now;
        u32 ticks;

        now = ktime_get_seconds();
        ticks = now - fmp->time;

        if (ticks == 0)
                return;

        ticks = min(FM_MAXTICKS, ticks);
        while (ticks-- > 0)
                fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
        fmp->time = now;

        fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
        fmp->cnt = 0;
}

/* Process any previous ticks, then bump cnt by one (times scale). */
static void fmeter_markevent(struct fmeter *fmp)
{
        spin_lock(&fmp->lock);
        fmeter_update(fmp);
        fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
        spin_unlock(&fmp->lock);
}

/* Process any previous ticks, then return current value. */
static int fmeter_getrate(struct fmeter *fmp)
{
        int val;

        spin_lock(&fmp->lock);
        fmeter_update(fmp);
        val = fmp->val;
        spin_unlock(&fmp->lock);
        return val;
}
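
/*
 * Typical usage (sketch, based on how the cpuset code below consumes the
 * meter): each cpuset embeds a struct fmeter, marks an event on every
 * synchronous reclaim, and reports the filtered rate when the read-only
 * "memory_pressure" file is read:
 *
 *      fmeter_init(&cs->fmeter);           // once, when the cpuset is created
 *      fmeter_markevent(&cs->fmeter);      // on each direct reclaim event
 *      rate = fmeter_getrate(&cs->fmeter); // when "memory_pressure" is read
 */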

/*
 * Collection of memory_pressure is suppressed unless
 * this flag is enabled by writing "1" to the special
 * cpuset file 'memory_pressure_enabled' in the root cpuset.
 */
int cpuset_memory_pressure_enabled __read_mostly;

/*
 * __cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
 *
 * Keep a running average of the rate of synchronous (direct)
 * page reclaim efforts initiated by tasks in each cpuset.
 *
 * This represents the rate at which some task in the cpuset
 * ran low on memory on all nodes it was allowed to use, and
 * had to enter the kernel's page reclaim code in an effort to
 * create more free memory by tossing clean pages or swapping
 * or writing dirty pages.
 *
 * Display to user space in the per-cpuset read-only file
 * "memory_pressure".  Value displayed is an integer
 * representing the recent rate of entry into the synchronous
 * (direct) page reclaim by any task attached to the cpuset.
 */
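
/*
 * The body of __cpuset_memory_pressure_bump() is not part of this excerpt.
 * A minimal sketch consistent with the comment above would simply mark an
 * fmeter event on the current task's cpuset; task_cs() is assumed here to
 * be the usual helper mapping a task to its cpuset:
 */
void __cpuset_memory_pressure_bump(void)
{
        rcu_read_lock();
        fmeter_markevent(&task_cs(current)->fmeter);
        rcu_read_unlock();
}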

        switch (type) {
        case FILE_SCHED_RELAX_DOMAIN_LEVEL:
                return cs->relax_domain_level;
        default:
                BUG();
        }

        /* Unreachable but makes gcc happy */
        return 0;
}

/*
 * update task's spread flag if cpuset's page/slab spread flag is set
 *
 * Call with callback_lock or cpuset_mutex held. The check can be skipped
 * if on default hierarchy.
 */
void cpuset1_update_task_spread_flags(struct cpuset *cs,
                                      struct task_struct *tsk)
{
        if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
                return;

        if (is_spread_page(cs))
                task_set_spread_page(tsk);
        else
                task_clear_spread_page(tsk);

        if (is_spread_slab(cs))
                task_set_spread_slab(tsk);
        else
                task_clear_spread_slab(tsk);
}

/**
 * cpuset1_update_tasks_flags - update the spread flags of tasks in the cpuset.
 * @cs: the cpuset in which each task's spread flags needs to be changed
 *
 * Iterate through each task of @cs updating its spread flags.  As this
 * function is called with cpuset_mutex held, cpuset membership stays
 * stable.
 */
void cpuset1_update_tasks_flags(struct cpuset *cs)
{
        struct css_task_iter it;
        struct task_struct *task;

        css_task_iter_start(&cs->css, 0, &it);
        while ((task = css_task_iter_next(&it)))
                cpuset1_update_task_spread_flags(cs, task);
        css_task_iter_end(&it);
}

/*
 * If CPU and/or memory hotplug handlers, below, unplug any CPUs
 * or memory nodes, we need to walk over the cpuset hierarchy,
 * removing that CPU or node from all cpusets.  If this removes the
 * last CPU or node from a cpuset, then move the tasks in the empty
 * cpuset to its next-highest non-empty parent.
 */
static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
{
        struct cpuset *parent;

        /*
         * Find its next-highest non-empty parent, (top cpuset
         * has online cpus, so can't be empty).
         */
        parent = parent_cs(cs);
        while (cpumask_empty(parent->cpus_allowed) ||
               nodes_empty(parent->mems_allowed))
                parent = parent_cs(parent);

        if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
                pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
                pr_cont_cgroup_name(cs->css.cgroup);
                pr_cont("\n");
        }
}

        /*
         * Don't call cpuset_update_tasks_cpumask() if the cpuset becomes empty,
         * as the tasks will be migrated to an ancestor.
         */
        if (cpus_updated && !cpumask_empty(cs->cpus_allowed))
                cpuset_update_tasks_cpumask(cs, new_cpus);
        if (mems_updated && !nodes_empty(cs->mems_allowed))
                cpuset_update_tasks_nodemask(cs);

        /*
         * Move tasks to the nearest ancestor with execution resources.
         * This is a full cgroup operation which will also call back into
         * cpuset.  Execute it asynchronously using a workqueue.
         */
        if (is_empty && cs->css.cgroup->nr_populated_csets &&
            css_tryget_online(&cs->css)) {
                struct cpuset_remove_tasks_struct *s;

                s = kzalloc(sizeof(*s), GFP_KERNEL);
                if (WARN_ON_ONCE(!s)) {
                        css_put(&cs->css);
                        return;
                }
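
                /*
                 * The rest of this branch is not shown in the excerpt.  A
                 * sketch of the asynchronous hand-off described above,
                 * assuming the struct has a work_struct member named "work"
                 * and a work function named cpuset_migrate_tasks_workfn()
                 * that calls remove_tasks_in_empty_cpuset() and drops the
                 * css reference:
                 */
                s->cs = cs;
                INIT_WORK(&s->work, cpuset_migrate_tasks_workfn);
                schedule_work(&s->work);
        }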

/*
 * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
 *
 * One cpuset is a subset of another if all its allowed CPUs and
 * Memory Nodes are a subset of the other, and its exclusive flags
 * are only set if the other's are set.  Call holding cpuset_mutex.
 */
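
/*
 * The body of is_cpuset_subset() is not included in this excerpt.  A
 * sketch matching the description above (CPUs and memory nodes are
 * subsets, and each exclusive flag is only set if the other's is):
 */
static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
        return cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
               nodes_subset(p->mems_allowed, q->mems_allowed) &&
               is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
               is_mem_exclusive(p) <= is_mem_exclusive(q);
}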

/*
 * cpuset1_validate_change() - Validate conditions specific to legacy (v1)
 *                             behavior.
 */
int cpuset1_validate_change(struct cpuset *cur, struct cpuset *trial)
{
        struct cgroup_subsys_state *css;
        struct cpuset *c, *par;
        int ret;

        WARN_ON_ONCE(!rcu_read_lock_held());

        /* Each of our child cpusets must be a subset of us */
        ret = -EBUSY;
        cpuset_for_each_child(c, css, cur)
                if (!is_cpuset_subset(c, trial))
                        goto out;

        /* On legacy hierarchy, we must be a subset of our parent cpuset. */
        ret = -EACCES;
        par = parent_cs(cur);
        if (par && !is_cpuset_subset(trial, par))
                goto out;

        ret = 0;
out:
        return ret;
}

#ifdef CONFIG_PROC_PID_CPUSET
/*
 * proc_cpuset_show()
 *  - Print task's cpuset path into seq_file.
 *  - Used for /proc/<pid>/cpuset.
 */
int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
                     struct pid *pid, struct task_struct *tsk)
{
        char *buf;
        struct cgroup_subsys_state *css;
        int retval;
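
        /*
         * The remainder of proc_cpuset_show() is not shown in this excerpt.
         * A sketch of the lookup the comment above describes, assuming the
         * generic cgroup helpers task_get_css(), cgroup_path_ns() and the
         * cpuset_cgrp_id subsystem id: format the task's cpuset cgroup path
         * into a PATH_MAX buffer and emit it to the seq_file.
         */
        retval = -ENOMEM;
        buf = kmalloc(PATH_MAX, GFP_KERNEL);
        if (!buf)
                goto out;

        css = task_get_css(tsk, cpuset_cgrp_id);
        retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
                                current->nsproxy->cgroup_ns);
        css_put(css);
        if (retval < 0)
                goto out_free;

        seq_puts(m, buf);
        seq_putc(m, '\n');
        retval = 0;
out_free:
        kfree(buf);
out:
        return retval;
}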

        switch (type) {
        case FILE_CPU_EXCLUSIVE:
                return is_cpu_exclusive(cs);
        case FILE_MEM_EXCLUSIVE:
                return is_mem_exclusive(cs);
        case FILE_MEM_HARDWALL:
                return is_mem_hardwall(cs);
        case FILE_SCHED_LOAD_BALANCE:
                return is_sched_load_balance(cs);
        case FILE_MEMORY_MIGRATE:
                return is_memory_migrate(cs);
        case FILE_MEMORY_PRESSURE_ENABLED:
                return cpuset_memory_pressure_enabled;
        case FILE_MEMORY_PRESSURE:
                return fmeter_getrate(&cs->fmeter);
        case FILE_SPREAD_PAGE:
                return is_spread_page(cs);
        case FILE_SPREAD_SLAB:
                return is_spread_slab(cs);
        default:
                BUG();
        }