// SPDX-License-Identifier: GPL-2.0-only /* * Linux VM pressure * * Copyright 2012 Linaro Ltd. * Anton Vorontsov <anton.vorontsov@linaro.org> * * Based on ideas from Andrew Morton, David Rientjes, KOSAKI Motohiro, * Leonid Moiseichuk, Mel Gorman, Minchan Kim and Pekka Enberg.
*/
/* * The window size (vmpressure_win) is the number of scanned pages before * we try to analyze scanned/reclaimed ratio. So the window is used as a * rate-limit tunable for the "low" level notification, and also for * averaging the ratio for medium/critical levels. Using small window * sizes can cause lot of false positives, but too big window size will * delay the notifications. * * As the vmscan reclaimer logic works with chunks which are multiple of * SWAP_CLUSTER_MAX, it makes sense to use it for the window size as well. * * TODO: Make the window size depend on machine size, as we do for vmstat * thresholds. Currently we set it to 512 pages (2MB for 4KB pages).
*/ staticconstunsignedlong vmpressure_win = SWAP_CLUSTER_MAX * 16;
/* * These thresholds are used when we account memory pressure through * scanned/reclaimed ratio. The current values were chosen empirically. In * essence, they are percents: the higher the value, the more number * unsuccessful reclaims there were.
*/ staticconstunsignedint vmpressure_level_med = 60; staticconstunsignedint vmpressure_level_critical = 95;
/* * When there are too little pages left to scan, vmpressure() may miss the * critical pressure as number of pages will be less than "window size". * However, in that case the vmscan priority will raise fast as the * reclaimer will try to scan LRUs more deeply. * * The vmscan logic considers these special priorities: * * prio == DEF_PRIORITY (12): reclaimer starts with that value * prio <= DEF_PRIORITY - 2 : kswapd becomes somewhat overwhelmed * prio == 0 : close to OOM, kernel scans every page in an lru * * Any value in this range is acceptable for this tunable (i.e. from 12 to * 0). Current value for the vmpressure_level_critical_prio is chosen * empirically, but the number, in essence, means that we consider * critical level when scanning depth is ~10% of the lru size (vmscan * scans 'lru_size >> prio' pages, so it is actually 12.5%, or one * eights).
*/ staticconstunsignedint vmpressure_level_critical_prio = ilog2(100 / 10);
/* * reclaimed can be greater than scanned for things such as reclaimed * slab pages. shrink_node() just adds reclaimed pages without a * related increment to scanned pages.
*/ if (reclaimed >= scanned) goto out; /* * We calculate the ratio (in percents) of how many pages were * scanned vs. reclaimed in a given time frame (window). Note that * time is in VM reclaimer's "ticks", i.e. number of pages * scanned. This makes it possible to set desired reaction time * and serves as a ratelimit.
*/
pressure = scale - (reclaimed * scale / scanned);
pressure = pressure * 100 / scale;
spin_lock(&vmpr->sr_lock); /* * Several contexts might be calling vmpressure(), so it is * possible that the work was rescheduled again before the old * work context cleared the counters. In that case we will run * just after the old work returns, but then scanned might be zero * here. No need for any locks here since we don't care if * vmpr->reclaimed is in sync.
*/
scanned = vmpr->tree_scanned; if (!scanned) {
spin_unlock(&vmpr->sr_lock); return;
}
do { if (vmpressure_event(vmpr, level, ancestor, signalled))
signalled = true;
ancestor = true;
} while ((vmpr = vmpressure_parent(vmpr)));
}
/** * vmpressure() - Account memory pressure through scanned/reclaimed ratio * @gfp: reclaimer's gfp mask * @memcg: cgroup memory controller handle * @tree: legacy subtree mode * @scanned: number of pages scanned * @reclaimed: number of pages reclaimed * * This function should be called from the vmscan reclaim path to account * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw * pressure index is then further refined and averaged over time. * * If @tree is set, vmpressure is in traditional userspace reporting * mode: @memcg is considered the pressure root and userspace is * notified of the entire subtree's reclaim efficiency. * * If @tree is not set, reclaim efficiency is recorded for @memcg, and * only in-kernel users are notified. * * This function does not return any value.
*/ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, bool tree, unsignedlong scanned, unsignedlong reclaimed)
{ struct vmpressure *vmpr;
if (mem_cgroup_disabled()) return;
/* * The in-kernel users only care about the reclaim efficiency * for this @memcg rather than the whole subtree, and there * isn't and won't be any in-kernel user in a legacy cgroup.
*/ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !tree) return;
vmpr = memcg_to_vmpressure(memcg);
/* * Here we only want to account pressure that userland is able to * help us with. For example, suppose that DMA zone is under * pressure; if we notify userland about that kind of pressure, * then it will be mostly a waste as it will trigger unnecessary * freeing of memory by userland (since userland is more likely to * have HIGHMEM/MOVABLE pages instead of the DMA fallback). That * is why we include only movable, highmem and FS/IO pages. * Indirect reclaim (kswapd) sets sc->gfp_mask to GFP_KERNEL, so * we account it too.
*/ if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS))) return;
/* * If we got here with no pages scanned, then that is an indicator * that reclaimer was unable to find any shrinkable LRUs at the * current scanning depth. But it does not mean that we should * report the critical pressure, yet. If the scanning priority * (scanning depth) goes too high (deep), we will be notified * through vmpressure_prio(). But so far, keep calm.
*/ if (!scanned) return;
if (level > VMPRESSURE_LOW) { /* * Let the socket buffer allocator know that * we are having trouble reclaiming LRU pages. * * For hysteresis keep the pressure state * asserted for a second in which subsequent * pressure events can occur.
*/
mem_cgroup_set_socket_pressure(memcg);
}
}
}
/** * vmpressure_prio() - Account memory pressure through reclaimer priority level * @gfp: reclaimer's gfp mask * @memcg: cgroup memory controller handle * @prio: reclaimer's priority * * This function should be called from the reclaim path every time when * the vmscan's reclaiming priority (scanning depth) changes. * * This function does not return any value.
*/ void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio)
{ /* * We only use prio for accounting critical level. For more info * see comment for vmpressure_level_critical_prio variable above.
*/ if (prio > vmpressure_level_critical_prio) return;
/* * OK, the prio is below the threshold, updating vmpressure * information before shrinker dives into long shrinking of long * range vmscan. Passing scanned = vmpressure_win, reclaimed = 0 * to the vmpressure() basically means that we signal 'critical' * level.
*/
vmpressure(gfp, memcg, true, vmpressure_win, 0);
}
/** * vmpressure_register_event() - Bind vmpressure notifications to an eventfd * @memcg: memcg that is interested in vmpressure notifications * @eventfd: eventfd context to link notifications with * @args: event arguments (pressure level threshold, optional mode) * * This function associates eventfd context with the vmpressure * infrastructure, so that the notifications will be delivered to the * @eventfd. The @args parameter is a comma-delimited string that denotes a * pressure level threshold (one of vmpressure_str_levels, i.e. "low", "medium", * or "critical") and an optional mode (one of vmpressure_str_modes, i.e. * "hierarchy" or "local"). * * To be used as memcg event method. * * Return: 0 on success, -ENOMEM on memory failure or -EINVAL if @args could * not be parsed.
*/ int vmpressure_register_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd, constchar *args)
{ struct vmpressure *vmpr = memcg_to_vmpressure(memcg); struct vmpressure_event *ev; enum vmpressure_modes mode = VMPRESSURE_NO_PASSTHROUGH; enum vmpressure_levels level; char *spec, *spec_orig; char *token; int ret = 0;
/** * vmpressure_unregister_event() - Unbind eventfd from vmpressure * @memcg: memcg handle * @eventfd: eventfd context that was used to link vmpressure with the @cg * * This function does internal manipulations to detach the @eventfd from * the vmpressure notifications, and then frees internal resources * associated with the @eventfd (but the @eventfd itself is not freed). * * To be used as memcg event method.
*/ void vmpressure_unregister_event(struct mem_cgroup *memcg, struct eventfd_ctx *eventfd)
{ struct vmpressure *vmpr = memcg_to_vmpressure(memcg); struct vmpressure_event *ev;
/** * vmpressure_init() - Initialize vmpressure control structure * @vmpr: Structure to be initialized * * This function should be called on every allocated vmpressure structure * before any usage.
*/ void vmpressure_init(struct vmpressure *vmpr)
{
spin_lock_init(&vmpr->sr_lock);
mutex_init(&vmpr->events_lock);
INIT_LIST_HEAD(&vmpr->events);
INIT_WORK(&vmpr->work, vmpressure_work_fn);
}
/** * vmpressure_cleanup() - shuts down vmpressure control structure * @vmpr: Structure to be cleaned up * * This function should be called before the structure in which it is * embedded is cleaned up.
*/ void vmpressure_cleanup(struct vmpressure *vmpr)
{ /* * Make sure there is no pending work before eventfd infrastructure * goes away.
*/
flush_work(&vmpr->work);
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.1 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.