static inline void shrinker_unit_free(struct shrinker_info *info, int start)
{
	struct shrinker_info_unit **unit;
	int nr, i;

	if (!info)
		return;

	unit = info->unit;
	nr = DIV_ROUND_UP(info->map_nr_max, SHRINKER_UNIT_BITS);

	for (i = start; i < nr; i++) {
		if (!unit[i])
			break;

		kfree(unit[i]);
		unit[i] = NULL;
	}
}

static inline int shrinker_unit_alloc(struct shrinker_info *new,
				      struct shrinker_info *old, int nid)
{
	struct shrinker_info_unit *unit;
	int nr = DIV_ROUND_UP(new->map_nr_max, SHRINKER_UNIT_BITS);
	int start = old ? DIV_ROUND_UP(old->map_nr_max, SHRINKER_UNIT_BITS) : 0;
	int i;

	for (i = start; i < nr; i++) {
		unit = kzalloc_node(sizeof(*unit), GFP_KERNEL, nid);
		if (!unit) {
			shrinker_unit_free(new, start);
			return -ENOMEM;
		}

		new->unit[i] = unit;
	}

	return 0;
}

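/*
 * Worked example with illustrative numbers (not from the original file):
 * with map_nr_max = 100 and SHRINKER_UNIT_BITS equal to BITS_PER_LONG
 * (64 on a 64-bit build), DIV_ROUND_UP(100, 64) = 2 units are needed;
 * growing from an old map_nr_max of 64 (1 unit) therefore allocates only
 * unit index 1 and leaves the existing unit untouched.
 */
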
static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_size,
				    int old_size, int new_nr_max)
{
	struct shrinker_info *new, *old;
	struct mem_cgroup_per_node *pn;
	int nid;

	for_each_node(nid) {
		pn = memcg->nodeinfo[nid];
		old = shrinker_info_protected(memcg, nid);
		/* Not yet online memcg */
		if (!old)
			return 0;

		/* Already expanded this shrinker_info */
		if (new_nr_max <= old->map_nr_max)
			continue;

		new = kvzalloc_node(sizeof(*new) + new_size, GFP_KERNEL, nid);
		if (!new)
			return -ENOMEM;

		new->map_nr_max = new_nr_max;

		memcpy(new->unit, old->unit, old_size);
		if (shrinker_unit_alloc(new, old, nid)) {
			kvfree(new);
			return -ENOMEM;
		}

		rcu_assign_pointer(pn->shrinker_info, new);
		kvfree_rcu(old, rcu);
	}

	return 0;
}

static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
				    struct shrinker *shrinker, int priority)
{
	unsigned long freed = 0;
	unsigned long long delta;
	long total_scan;
	long freeable;
	long nr;
	long new_nr;
	long batch_size = shrinker->batch ? shrinker->batch : SHRINK_BATCH;
	long scanned = 0, next_deferred;

	freeable = shrinker->count_objects(shrinker, shrinkctl);
	if (freeable == 0 || freeable == SHRINK_EMPTY)
		return freeable;

	/*
	 * copy the current shrinker scan count into a local variable
	 * and zero it so that other concurrent shrinker invocations
	 * don't also do this scanning work.
	 */
	nr = xchg_nr_deferred(shrinker, shrinkctl);
	if (shrinker->seeks) {
		delta = freeable >> priority;
		delta *= 4;
		do_div(delta, shrinker->seeks);
	} else {
		/*
		 * These objects don't require any IO to create. Trim
		 * them aggressively under memory pressure to keep
		 * them from causing refetches in the IO caches.
		 */
		delta = freeable / 2;
	}
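	/*
	 * Worked example with illustrative numbers (not from the original
	 * source): for freeable = 10000 objects, priority = 12 and
	 * seeks = DEFAULT_SEEKS (2), delta = (10000 >> 12) * 4 / 2 = 4, so
	 * almost nothing is scanned per pass; as reclaim pressure mounts and
	 * the priority value drops to 2, delta = 2500 * 4 / 2 = 5000.
	 */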
	total_scan = nr >> priority;
	total_scan += delta;
	total_scan = min(total_scan, (2 * freeable));

	trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
				   freeable, delta, total_scan, priority);

	/*
	 * Normally, we should not scan less than batch_size objects in one
	 * pass to avoid too frequent shrinker calls, but if the slab has less
	 * than batch_size objects in total and we are really tight on memory,
	 * we will try to reclaim all available objects, otherwise we can end
	 * up failing allocations although there are plenty of reclaimable
	 * objects spread over several slabs with usage less than the
	 * batch_size.
	 *
	 * We detect the "tight on memory" situations by looking at the total
	 * number of objects we want to scan (total_scan). If it is greater
	 * than the total number of objects on slab (freeable), we must be
	 * scanning at high prio and therefore should try to reclaim as much as
	 * possible.
	 */
	while (total_scan >= batch_size ||
	       total_scan >= freeable) {
		unsigned long ret;
		unsigned long nr_to_scan = min(batch_size, total_scan);

		shrinkctl->nr_to_scan = nr_to_scan;
		shrinkctl->nr_scanned = nr_to_scan;
		ret = shrinker->scan_objects(shrinker, shrinkctl);
		if (ret == SHRINK_STOP)
			break;
		freed += ret;

		count_vm_events(SLABS_SCANNED, shrinkctl->nr_scanned);
		total_scan -= shrinkctl->nr_scanned;
		scanned += shrinkctl->nr_scanned;

		cond_resched();
	}
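	/*
	 * Illustrative walk-through (numbers are made up): with
	 * batch_size = SHRINK_BATCH (128), freeable = 1000 and
	 * total_scan = 300, the loop runs twice (128 + 128 objects) and then
	 * stops with 44 left over, which is carried forward via next_deferred
	 * below instead of triggering a third, tiny shrinker call.
	 */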
	/*
	 * The deferred work is increased by any new work (delta) that wasn't
	 * done, decreased by old deferred work that was done now.
	 *
	 * And it is capped to two times of the freeable items.
	 */
	next_deferred = max_t(long, (nr + delta - scanned), 0);
	next_deferred = min(next_deferred, (2 * freeable));
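	/*
	 * Continuing the illustrative numbers above: with nr = 0, delta = 300
	 * and scanned = 256, next_deferred = 44, i.e. exactly the work that
	 * was skipped; the min() against 2 * freeable keeps a slow shrinker
	 * from accumulating an unbounded backlog of deferred work.
	 */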
	/*
	 * move the unused scan count back into the shrinker in a
	 * manner that handles concurrent updates.
	 */
	new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);

	trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
	return freed;
}

#ifdef CONFIG_MEMCG
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
				       struct mem_cgroup *memcg, int priority)
{
	struct shrinker_info *info;
	unsigned long ret, freed = 0;
	int offset, index = 0;

	if (!mem_cgroup_online(memcg))
		return 0;

	/*
	 * lockless algorithm of memcg shrink.
	 *
	 * The shrinker_info may be freed asynchronously via RCU in the
	 * expand_one_shrinker_info(), so the rcu_read_lock() needs to be used
	 * to ensure the existence of the shrinker_info.
	 *
	 * The shrinker_info_unit is never freed unless its corresponding memcg
	 * is destroyed. Here we already hold the refcount of memcg, so the
	 * memcg will not be destroyed, and of course shrinker_info_unit will
	 * not be freed.
	 *
	 * So in the memcg shrink:
	 *  step 1: use rcu_read_lock() to guarantee existence of the
	 *          shrinker_info.
	 *  step 2: after getting shrinker_info_unit we can safely release the
	 *          RCU lock.
	 *  step 3: traverse the bitmap and calculate shrinker_id
	 *  step 4: use rcu_read_lock() to guarantee existence of the shrinker.
	 *  step 5: use shrinker_id to find the shrinker, then use
	 *          shrinker_try_get() to guarantee existence of the shrinker,
	 *          then we can release the RCU lock to do do_shrink_slab() that
	 *          may sleep.
	 *  step 6: do shrinker_put() paired with step 5 to put the refcount,
	 *          if the refcount reaches 0, then wake up the waiter in
	 *          shrinker_free() by calling complete().
	 *          Note: here is different from the global shrink, we don't
	 *                need to acquire the RCU lock to guarantee existence of
	 *                the shrinker, because we don't need to use this
	 *                shrinker to traverse the next shrinker in the bitmap.
	 *  step 7: we have already exited the read-side of rcu critical section
	 *          before calling do_shrink_slab(), the shrinker_info may be
	 *          released in expand_one_shrinker_info(), so go back to step 1
	 *          to reacquire the shrinker_info.
	 */
again:
	rcu_read_lock();
	info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
	if (unlikely(!info))
		goto unlock;
	if (index < shrinker_id_to_index(info->map_nr_max)) {
		struct shrinker_info_unit *unit;

		unit = info->unit[index];

		rcu_read_unlock();

		for_each_set_bit(offset, unit->map, SHRINKER_UNIT_BITS) {
			struct shrink_control sc = {
				.gfp_mask = gfp_mask,
				.nid = nid,
				.memcg = memcg,
			};
			struct shrinker *shrinker;
			int shrinker_id = calc_shrinker_id(index, offset);

			rcu_read_lock();
			shrinker = idr_find(&shrinker_idr, shrinker_id);
			if (unlikely(!shrinker || !shrinker_try_get(shrinker))) {
				clear_bit(offset, unit->map);
				rcu_read_unlock();
				continue;
			}
			rcu_read_unlock();

			/* Call non-slab shrinkers even though kmem is disabled */
			if (!memcg_kmem_online() &&
			    !(shrinker->flags & SHRINKER_NONSLAB))
				continue;
			ret = do_shrink_slab(&sc, shrinker, priority);
			if (ret == SHRINK_EMPTY) {
				clear_bit(offset, unit->map);
				/*
				 * After the shrinker reported that it had no objects to
				 * free, but before we cleared the corresponding bit in
				 * the memcg shrinker map, a new object might have been
				 * added. To make sure, we have the bit set in this
				 * case, we invoke the shrinker one more time and reset
				 * the bit if it reports that it is not empty anymore.
				 * The memory barrier here pairs with the barrier in
				 * set_shrinker_bit():
				 *
				 * list_lru_add()     shrink_slab_memcg()
				 *   list_add_tail()    clear_bit()
				 *   <MB>               <MB>
				 *   set_bit()          do_shrink_slab()
				 */
				smp_mb__after_atomic();
				ret = do_shrink_slab(&sc, shrinker, priority);
				if (ret == SHRINK_EMPTY)
					ret = 0;
				else
					set_shrinker_bit(memcg, nid, shrinker_id);
			}
			freed += ret;
			shrinker_put(shrinker);
		}

		index++;
		goto again;
	}
unlock:
	rcu_read_unlock();
	return freed;
}
#else /* !CONFIG_MEMCG */
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
				       struct mem_cgroup *memcg, int priority)
{
	return 0;
}
#endif /* CONFIG_MEMCG */

/**
 * shrink_slab - shrink slab caches
 * @gfp_mask: allocation context
 * @nid: node whose slab caches to target
 * @memcg: memory cgroup whose slab caches to target
 * @priority: the reclaim priority
 *
 * Call the shrink functions to age shrinkable caches.
 *
 * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
 * unaware shrinkers will receive a node id of 0 instead.
 *
 * @memcg specifies the memory cgroup to target. Unaware shrinkers
 * are called only if it is the root cgroup.
 *
 * @priority is sc->priority, we take the number of objects and >> by priority
 * in order to get the scan target.
 *
 * Returns the number of reclaimed slab objects.
 */
unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
			  int priority)
{
	unsigned long ret, freed = 0;
	struct shrinker *shrinker;
	/*
	 * The root memcg might be allocated even though memcg is disabled
	 * via "cgroup_disable=memory" boot parameter. This could make
	 * mem_cgroup_is_root() return false, then just run memcg slab
	 * shrink, but skip global shrink. This may result in premature
	 * oom.
	 */
	if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
		return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
	/*
	 * lockless algorithm of global shrink.
	 *
	 * In the unregistration step, the shrinker will be freed asynchronously
	 * via RCU after its refcount reaches 0. So both rcu_read_lock() and
	 * shrinker_try_get() can be used to ensure the existence of the shrinker.
	 *
	 * So in the global shrink:
	 *  step 1: use rcu_read_lock() to guarantee existence of the shrinker
	 *          and the validity of the shrinker_list walk.
	 *  step 2: use shrinker_try_get() to try get the refcount, if successful,
	 *          then the existence of the shrinker can also be guaranteed,
	 *          so we can release the RCU lock to do do_shrink_slab() that
	 *          may sleep.
	 *  step 3: *MUST* reacquire the RCU lock before calling shrinker_put(),
	 *          which ensures that neither this shrinker nor the next shrinker
	 *          will be freed in the next traversal operation.
	 *  step 4: do shrinker_put() paired with step 2 to put the refcount,
	 *          if the refcount reaches 0, then wake up the waiter in
	 *          shrinker_free() by calling complete().
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(shrinker, &shrinker_list, list) {
		struct shrink_control sc = {
			.gfp_mask = gfp_mask,
			.nid = nid,
			.memcg = memcg,
		};

		if (!shrinker_try_get(shrinker))
			continue;

		rcu_read_unlock();

		ret = do_shrink_slab(&sc, shrinker, priority);
		if (ret == SHRINK_EMPTY)
			ret = 0;
		freed += ret;

		rcu_read_lock();
		shrinker_put(shrinker);
	}

	rcu_read_unlock();
	return freed;
}

struct shrinker *shrinker_alloc(unsigned int flags, const char *fmt, ...)
{
	struct shrinker *shrinker;
	unsigned int size;
	va_list ap;
	int err;

	shrinker = kzalloc(sizeof(struct shrinker), GFP_KERNEL);
	if (!shrinker)
		return NULL;

	va_start(ap, fmt);
	err = shrinker_debugfs_name_alloc(shrinker, fmt, ap);
	va_end(ap);
	if (err)
		goto err_name;

	shrinker->flags = flags | SHRINKER_ALLOCATED;
	shrinker->seeks = DEFAULT_SEEKS;

	if (flags & SHRINKER_MEMCG_AWARE) {
		err = shrinker_memcg_alloc(shrinker);
		if (err == -ENOSYS) {
			/* Memcg is not supported, fallback to non-memcg-aware shrinker. */
			shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
			goto non_memcg;
		}

		if (err)
			goto err_flags;

		return shrinker;
	}
non_memcg:
	/*
	 * The nr_deferred is available on per memcg level for memcg aware
	 * shrinkers, so only allocate nr_deferred in the following cases:
	 *  - non-memcg-aware shrinkers
	 *  - !CONFIG_MEMCG
	 *  - memcg is disabled by kernel command line
	 */
	size = sizeof(*shrinker->nr_deferred);
	if (flags & SHRINKER_NUMA_AWARE)
		size *= nr_node_ids;
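	/*
	 * Illustrative sizing (not in the original file): on a 4-node
	 * machine, a SHRINKER_NUMA_AWARE shrinker gets one deferred counter
	 * per node, i.e. size = 4 * sizeof(atomic_long_t) = 32 bytes on a
	 * 64-bit build; without the flag a single counter is allocated.
	 */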
	shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
	if (!shrinker->nr_deferred)
		goto err_flags;

	return shrinker;

err_flags:
	shrinker_debugfs_name_free(shrinker);
err_name:
	kfree(shrinker);
	return NULL;
}
EXPORT_SYMBOL_GPL(shrinker_alloc);

void shrinker_register(struct shrinker *shrinker)
{
	if (unlikely(!(shrinker->flags & SHRINKER_ALLOCATED))) {
		pr_warn("Must use shrinker_alloc() to dynamically allocate the shrinker");
		return;
	}

	mutex_lock(&shrinker_mutex);
	list_add_tail_rcu(&shrinker->list, &shrinker_list);
	shrinker->flags |= SHRINKER_REGISTERED;
	shrinker_debugfs_add(shrinker);
	mutex_unlock(&shrinker_mutex);

	init_completion(&shrinker->done);
	/*
	 * Now the shrinker is fully set up, take the first reference to it to
	 * indicate that lookup operations are now allowed to use it via
	 * shrinker_try_get().
	 */
	refcount_set(&shrinker->refcount, 1);
}
EXPORT_SYMBOL_GPL(shrinker_register);
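
/*
 * Illustrative usage sketch (not part of this file): the pattern callers
 * are expected to follow with the shrinker_alloc()/shrinker_register()
 * API. The "demo" names, the demo_cache_nr counter and demo_cache_evict()
 * are hypothetical stand-ins for a real object cache.
 */
#if 0
static atomic_long_t demo_cache_nr;
static struct shrinker *demo_shrinker;

static unsigned long demo_count_objects(struct shrinker *s,
					struct shrink_control *sc)
{
	/* Report how many objects could be reclaimed right now. */
	return atomic_long_read(&demo_cache_nr);
}

static unsigned long demo_scan_objects(struct shrinker *s,
				       struct shrink_control *sc)
{
	/* Free up to sc->nr_to_scan objects, return the number freed. */
	return demo_cache_evict(sc->nr_to_scan);
}

static int __init demo_init(void)
{
	demo_shrinker = shrinker_alloc(0, "demo-cache");
	if (!demo_shrinker)
		return -ENOMEM;

	demo_shrinker->count_objects = demo_count_objects;
	demo_shrinker->scan_objects = demo_scan_objects;

	/* From here on, shrink_slab() may invoke the callbacks. */
	shrinker_register(demo_shrinker);
	return 0;
}
#endif
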
void shrinker_free(struct shrinker *shrinker)
{
	struct dentry *debugfs_entry = NULL;
	int debugfs_id;

	if (!shrinker)
		return;

	if (shrinker->flags & SHRINKER_REGISTERED) {
		/* drop the initial refcount */
		shrinker_put(shrinker);
		/*
		 * Wait for all lookups of the shrinker to complete, after that,
		 * no shrinker is running or will run again, then we can safely
		 * free it asynchronously via RCU and safely free the structure
		 * where the shrinker is located, such as super_block etc.
		 */
		wait_for_completion(&shrinker->done);
	}
	mutex_lock(&shrinker_mutex);
	if (shrinker->flags & SHRINKER_REGISTERED) {
		/*
		 * Now we can safely remove it from the shrinker_list and then
		 * free it.
		 */
		list_del_rcu(&shrinker->list);
		debugfs_entry = shrinker_debugfs_detach(shrinker, &debugfs_id);
		shrinker->flags &= ~SHRINKER_REGISTERED;
	}
	shrinker_debugfs_name_free(shrinker);

	if (shrinker->flags & SHRINKER_MEMCG_AWARE)
		shrinker_memcg_remove(shrinker);
	mutex_unlock(&shrinker_mutex);

	if (debugfs_entry)
		shrinker_debugfs_remove(debugfs_entry, debugfs_id);

	shrinker_put(shrinker);
}
EXPORT_SYMBOL_GPL(shrinker_free);
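
/*
 * Teardown counterpart to the sketch above (illustrative, hypothetical
 * "demo" names): once shrinker_free() returns, no callback is running or
 * will run again, so it is safe to tear down the cache the shrinker was
 * draining.
 */
#if 0
static void __exit demo_exit(void)
{
	shrinker_free(demo_shrinker);
	/* Now safe to free the demo cache itself. */
}
#endif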