// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2023-2024 Intel Corporation (Maarten Lankhorst <dev@lankhorst.se>)
 * Copyright 2024 Red Hat (Maxime Ripard <mripard@kernel.org>)
 * Partially based on the rdma and misc controllers, which bear the following copyrights:
 *
 * Copyright 2020 Google LLC
 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
 */
/*
 * A device-memory region that can be charged against dmem cgroups.
 * Registered via dmem_cgroup_register_region() and looked up under RCU.
 */
struct dmem_cgroup_region {
	/**
	 * @ref: References keeping the region alive.
	 * Keeps the region reference alive after a successful RCU lookup.
	 */
	struct kref ref;

	/** @rcu: RCU head for freeing */
	struct rcu_head rcu;

	/**
	 * @region_node: Linked into &dmem_cgroup_regions list.
	 * Protected by RCU and global spinlock.
	 */
	struct list_head region_node;

	/**
	 * @pools: List of pools linked to this region.
	 * Protected by global spinlock only
	 */
	struct list_head pools;

	/** @size: Size of region, in bytes */
	u64 size;

	/** @name: Name describing the node, set by dmem_cgroup_register_region */
	char *name;

	/**
	 * @unregistered: Whether the region is unregistered by its caller.
	 * No new pools should be added to the region afterwards.
	 */
	bool unregistered;
};
/* NOTE(review): interior of the per-css pool-state struct (presumably
 * struct dmem_cgroup_pool_state); its opening line is outside this view —
 * confirm against the full file. */
	/* css node, RCU protected against region teardown */
	struct list_head css_node;

	/* dev node, no RCU protection required */
	struct list_head region_node;

	/* RCU head for deferred freeing of this pool state */
	struct rcu_head rcu;

	/* Charge counter for this pool; parent link is wired up during init */
	struct page_counter cnt;

	/* Set true once the pool is fully initialized (parent counter linked) */
	bool inited;
};
/*
 * 3 operations require locking protection:
 * - Registering and unregistering region to/from list, requires global lock.
 * - Adding a dmem_cgroup_pool_state to a CSS, removing when CSS is freed.
 * - Adding a dmem_cgroup_pool_state to a region list.
 *
 * Since for the most common operations RCU provides enough protection, I
 * do not think more granular locking makes sense. Most protection is offered
 * by RCU and the lockless operating page_counter.
 */
static DEFINE_SPINLOCK(dmemcg_lock);
static LIST_HEAD(dmem_cgroup_regions);
spin_lock(&dmemcg_lock);
list_for_each_entry_safe(pool, next, &dmemcs->pools, css_node) { /* *The pool is dead and all references are 0, * no need for RCU protection with list_del_rcu or freeing.
*/
list_del(&pool->css_node);
free_cg_pool(pool);
}
spin_unlock(&dmemcg_lock);
if (found_pool == test_pool) break;
}
rcu_read_unlock();
}
/** * dmem_cgroup_state_evict_valuable() - Check if we should evict from test_pool * @limit_pool: The pool for which we hit limits * @test_pool: The pool for which to test * @ignore_low: Whether we have to respect low watermarks. * @ret_hit_low: Pointer to whether it makes sense to consider low watermark. * * This function returns true if we can evict from @test_pool, false if not. * When returning false and @ignore_low is false, @ret_hit_low may * be set to true to indicate this function can be retried with @ignore_low * set to true. * * Return: bool
*/ bool dmem_cgroup_state_evict_valuable(struct dmem_cgroup_pool_state *limit_pool, struct dmem_cgroup_pool_state *test_pool, bool ignore_low, bool *ret_hit_low)
{ struct dmem_cgroup_pool_state *pool = test_pool; struct page_counter *ctest;
u64 used, min, low;
/* Can always evict from current pool, despite limits */ if (limit_pool == test_pool) returntrue;
if (limit_pool) { if (!parent_dmemcs(limit_pool->cs)) returntrue;
for (pool = test_pool; pool && limit_pool != pool; pool = pool_parent(pool))
{}
if (!pool) returnfalse;
} else { /* * If there is no cgroup limiting memory usage, use the root * cgroup instead for limit calculations.
*/ for (limit_pool = test_pool; pool_parent(limit_pool); limit_pool = pool_parent(limit_pool))
{}
}
/* * Recursively create pool, we may not initialize yet on * recursion, this is done as a separate step.
*/ for (p = dmemcs; p; p = parent_dmemcs(p)) {
pool = find_cg_pool_locked(p, region); if (!pool)
pool = alloc_pool_single(p, region, allocpool);
if (IS_ERR(pool)) return pool;
if (p == dmemcs && pool->inited) return pool;
if (pool->inited) break;
}
retpool = pool = find_cg_pool_locked(dmemcs, region); for (p = dmemcs, pp = parent_dmemcs(dmemcs); pp; p = pp, pp = parent_dmemcs(p)) { if (pool->inited) break;
/* ppool was created if it didn't exist by above loop. */
ppool = find_cg_pool_locked(pp, region);
/* Fix up parent links, mark as inited. */
pool->cnt.parent = &ppool->cnt;
pool->inited = true;
/** * dmem_cgroup_unregister_region() - Unregister a previously registered region. * @region: The region to unregister. * * This function undoes dmem_cgroup_register_region.
*/ void dmem_cgroup_unregister_region(struct dmem_cgroup_region *region)
{ struct list_head *entry;
if (!region) return;
spin_lock(&dmemcg_lock);
/* Remove from global region list */
list_del_rcu(®ion->region_node);
/* * Ensure any RCU based lookups fail. Additionally, * no new pools should be added to the dead region * by get_cg_pool_unlocked.
*/
region->unregistered = true;
spin_unlock(&dmemcg_lock);
/** * dmem_cgroup_register_region() - Register a regions for dev cgroup. * @size: Size of region to register, in bytes. * @fmt: Region parameters to register * * This function registers a node in the dmem cgroup with the * name given. After calling this function, the region can be * used for allocations. * * Return: NULL or a struct on success, PTR_ERR on failure.
*/ struct dmem_cgroup_region *dmem_cgroup_register_region(u64 size, constchar *fmt, ...)
{ struct dmem_cgroup_region *ret; char *region_name;
va_list ap;
/**
 * dmem_cgroup_pool_state_put() - Drop a reference to a dmem_cgroup_pool_state
 * @pool: &dmem_cgroup_pool_state
 *
 * Called to drop a reference to the limiting pool returned by
 * dmem_cgroup_try_charge(). Passing a NULL @pool is a no-op.
 */
void dmem_cgroup_pool_state_put(struct dmem_cgroup_pool_state *pool)
{
	if (!pool)
		return;

	css_put(&pool->cs->css);
}
EXPORT_SYMBOL_GPL(dmem_cgroup_pool_state_put);
/* fastpath lookup? */
rcu_read_lock();
pool = find_cg_pool_locked(cg, region); if (pool && !READ_ONCE(pool->inited))
pool = NULL;
rcu_read_unlock();
while (!pool) {
spin_lock(&dmemcg_lock); if (!region->unregistered)
pool = get_cg_pool_locked(cg, region, &allocpool); else
pool = ERR_PTR(-ENODEV);
spin_unlock(&dmemcg_lock);
if (pool == ERR_PTR(-ENOMEM)) {
pool = NULL; if (WARN_ON(allocpool)) continue;
allocpool = kzalloc(sizeof(*allocpool), GFP_KERNEL); if (allocpool) {
pool = NULL; continue;
}
}
}
kfree(allocpool); return pool;
}
/** * dmem_cgroup_uncharge() - Uncharge a pool. * @pool: Pool to uncharge. * @size: Size to uncharge. * * Undoes the effects of dmem_cgroup_try_charge. * Must be called with the returned pool as argument, * and same @index and @size.
*/ void dmem_cgroup_uncharge(struct dmem_cgroup_pool_state *pool, u64 size)
{ if (!pool) return;
/** * dmem_cgroup_try_charge() - Try charging a new allocation to a region. * @region: dmem region to charge * @size: Size (in bytes) to charge. * @ret_pool: On succesfull allocation, the pool that is charged. * @ret_limit_pool: On a failed allocation, the limiting pool. * * This function charges the @region region for a size of @size bytes. * * If the function succeeds, @ret_pool is set, which must be passed to * dmem_cgroup_uncharge() when undoing the allocation. * * When this function fails with -EAGAIN and @ret_limit_pool is non-null, it * will be set to the pool for which the limit is hit. This can be used for * eviction as argument to dmem_cgroup_evict_valuable(). This reference must be freed * with @dmem_cgroup_pool_state_put(). * * Return: 0 on success, -EAGAIN on hitting a limit, or a negative errno on failure.
*/ int dmem_cgroup_try_charge(struct dmem_cgroup_region *region, u64 size, struct dmem_cgroup_pool_state **ret_pool, struct dmem_cgroup_pool_state **ret_limit_pool)
{ struct dmemcg_state *cg; struct dmem_cgroup_pool_state *pool; struct page_counter *fail; int ret;
*ret_pool = NULL; if (ret_limit_pool)
*ret_limit_pool = NULL;
/* * hold on to css, as cgroup can be removed but resource * accounting happens on css.
*/
cg = get_current_dmemcs();
pool = get_cg_pool_unlocked(cg, region); if (IS_ERR(pool)) {
ret = PTR_ERR(pool); goto err;
}
if (!page_counter_try_charge(&pool->cnt, size, &fail)) { if (ret_limit_pool) {
*ret_limit_pool = container_of(fail, struct dmem_cgroup_pool_state, cnt);
css_get(&(*ret_limit_pool)->cs->css);
}
ret = -EAGAIN; goto err;
}
/* On success, reference from get_current_dmemcs is transferred to *ret_pool */
*ret_pool = pool; return 0;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.