/* * per-node information in memory controller.
*/ struct mem_cgroup_per_node { /* Keep the read-only fields at the start */ struct mem_cgroup *memcg; /* Back pointer, we cannot */ /* use container_of */
#ifdef CONFIG_MEMCG_V1 /* * Memcg-v1 only stuff in middle as buffer between read mostly fields * and update often fields to avoid false sharing. If v1 stuff is * not present, an explicit padding is needed.
*/
struct rb_node tree_node; /* RB tree node */ unsignedlong usage_in_excess;/* Set to the value by which */ /* the soft limit is exceeded*/ bool on_tree; #else
CACHELINE_PADDING(_pad1_); #endif
/* Fields which get updated often at the end. */ struct lruvec lruvec;
CACHELINE_PADDING(_pad2_); unsignedlong lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; struct mem_cgroup_reclaim_iter iter;
/* For threshold */
struct mem_cgroup_threshold_ary {
	/* An array index points to threshold just below or equal to usage. */
	int current_threshold;
	/* Size of entries[] */
	unsigned int size;
	/* Array of thresholds */
	struct mem_cgroup_threshold entries[] __counted_by(size);
};
struct mem_cgroup_thresholds {
	/* Primary thresholds array */
	struct mem_cgroup_threshold_ary *primary;
	/*
	 * Spare threshold array.
	 * This is needed to make mem_cgroup_unregister_event() "never fail".
	 * It must be able to store at least primary->size - 1 entries.
	 */
	struct mem_cgroup_threshold_ary *spare;
};
/*
 * Remember four most recent foreign writebacks with dirty pages in this
 * cgroup.  Inode sharing is expected to be uncommon and, even if we miss
 * one in a given round, we're likely to catch it later if it keeps
 * foreign-dirtying, so a fairly low count should be enough.
 *
 * See mem_cgroup_track_foreign_dirty_slowpath() for details.
 */
#define MEMCG_CGWB_FRN_CNT	4

struct memcg_cgwb_frn {
	u64 bdi_id;			/* bdi->id of the foreign inode */
	int memcg_id;			/* memcg->css.id of foreign inode */
	u64 at;				/* jiffies_64 at the time of dirtying */
	struct wb_completion done;	/* tracks in-flight foreign writebacks */
};
/*
 * Bucket for arbitrarily byte-sized objects charged to a memory
 * cgroup. The bucket can be reparented in one piece when the cgroup
 * is destroyed, without having to round up the individual references
 * of all live memory objects in the wild.
 */
struct obj_cgroup {
	struct percpu_ref refcnt;
	struct mem_cgroup *memcg;
	atomic_t nr_charged_bytes;
	union {
		struct list_head list; /* protected by objcg_lock */
		struct rcu_head rcu;
	};
};
/* * The memory controller data structure. The memory controller controls both * page cache and RSS per cgroup. We would eventually like to provide * statistics based on the statistics developed by Rik Van Riel for clock-pro, * to help the administrator determine what knobs to tune.
*/ struct mem_cgroup { struct cgroup_subsys_state css;
/* Private memcg ID. Used to ID objects that outlive the cgroup */ struct mem_cgroup_id id;
/* Range enforcement for interrupt charges */ struct work_struct high_work;
#ifdef CONFIG_ZSWAP unsignedlong zswap_max;
/* * Prevent pages from this memcg from being written back from zswap to * swap, and from being swapped out on zswap store failures.
*/ bool zswap_writeback; #endif
#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC /* MEMCG_KMEM for nmi context */
atomic_t kmem_stat; #endif /* * Hint of reclaim pressure for socket memroy management. Note * that this indicator should NOT be used in legacy cgroup mode * where socket memory is accounted/charged separately.
*/
u64 socket_pressure; #if BITS_PER_LONG < 64
seqlock_t socket_pressure_seqlock; #endif int kmemcg_id; /* * memcg->objcg is wiped out as a part of the objcg repaprenting * process. memcg->orig_objcg preserves a pointer (and a reference) * to the original objcg until the end of live of memcg.
*/ struct obj_cgroup __rcu *objcg; struct obj_cgroup *orig_objcg; /* list of inherited objcgs, protected by objcg_lock */ struct list_head objcg_list;
/* protected by memcg_oom_lock */ bool oom_lock; int under_oom;
/* OOM-Killer disable */ int oom_kill_disable;
/* protect arrays of thresholds */ struct mutex thresholds_lock;
/* thresholds for memory usage. RCU-protected */ struct mem_cgroup_thresholds thresholds;
/* thresholds for mem+swap usage. RCU-protected */ struct mem_cgroup_thresholds memsw_thresholds;
/* For oom notifier event fd */ struct list_head oom_notify;
/* Legacy tcp memory accounting */ bool tcpmem_active; int tcpmem_pressure;
/* List of events which userspace want to receive */ struct list_head event_list;
spinlock_t event_list_lock; #endif/* CONFIG_MEMCG_V1 */
struct mem_cgroup_per_node *nodeinfo[];
};
/*
 * size of first charge trial.
 * TODO: maybe necessary to use big numbers in big irons or dynamic based of the
 * workload.
 */
#define MEMCG_CHARGE_BATCH	64U

extern struct mem_cgroup *root_mem_cgroup;
enum page_memcg_data_flags {
	/* page->memcg_data is a pointer to an slabobj_ext vector */
	MEMCG_DATA_OBJEXTS = (1UL << 0),
	/* page has been accounted as a non-slab kernel page */
	MEMCG_DATA_KMEM = (1UL << 1),
	/* the next bit after the last actual flag */
	__NR_MEMCG_DATA_FLAGS = (1UL << 2),
};

#define __FIRST_OBJEXT_FLAG	__NR_MEMCG_DATA_FLAGS

#else /* CONFIG_MEMCG */

#define __FIRST_OBJEXT_FLAG	(1UL << 0)

#endif /* CONFIG_MEMCG */
enum objext_flags {
	/* slabobj_ext vector failed to allocate */
	OBJEXTS_ALLOC_FAIL = __FIRST_OBJEXT_FLAG,
	/* the next bit after the last actual flag */
	__NR_OBJEXTS_FLAGS = (__FIRST_OBJEXT_FLAG << 1),
};
/* * After the initialization objcg->memcg is always pointing at * a valid memcg, but can be atomically swapped to the parent memcg. * * The caller must ensure that the returned memcg won't be released.
*/ staticinlinestruct mem_cgroup *obj_cgroup_memcg(struct obj_cgroup *objcg)
{
lockdep_assert_once(rcu_read_lock_held() || lockdep_is_held(&cgroup_mutex)); return READ_ONCE(objcg->memcg);
}
/* * __folio_memcg - Get the memory cgroup associated with a non-kmem folio * @folio: Pointer to the folio. * * Returns a pointer to the memory cgroup associated with the folio, * or NULL. This function assumes that the folio is known to have a * proper memory cgroup pointer. It's not safe to call this function * against some type of folios, e.g. slab folios or ex-slab folios or * kmem folios.
 */ staticinlinestruct mem_cgroup *__folio_memcg(struct folio *folio)
{ unsignedlong memcg_data = folio->memcg_data;
/*
 * NOTE(review): the body of __folio_memcg() is truncated in this excerpt —
 * the sanity checks and the return statement (and the closing brace) are
 * missing. Restore it from the upstream header rather than guessing here.
 */
/* * __folio_objcg - get the object cgroup associated with a kmem folio. * @folio: Pointer to the folio. * * Returns a pointer to the object cgroup associated with the folio, * or NULL. This function assumes that the folio is known to have a * proper object cgroup pointer. It's not safe to call this function * against some type of folios, e.g. slab folios or ex-slab folios or * LRU folios.
 */ staticinlinestruct obj_cgroup *__folio_objcg(struct folio *folio)
{ unsignedlong memcg_data = folio->memcg_data;
/*
 * NOTE(review): the body of __folio_objcg() is truncated in this excerpt —
 * the sanity checks and the return statement (and the closing brace) are
 * missing. Restore it from the upstream header rather than guessing here.
 */
/*
 * folio_memcg - Get the memory cgroup associated with a folio.
 * @folio: Pointer to the folio.
 *
 * Returns a pointer to the memory cgroup associated with the folio,
 * or NULL. This function assumes that the folio is known to have a
 * proper memory cgroup pointer. It's not safe to call this function
 * against some type of folios, e.g. slab folios or ex-slab folios.
 *
 * For a non-kmem folio any of the following ensures folio and memcg binding
 * stability:
 *
 * - the folio lock
 * - LRU isolation
 * - exclusive reference
 *
 * For a kmem folio a caller should hold an rcu read lock to protect memcg
 * associated with a kmem folio from being released.
 */
static inline struct mem_cgroup *folio_memcg(struct folio *folio)
{
	/* Kmem folios store an objcg pointer; resolve it to its memcg. */
	if (folio_memcg_kmem(folio))
		return obj_cgroup_memcg(__folio_objcg(folio));
	return __folio_memcg(folio);
}
/* * folio_memcg_charged - If a folio is charged to a memory cgroup. * @folio: Pointer to the folio. * * Returns true if folio is charged to a memory cgroup, otherwise returns false.
*/ staticinlinebool folio_memcg_charged(struct folio *folio)
{ return folio->memcg_data != 0;
}
/* * folio_memcg_check - Get the memory cgroup associated with a folio. * @folio: Pointer to the folio. * * Returns a pointer to the memory cgroup associated with the folio, * or NULL. This function unlike folio_memcg() can take any folio * as an argument. It has to be used in cases when it's not known if a folio * has an associated memory cgroup pointer or an object cgroups vector or * an object cgroup. * * For a non-kmem folio any of the following ensures folio and memcg binding * stability: * * - the folio lock * - LRU isolation * - exclusive reference * * For a kmem folio a caller should hold an rcu read lock to protect memcg * associated with a kmem folio from being released.
 */ staticinlinestruct mem_cgroup *folio_memcg_check(struct folio *folio)
{ /* * Because folio->memcg_data might be changed asynchronously * for slabs, READ_ONCE() should be used here.
 */ unsignedlong memcg_data = READ_ONCE(folio->memcg_data);
if (memcg_data & MEMCG_DATA_OBJEXTS) return NULL;
if (memcg_data & MEMCG_DATA_KMEM) { struct obj_cgroup *objcg;
/*
 * NOTE(review): this body is corrupted in the excerpt — it appears spliced
 * with a css_tryget()-based getter from another function: `objcg` is never
 * assigned from memcg_data, `memcg` is never declared, and the function's
 * final return and closing brace are missing. Restore from upstream.
 */
rcu_read_lock();
retry:
memcg = obj_cgroup_memcg(objcg); if (unlikely(!css_tryget(&memcg->css))) goto retry;
rcu_read_unlock();
return memcg;
}
/* * folio_memcg_kmem - Check if the folio has the memcg_kmem flag set. * @folio: Pointer to the folio. * * Checks if the folio has MemcgKmem flag set. The caller must ensure * that the folio has an associated memory cgroup. It's not safe to call * this function against some types of folios, e.g. slab folios.
*/ staticinlinebool folio_memcg_kmem(struct folio *folio)
{
VM_BUG_ON_PGFLAGS(PageTail(&folio->page), &folio->page);
VM_BUG_ON_FOLIO(folio->memcg_data & MEMCG_DATA_OBJEXTS, folio); return folio->memcg_data & MEMCG_DATA_KMEM;
}
/* * There is no reclaim protection applied to a targeted reclaim. * We are special casing this specific case here because * mem_cgroup_calculate_protection is not robust enough to keep * the protection invariant for calculated effective values for * parallel reclaimers with different reclaim target. This is * especially a problem for tail memcgs (as they have pages on LRU) * which would want to have effective values 0 for targeted reclaim * but a different value for external reclaim. * * Example * Let's have global and A's reclaim in parallel: * | * A (low=2G, usage = 3G, max = 3G, children_low_usage = 1.5G) * |\ * | C (low = 1G, usage = 2.5G) * B (low = 1G, usage = 0.5G) * * For the global reclaim * A.elow = A.low * B.elow = min(B.usage, B.low) because children_low_usage <= A.elow * C.elow = min(C.usage, C.low) * * With the effective values resetting we have A reclaim * A.elow = 0 * B.elow = B.low * C.elow = C.low * * If the global reclaim races with A's reclaim then * B.elow = C.elow = 0 because children_low_usage > A.elow) * is possible and reclaiming B would be violating the protection. *
/*
 * NOTE(review): the statement below is orphaned in this excerpt — the
 * enclosing function (which declares `root` and `memcg`) is missing.
 * It appears to belong to a protection-reset helper; verify upstream.
 */
 */ if (root == memcg) return;
int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp);

/**
 * mem_cgroup_charge - Charge a newly allocated folio to a cgroup.
 * @folio: Folio to charge.
 * @mm: mm context of the allocating task.
 * @gfp: Reclaim mode.
 *
 * Try to charge @folio to the memcg that @mm belongs to, reclaiming
 * pages according to @gfp if necessary. If @mm is NULL, try to
 * charge to the active memcg.
 *
 * Do not use this for folios allocated for swapin.
 *
 * Return: 0 on success. Otherwise, an error code is returned.
 */
static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm,
				    gfp_t gfp)
{
	/* With memcg disabled every charge trivially succeeds. */
	if (mem_cgroup_disabled())
		return 0;
	return __mem_cgroup_charge(folio, mm, gfp);
}
int mem_cgroup_charge_hugetlb(struct folio *folio, gfp_t gfp);
/** * mem_cgroup_lruvec - get the lru list vector for a memcg & node * @memcg: memcg of the wanted lruvec * @pgdat: pglist_data * * Returns the lru list vector holding pages for a given @memcg & * @pgdat combination. This can be the node lruvec, if the memory * controller is disabled.
*/ staticinlinestruct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg, struct pglist_data *pgdat)
{ struct mem_cgroup_per_node *mz; struct lruvec *lruvec;
if (mem_cgroup_disabled()) {
lruvec = &pgdat->__lruvec; goto out;
}
if (!memcg)
memcg = root_mem_cgroup;
mz = memcg->nodeinfo[pgdat->node_id];
lruvec = &mz->lruvec;
out: /* * Since a node can be onlined after the mem_cgroup was created, * we have to be prepared to initialize lruvec->pgdat here; * and if offlined then reonlined, we need to reinitialize it.
*/ if (unlikely(lruvec->pgdat != pgdat))
lruvec->pgdat = pgdat; return lruvec;
}
/* * folio_lruvec - return lruvec for isolating/putting an LRU folio * @folio: Pointer to the folio. * * This function relies on folio->mem_cgroup being stable.
 */ staticinlinestruct lruvec *folio_lruvec(struct folio *folio)
{ struct mem_cgroup *memcg = folio_memcg(folio);
/*
 * NOTE(review): the body of folio_lruvec() is truncated in this excerpt —
 * the return statement (and closing brace) is missing. Restore from the
 * upstream header rather than guessing here.
 */
/** * parent_mem_cgroup - find the accounting parent of a memcg * @memcg: memcg whose parent to find * * Returns the parent memcg, or NULL if this is the root.
*/ staticinlinestruct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
{ return mem_cgroup_from_css(memcg->css.parent);
}
/*
 * NOTE(review): the lines below are the tail of a function body — the
 * enclosing signature (presumably memcg_memory_event()-like, declaring
 * `memcg`, `event`, `allow_spinning` and `swap_event`) is missing from this
 * excerpt. Preserved as-is; restore the full function from upstream.
 */
/* For now only MEMCG_MAX can happen with !allow_spinning context. */
VM_WARN_ON_ONCE(!allow_spinning && event != MEMCG_MAX);
atomic_long_inc(&memcg->memory_events_local[event]); if (!swap_event && allow_spinning)
cgroup_file_notify(&memcg->events_local_file);
do {
atomic_long_inc(&memcg->memory_events[event]); if (allow_spinning) { if (swap_event)
cgroup_file_notify(&memcg->swap_events_file);
else
cgroup_file_notify(&memcg->events_file);
}
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) break; if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS) break;
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
}
/*
 * Extended information for slab objects stored as an array in page->memcg_data
 * if MEMCG_DATA_OBJEXTS is set.
 */
struct slabobj_ext {
#ifdef CONFIG_MEMCG
	struct obj_cgroup *objcg;
#endif
#ifdef CONFIG_MEM_ALLOC_PROFILING
	union codetag_ref ref;
#endif
} __aligned(8);
/*
 * NOTE(review): orphaned seqlock read loop — the enclosing function (which
 * declares `memcg`, `seq` and `val`) is missing from this excerpt. This is
 * the BITS_PER_LONG < 64 path for reading the 64-bit socket_pressure field
 * tear-free (socket_pressure_seqlock exists only in that configuration —
 * see struct mem_cgroup above). Preserved as-is; restore from upstream.
 */
do {
seq = read_seqbegin(&memcg->socket_pressure_seqlock);
val = memcg->socket_pressure;
} while (read_seqretry(&memcg->socket_pressure_seqlock, seq));
staticinlinevoid set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
{
} #endif
#ifdef CONFIG_MEMCG bool mem_cgroup_kmem_disabled(void); int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order);
/* * The returned objcg pointer is safe to use without additional * protection within a scope. The scope is defined either by * the current task (similar to the "current" global variable) * or by set_active_memcg() pair. * Please, use obj_cgroup_get() to get a reference if the pointer * needs to be used outside of the local scope.
*/ struct obj_cgroup *current_obj_cgroup(void); struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio);
/*
 * NOTE(review): the following text is website boilerplate (a German
 * disclaimer) that was accidentally pasted into this header; it is not part
 * of the kernel source and should be removed. Translated: "The information
 * on this web page was compiled carefully and to the best of our knowledge.
 * However, neither completeness, nor correctness, nor quality of the
 * provided information is guaranteed. Remark: the colored syntax
 * highlighting and the measurement are still experimental."
 */