/*
 * Copyright IBM Corporation, 2012
 * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
 *
 * Cgroup v2
 * Copyright (C) 2019 Red Hat, Inc.
 * Author: Giuseppe Scrivano <gscrivan@redhat.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of version 2.1 of the GNU Lesser General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 */
/*
 * TODO: this routine can waste much memory for nodes which will
 * never be onlined. It's better to use memory hotplug callback
 * function.
 */
/*
 * NOTE(review): fragment — the enclosing function (presumably the css_alloc
 * callback that allocates per-node usage counters) is not visible in this
 * chunk; h_cgroup and the fail_alloc_nodeinfo label are defined there.
 */
for_each_node(node) {
	/* Set node_to_alloc to NUMA_NO_NODE for offline nodes. */
	int node_to_alloc =
		node_state(node, N_NORMAL_MEMORY) ? node : NUMA_NO_NODE;
	h_cgroup->nodeinfo[node] =
		kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
			     GFP_KERNEL, node_to_alloc);
	/* Unwind partially-built nodeinfo on allocation failure. */
	if (!h_cgroup->nodeinfo[node])
		goto fail_alloc_nodeinfo;
}
/*
 * Move the charge for one hugetlb folio from @h_cg to its parent cgroup,
 * as part of emptying a cgroup that is going offline.
 *
 * Should be called with hugetlb_lock held.
 * Since we are holding hugetlb_lock, pages cannot get moved from
 * active list or uncharged from the cgroup, So no need to get
 * page reference and test for page active here. This function
 * cannot fail.
 */
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
				       struct folio *folio)
{
	unsigned int nr_pages;
	struct page_counter *counter;
	struct hugetlb_cgroup *hcg;
	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);

	hcg = hugetlb_cgroup_from_folio(folio);
	/*
	 * We can have pages in active list without any cgroup
	 * ie, hugepage with less than 3 pages. We can safely
	 * ignore those pages.
	 */
	if (!hcg || hcg != h_cg)
		goto out;

	nr_pages = folio_nr_pages(folio);
	if (!parent) {
		parent = root_h_cgroup;
		/* root has no limit, so force-charging it cannot fail */
		page_counter_charge(&parent->hugepage[idx], nr_pages);
	}
	counter = &h_cg->hugepage[idx];
	/* Take the pages off the local counter */
	page_counter_cancel(counter, nr_pages);

	set_hugetlb_cgroup(folio, parent);
out:
	return;
}
/*
 * Force the hugetlb cgroup to empty the hugetlb resources by moving them to
 * the parent cgroup.
 */
/*
 * NOTE(review): this block looks garbled. "staticvoid" is missing a space,
 * and the body below does not match the css_offline signature: it references
 * idx, rsvd, nr_pages, counter, ret and ptr, none of which are declared here.
 * It reads like the body of a charge helper (try-charge against the cgroup's
 * page counter, returning the cgroup via *ptr) fused onto the css_offline
 * header — TODO: confirm against the upstream file before touching the logic.
 */
staticvoid hugetlb_cgroup_css_offline(struct cgroup_subsys_state *css)
{
	struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
	struct hstate *h;
	struct folio *folio;

	/* Nothing to do if the hugetlb controller is disabled. */
	if (hugetlb_cgroup_disabled())
		goto done;
again:
	rcu_read_lock();
	/*
	 * NOTE(review): derives the cgroup from the current task rather than
	 * from @css — suspicious for a css_offline callback; retry until a
	 * reference on the css can be taken.
	 */
	h_cg = hugetlb_cgroup_from_task(current);
	if (!css_tryget(&h_cg->css)) {
		rcu_read_unlock();
		goto again;
	}
	rcu_read_unlock();

	/* Try to charge nr_pages; on failure record a HUGETLB_MAX event. */
	if (!page_counter_try_charge(
		    __hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		    nr_pages, &counter)) {
		ret = -ENOMEM;
		hugetlb_event(h_cg, idx, HUGETLB_MAX);
		css_put(&h_cg->css);
		goto done;
	}
	/* Reservations take a reference to the css because they do not get
	 * reparented.
	 */
	if (!rsvd)
		css_put(&h_cg->css);
done:
	*ptr = h_cg;
	return ret;
}
/*
 * Commit a previously-obtained charge from @h_cg to @folio and, for
 * non-reservation charges, account the pages in the per-node usage counter.
 *
 * Should be called with hugetlb_lock held.
 */
static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
					   struct hugetlb_cgroup *h_cg,
					   struct folio *folio, bool rsvd)
{
	if (hugetlb_cgroup_disabled() || !h_cg)
		return;
	lockdep_assert_held(&hugetlb_lock);
	__set_hugetlb_cgroup(folio, h_cg, rsvd);
	if (!rsvd) {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage + nr_pages);
	}
}
/*
 * Uncharge @folio from its hugetlb cgroup: clear the folio's cgroup pointer,
 * give the pages back to the cgroup's page counter and, for non-reservation
 * charges, subtract from the per-node usage counter.
 *
 * Should be called with hugetlb_lock held.
 */
static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
					    struct folio *folio, bool rsvd)
{
	struct hugetlb_cgroup *h_cg;

	if (hugetlb_cgroup_disabled())
		return;
	lockdep_assert_held(&hugetlb_lock);
	h_cg = __hugetlb_cgroup_from_folio(folio, rsvd);
	if (unlikely(!h_cg))
		return;
	__set_hugetlb_cgroup(folio, NULL, rsvd);

	/*
	 * Return the pages to the counter they were charged against;
	 * without this the charge taken at allocation time would leak.
	 */
	page_counter_uncharge(
		__hugetlb_cgroup_counter_from_cgroup(h_cg, idx, rsvd),
		nr_pages);

	if (rsvd) {
		/* Reservations hold a css reference; drop it now. */
		css_put(&h_cg->css);
	} else {
		unsigned long usage =
			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
		/*
		 * This write is not atomic due to fetching usage and writing
		 * to it, but that's fine because we call this with
		 * hugetlb_lock held anyway.
		 */
		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
			   usage - nr_pages);
	}
}
/*
 * NOTE(review): fragment — the enclosing function (which declares rg, resv,
 * nr_pages and region_del) is not visible in this chunk; presumably the
 * file-region uncharge path. The trailing extra '}' below closes that
 * enclosing function.
 */
/* Uncharge only regions charged against a cgroup (not the resv owner). */
if (rg->reservation_counter && resv->pages_per_hpage &&
    !resv->reservation_counter) {
	page_counter_uncharge(rg->reservation_counter,
			      nr_pages * resv->pages_per_hpage);
	/*
	 * Only do css_put(rg->css) when we delete the entire region
	 * because one file_region must hold exactly one css reference.
	 */
	if (region_del)
		css_put(rg->css);
}
}
/*
 * NOTE(review): fragment — the enclosing seq_file show routine (which
 * declares seq, legacy, usage, nid, idx, css and h_cg) is not visible in
 * this chunk. It emits per-node hugetlb usage statistics.
 */
if (legacy) {
	/* Add up usage across all nodes for the non-hierarchical total. */
	usage = 0;
	for_each_node_state(nid, N_MEMORY)
		usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
	seq_printf(seq, "total=%lu", usage * PAGE_SIZE);

	/* Simply print the per-node usage for the non-hierarchical total. */
	for_each_node_state(nid, N_MEMORY)
		seq_printf(seq, " N%d=%lu", nid,
			   READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
				   PAGE_SIZE);
	seq_putc(seq, '\n');
}

/*
 * The hierarchical total is pretty much the value recorded by the
 * counter, so use that.
 */
seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
	   page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);

/*
 * For each node, traverse the css tree to obtain the hierarchical
 * node usage.
 */
for_each_node_state(nid, N_MEMORY) {
	usage = 0;
	rcu_read_lock();
	css_for_each_descendant_pre(css, &h_cg->css) {
		usage += READ_ONCE(hugetlb_cgroup_from_css(css)
					   ->nodeinfo[nid]
					   ->usage[idx]);
	}
	rcu_read_unlock();
	seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
}
/*
 * NOTE(review): the German text below is unrelated website-disclaimer
 * boilerplate that was accidentally pasted into this file as bare text
 * (a compile error). Preserved here inside a comment. Translation:
 * "The information on this website was carefully compiled to the best of
 * our knowledge. However, neither completeness, nor correctness, nor
 * quality of the provided information is guaranteed. Note: the colored
 * syntax display and the measurement are still experimental."
 *
 * Original:
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder
 * Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten
 * Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */