// SPDX-License-Identifier: GPL-2.0-only /* * RDMA resource limiting controller for cgroups. * * Used to allow a cgroup hierarchy to stop processes from consuming * additional RDMA resources after a certain limit is reached. * * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
*/
/* * Protects list of resource pools maintained on per cgroup basis * and rdma device list.
*/ static DEFINE_MUTEX(rdmacg_mutex); static LIST_HEAD(rdmacg_devices);
/* * resource table definition as to be seen by the user. * Need to add entries to it when more resources are * added/defined at IB verb/core layer.
*/ staticcharconst *rdmacg_resource_names[] = {
[RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle",
[RDMACG_RESOURCE_HCA_OBJECT] = "hca_object",
};
/* resource tracker for each resource of rdma cgroup */ struct rdmacg_resource { int max; int usage;
};
/* * resource pool object which represents per cgroup, per device * resources. There are multiple instances of this object per cgroup, * therefore it cannot be embedded within rdma_cgroup structure. It * is maintained as list.
*/ struct rdmacg_resource_pool { struct rdmacg_device *device; struct rdmacg_resource resources[RDMACG_RESOURCE_MAX];
/** * uncharge_cg_locked - uncharge resource for rdma cgroup * @cg: pointer to cg to uncharge and all parents in hierarchy * @device: pointer to rdmacg device * @index: index of the resource to uncharge in cg (resource pool) * * It also frees the resource pool which was created as part of * charging operation when there are no resources attached to * resource pool.
*/ staticvoid
uncharge_cg_locked(struct rdma_cgroup *cg, struct rdmacg_device *device, enum rdmacg_resource_type index)
{ struct rdmacg_resource_pool *rpool;
rpool = find_cg_rpool_locked(cg, device);
/* * rpool cannot be null at this stage. Let kernel operate in case * if there a bug in IB stack or rdma controller, instead of crashing * the system.
*/ if (unlikely(!rpool)) {
pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device); return;
}
rpool->resources[index].usage--;
/* * A negative count (or overflow) is invalid, * it indicates a bug in the rdma controller.
*/
WARN_ON_ONCE(rpool->resources[index].usage < 0);
rpool->usage_sum--; if (rpool->usage_sum == 0 &&
rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { /* * No user of the rpool and all entries are set to max, so * safe to delete this rpool.
*/
free_cg_rpool_locked(rpool);
}
}
/** * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count * @cg: pointer to cg to uncharge and all parents in hierarchy * @device: pointer to rdmacg device * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup * stop uncharging * @index: index of the resource to uncharge in cg in given resource pool
*/ staticvoid rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg, struct rdmacg_device *device, struct rdma_cgroup *stop_cg, enum rdmacg_resource_type index)
{ struct rdma_cgroup *p;
mutex_lock(&rdmacg_mutex);
for (p = cg; p != stop_cg; p = parent_rdmacg(p))
uncharge_cg_locked(p, device, index);
mutex_unlock(&rdmacg_mutex);
css_put(&cg->css);
}
/** * rdmacg_uncharge - hierarchically uncharge rdma resource count * @cg: pointer to cg to uncharge and all parents in hierarchy * @device: pointer to rdmacg device * @index: index of the resource to uncharge in cgroup in given resource pool
*/ void rdmacg_uncharge(struct rdma_cgroup *cg, struct rdmacg_device *device, enum rdmacg_resource_type index)
{ if (index >= RDMACG_RESOURCE_MAX) return;
/** * rdmacg_try_charge - hierarchically try to charge the rdma resource * @rdmacg: pointer to rdma cgroup which will own this resource * @device: pointer to rdmacg device * @index: index of the resource to charge in cgroup (resource pool) * * This function follows charging resource in hierarchical way. * It will fail if the charge would cause the new value to exceed the * hierarchical limit. * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. * Returns pointer to rdmacg for this resource when charging is successful. * * Charger needs to account resources on two criteria. * (a) per cgroup & (b) per device resource usage. * Per cgroup resource usage ensures that tasks of cgroup doesn't cross * the configured limits. Per device provides granular configuration * in multi device usage. It allocates resource pool in the hierarchy * for each parent it come across for first resource. Later on resource * pool will be available. Therefore it will be much faster thereon * to charge/uncharge.
*/ int rdmacg_try_charge(struct rdma_cgroup **rdmacg, struct rdmacg_device *device, enum rdmacg_resource_type index)
{ struct rdma_cgroup *cg, *p; struct rdmacg_resource_pool *rpool;
s64 new; int ret = 0;
if (index >= RDMACG_RESOURCE_MAX) return -EINVAL;
/* * hold on to css, as cgroup can be removed but resource * accounting happens on css.
*/
cg = get_current_rdmacg();
mutex_lock(&rdmacg_mutex); for (p = cg; p; p = parent_rdmacg(p)) {
rpool = get_cg_rpool_locked(p, device); if (IS_ERR(rpool)) {
ret = PTR_ERR(rpool); goto err;
} else { new = rpool->resources[index].usage + 1; if (new > rpool->resources[index].max) {
ret = -EAGAIN; goto err;
} else {
rpool->resources[index].usage = new;
rpool->usage_sum++;
}
}
}
mutex_unlock(&rdmacg_mutex);
/** * rdmacg_register_device - register rdmacg device to rdma controller. * @device: pointer to rdmacg device whose resources need to be accounted. * * If IB stack wish a device to participate in rdma cgroup resource * tracking, it must invoke this API to register with rdma cgroup before * any user space application can start using the RDMA resources.
*/ void rdmacg_register_device(struct rdmacg_device *device)
{
INIT_LIST_HEAD(&device->dev_node);
INIT_LIST_HEAD(&device->rpools);
/** * rdmacg_unregister_device - unregister rdmacg device from rdma controller. * @device: pointer to rdmacg device which was previously registered with rdma * controller using rdmacg_register_device(). * * IB stack must invoke this after all the resources of the IB device * are destroyed and after ensuring that no more resources will be created * when this API is invoked.
*/ void rdmacg_unregister_device(struct rdmacg_device *device)
{ struct rdmacg_resource_pool *rpool, *tmp;
/* * Synchronize with any active resource settings, * usage query happening via configfs.
*/
mutex_lock(&rdmacg_mutex);
list_del_init(&device->dev_node);
/* * Now that this device is off the cgroup list, its safe to free * all the rpool resources.
*/
list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node)
free_cg_rpool_locked(rpool);
/* extract the device name first */
dev_name = strsep(&options, " "); if (!dev_name) {
ret = -EINVAL; goto err;
}
new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL); if (!new_limits) {
ret = -ENOMEM; goto err;
}
ret = rdmacg_parse_limits(options, new_limits, &enables); if (ret) goto parse_err;
/* acquire lock to synchronize with hot plug devices */
mutex_lock(&rdmacg_mutex);
device = rdmacg_get_device_locked(dev_name); if (!device) {
ret = -ENODEV; goto dev_err;
}
rpool = get_cg_rpool_locked(cg, device); if (IS_ERR(rpool)) {
ret = PTR_ERR(rpool); goto dev_err;
}
/* now set the new limits of the rpool */
for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX)
set_resource_limit(rpool, i, new_limits[i]);
if (rpool->usage_sum == 0 &&
rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { /* * No user of the rpool and all entries are set to max, so * safe to delete this rpool.
*/
free_cg_rpool_locked(rpool);
}
for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
seq_puts(sf, rdmacg_resource_names[i]);
seq_putc(sf, '='); if (sf_type == RDMACG_RESOURCE_TYPE_MAX) { if (rpool)
value = rpool->resources[i].max; else
value = S32_MAX;
} else { if (rpool)
value = rpool->resources[i].usage; else
value = 0;
}
/** * rdmacg_css_offline - cgroup css_offline callback * @css: css of interest * * This function is called when @css is about to go away and responsible * for shooting down all rdmacg associated with @css. As part of that it * marks all the resource pool entries to max value, so that when resources are * uncharged, associated resource pool can be freed as well.
*/ staticvoid rdmacg_css_offline(struct cgroup_subsys_state *css)
{ struct rdma_cgroup *cg = css_rdmacg(css); struct rdmacg_resource_pool *rpool;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.