	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
		return NULL;

	if (smap->bpf_ma) {
		selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags);
		if (selem)
			/* Keep the original bpf_map_kzalloc behavior
			 * from before bpf_mem_cache_alloc was used.
			 *
			 * No need to use zero_map_value. bpf_selem_free()
			 * only does bpf_mem_cache_free when no other
			 * bpf prog is using the selem.
			 */
			memset(SDATA(selem)->data, 0, smap->map.value_size);
	} else {
		selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
					gfp_flags | __GFP_NOWARN);
	}

	if (selem) {
		if (value) {
			/* No need to call check_and_init_map_value as memory is zero init */
			copy_map_value(&smap->map, SDATA(selem)->data, value);
			if (swap_uptrs)
				bpf_obj_swap_uptrs(smap->map.record, SDATA(selem)->data, value);
		}
		return selem;
	}

	if (charge_mem)
		mem_uncharge(smap, owner, smap->elem_size);
	if (!bpf_ma) {
		__bpf_local_storage_free(local_storage, reuse_now);
		return;
	}

	if (!reuse_now) {
		call_rcu_tasks_trace(&local_storage->rcu,
				     bpf_local_storage_free_trace_rcu);
		return;
	}

	if (smap)
		bpf_mem_cache_free(&smap->storage_ma, local_storage);
	else
		/* smap could be NULL if the selem that triggered
		 * this 'local_storage' creation had been long gone.
		 * In this case, directly do call_rcu().
		 */
		call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
}
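/* Summary of the storage-freeing paths above:
 *   bpf_ma == false              -> __bpf_local_storage_free()
 *   bpf_ma == true && !reuse_now -> deferred via call_rcu_tasks_trace()
 *   bpf_ma == true &&  reuse_now -> bpf_mem_cache_free() for immediate reuse,
 *                                   or call_rcu() when smap is NULL
 */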
void bpf_selem_free(struct bpf_local_storage_elem *selem,
		    struct bpf_local_storage_map *smap,
		    bool reuse_now)
{
	if (!smap->bpf_ma) {
		/* Only task storage has uptrs and task storage
		 * has moved to bpf_mem_alloc, meaning smap->bpf_ma == true
		 * for task storage, so this bpf_obj_free_fields() won't unpin
		 * any uptr.
		 */
		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
		__bpf_selem_free(selem, reuse_now);
		return;
	}

	if (reuse_now) {
		/* reuse_now == true only happens when the storage owner
		 * (e.g. task_struct) is being destructed or the map itself
		 * is being destructed (i.e. map_free). In both cases,
		 * no bpf prog can have a hold on the selem. It is
		 * safe to unpin the uptrs and free the selem now.
		 */
		bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
		/* Instead of using the vanilla call_rcu(),
		 * bpf_mem_cache_free will be able to reuse selem
		 * immediately.
		 */
		bpf_mem_cache_free(&smap->selem_ma, selem);
		return;
	}
/* The "_safe" iteration is needed. * The loop is not removing the selem from the list * but bpf_selem_free will use the selem->rcu_head * which is union-ized with the selem->free_node.
*/
hlist_for_each_entry_safe(selem, n, list, free_node) {
smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
bpf_selem_free(selem, smap, reuse_now);
}
}
/* local_storage->lock must be held and selem->local_storage == local_storage.
 * The caller must ensure selem->smap is still valid to be
 * dereferenced for its smap->elem_size and smap->cache_idx.
 */
static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
					    struct bpf_local_storage_elem *selem,
					    bool uncharge_mem,
					    struct hlist_head *free_selem_list)
{
	struct bpf_local_storage_map *smap;
	bool free_local_storage;
	void *owner;

	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
	owner = local_storage->owner;

	/* All uncharging on the owner must be done first.
	 * The owner may be freed once the last selem is unlinked
	 * from local_storage.
	 */
	if (uncharge_mem)
		mem_uncharge(smap, owner, smap->elem_size);

	free_local_storage = hlist_is_singular_node(&selem->snode,
						    &local_storage->list);
	if (free_local_storage) {
		mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
		local_storage->owner = NULL;

		/* After this RCU_INIT, owner may be freed and cannot be used */
		RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);

		/* local_storage is not freed now. local_storage->lock is
		 * still held and raw_spin_unlock_bh(&local_storage->lock)
		 * will be done by the caller.
		 *
		 * Although the unlock will be done under
		 * rcu_read_lock(), it is more intuitive to
		 * read if the freeing of the storage is done
		 * after the raw_spin_unlock_bh(&local_storage->lock).
		 *
		 * Hence, a "bool free_local_storage" is returned
		 * to the caller, which then frees the storage after
		 * all the RCU grace periods have expired.
		 */
	}
	hlist_del_init_rcu(&selem->snode);
	if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
	    SDATA(selem))
		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
	/* local_storage->smap may be NULL. If it is, get the bpf_ma
	 * from any selem in the local_storage->list. The bpf_ma of all
	 * local_storage and selem should have the same value
	 * for the same map type.
	 *
	 * If the local_storage->list is already empty, the caller will not
	 * care about the bpf_ma value either, because the caller is not
	 * responsible for freeing the local_storage.
	 */
	if (storage_smap)
		return storage_smap->bpf_ma;

	if (!selem) {
		struct hlist_node *n;

		n = rcu_dereference_check(hlist_first_rcu(&local_storage->list),
					  bpf_rcu_lock_held());
		if (!n)
			return false;
void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
{
	/* Always unlink from map before unlinking from local_storage
	 * because selem will be freed after successfully unlinked from
	 * the local_storage.
	 */
	bpf_selem_unlink_map(selem);
	bpf_selem_unlink_storage(selem, reuse_now);
}
	/* spinlock is needed to avoid racing with the
	 * parallel delete. Otherwise, publishing an already
	 * deleted sdata to the cache will become a use-after-free
	 * problem in the next bpf_local_storage_lookup().
	 */
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	if (selem_linked_to_storage(selem))
		rcu_assign_pointer(local_storage->cache[smap->cache_idx], SDATA(selem));
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
}
	owner_storage_ptr =
		(struct bpf_local_storage **)owner_storage(smap, owner);
	/* Publish storage to the owner.
	 * Instead of using any lock of the kernel object (i.e. owner),
	 * cmpxchg will work with any kernel object regardless of what
	 * the running context is: bh, irq, etc.
	 *
	 * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
	 * is protected by the storage->lock. Hence, when freeing
	 * the owner->storage, the storage->lock must be held before
	 * setting the owner->storage ptr to NULL.
	 */
	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
	if (unlikely(prev_storage)) {
		bpf_selem_unlink_map(first_selem);
		err = -EAGAIN;
		goto uncharge;

		/* Note that even though first_selem was linked to smap's
		 * bucket->list, first_selem can be freed immediately
		 * (instead of kfree_rcu) because
		 * bpf_local_storage_map_free() does a
		 * synchronize_rcu_mult (waiting for both sleepable and
		 * normal programs) before walking the bucket->list.
		 * Hence, no one is accessing selem from the
		 * bucket->list under rcu_read_lock().
		 */
	}
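/* Illustrative sketch (not part of this file): the cmpxchg publish pattern
 * described above, reduced to a minimal hypothetical helper. The struct and
 * function names below are made up for illustration only; the real code
 * instead unlinks first_selem and returns -EAGAIN when it loses the race.
 */
struct obj_storage {
	int dummy;
};

static struct obj_storage *publish_obj_storage(struct obj_storage **slot)
{
	struct obj_storage *storage, *prev;

	storage = kzalloc(sizeof(*storage), GFP_ATOMIC);
	if (!storage)
		return NULL;

	/* Install 'storage' only if *slot is still NULL. No lock on the
	 * owning object is needed, so this works in any context (bh, irq, ...).
	 */
	prev = cmpxchg(slot, NULL, storage);
	if (prev) {
		/* Lost the race: another context published first.
		 * Drop the local allocation and use the winner's storage.
		 */
		kfree(storage);
		return prev;
	}
	return storage;
}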
/* sk cannot be going away because it is linking a new elem
 * to sk->sk_bpf_storage (i.e. sk->sk_refcnt cannot be 0).
 * Otherwise, it would become a leak (and cause other memory issues
 * during map destruction).
 */
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
			 void *value, u64 map_flags, bool swap_uptrs,
			 gfp_t gfp_flags)
{
	struct bpf_local_storage_data *old_sdata = NULL;
	struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
	struct bpf_local_storage *local_storage;
	HLIST_HEAD(old_selem_free_list);
	unsigned long flags;
	int err;
	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
	    /* BPF_F_LOCK can only be used in a value with spin_lock */
	    unlikely((map_flags & BPF_F_LOCK) &&
		     !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
		return ERR_PTR(-EINVAL);
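	/* Illustrative reading of the check above: BPF_ANY, BPF_NOEXIST and
	 * BPF_EXIST are mutually exclusive values, so anything greater than
	 * BPF_EXIST (e.g. BPF_NOEXIST | BPF_EXIST) is rejected with -EINVAL,
	 * and BPF_F_LOCK is only accepted when the map value contains a
	 * struct bpf_spin_lock.
	 */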
	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
					      bpf_rcu_lock_held());
	if (!local_storage || hlist_empty(&local_storage->list)) {
		/* Very first elem for the owner */
		err = check_flags(NULL, map_flags);
		if (err)
			return ERR_PTR(err);
	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
		/* Hoping to find an old_sdata to do inline update
		 * such that it can avoid taking the local_storage->lock
		 * and changing the lists.
		 */
		old_sdata =
			bpf_local_storage_lookup(local_storage, smap, false);
		err = check_flags(old_sdata, map_flags);
		if (err)
			return ERR_PTR(err);
		if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
			copy_map_value_locked(&smap->map, old_sdata->data,
					      value, false);
			return old_sdata;
		}
	}
	/* A lookup has just been done before and concluded that a new selem is
	 * needed. The chance of an unnecessary alloc is unlikely.
	 */
	alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, swap_uptrs, gfp_flags);
	if (!alloc_selem)
		return ERR_PTR(-ENOMEM);

	raw_spin_lock_irqsave(&local_storage->lock, flags);

	/* Recheck local_storage->list under local_storage->lock */
	if (unlikely(hlist_empty(&local_storage->list))) {
		/* A parallel del is happening and local_storage is going
		 * away. It has just been checked before, so very
		 * unlikely. Return instead of retry to keep things
		 * simple.
		 */
		err = -EAGAIN;
		goto unlock;
	}
	/* Neither the bpf_prog nor the bpf_map's syscall
	 * could be modifying the local_storage->list now.
	 * Thus, no elem can be added to or deleted from the
	 * local_storage->list by the bpf_prog or by the bpf_map's syscall.
	 *
	 * It is racing with bpf_local_storage_map_free() alone
	 * when unlinking elem from the local_storage->list and
	 * the map's bucket->list.
	 */
	raw_spin_lock_irqsave(&local_storage->lock, flags);
	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
		/* Always unlink from map before unlinking from
		 * local_storage.
		 */
		bpf_selem_unlink_map(selem);
		/* If the local_storage list has only one element,
		 * bpf_selem_unlink_storage_nolock() will return true.
		 * Otherwise, it will return false. This loop intends to
		 * remove all local storage, so the last iteration of the
		 * loop will set free_storage to true.
		 */
		free_storage = bpf_selem_unlink_storage_nolock(
			local_storage, selem, true, &free_selem_list);
	}
	raw_spin_unlock_irqrestore(&local_storage->lock, flags);

	bpf_selem_free_list(&free_selem_list, true);

	if (free_storage)
		bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true);
}
	/* The dynamically allocated selems are not counted currently. */
	usage += sizeof(*smap->buckets) * (1ULL << smap->bucket_log);
	return usage;
}
/* When bpf_ma == true, the bpf_mem_alloc is used to allocate and free memory.
 * A deadlock-free allocator is useful for storage whose owner PTR_TO_BTF_ID
 * the bpf prog can easily get hold of in any context, e.g. via
 * bpf_get_current_task_btf. The task and cgroup storage fall into this case.
 * The bpf_mem_alloc reuses memory immediately. To be reuse-immediate safe,
 * the owner destruction code path needs to go through an rcu grace period
 * before calling bpf_local_storage_destroy().
 *
 * When bpf_ma == false, kmalloc and kfree are used.
 */
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
			    struct bpf_local_storage_cache *cache,
			    bool bpf_ma)
{
	struct bpf_local_storage_map *smap;
	unsigned int i;
	u32 nbuckets;
	int err;
	smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
	if (!smap)
		return ERR_PTR(-ENOMEM);
	bpf_map_init_from_attr(&smap->map, attr);

	nbuckets = roundup_pow_of_two(num_possible_cpus());
	/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
	nbuckets = max_t(u32, 2, nbuckets);
	smap->bucket_log = ilog2(nbuckets);
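	/* Worked example: with num_possible_cpus() == 6, roundup_pow_of_two(6)
	 * gives nbuckets == 8 and bucket_log == 3; with a single possible CPU,
	 * nbuckets is clamped up to 2 and bucket_log == 1.
	 */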
	/* In PREEMPT_RT, kmalloc(GFP_ATOMIC) is still not safe in a
	 * non-preemptible context. Thus, enforce all storages to use
	 * bpf_mem_alloc when CONFIG_PREEMPT_RT is enabled.
	 */
	smap->bpf_ma = IS_ENABLED(CONFIG_PREEMPT_RT) ? true : bpf_ma;
	if (smap->bpf_ma) {
		err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false);
		if (err)
			goto free_smap;
	/* Note that this map might be concurrently cloned from
	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
	 * RCU read section to finish before proceeding. New RCU
	 * read sections should be prevented via bpf_map_inc_not_zero.
	 */
	synchronize_rcu();
	/* bpf prog and the userspace can no longer access this map
	 * now. No new selem (of this map) can be added
	 * to the owner->storage or to the map bucket's list.
	 *
	 * The elem of this map can be cleaned up here
	 * or when the storage is freed e.g.
	 * by bpf_sk_storage_free() during __sk_destruct().
	 */
	for (i = 0; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];

		rcu_read_lock();
		/* No one is adding to b->list now */
		while ((selem = hlist_entry_safe(
				rcu_dereference_raw(hlist_first_rcu(&b->list)),
				struct bpf_local_storage_elem, map_node))) {
			if (busy_counter)
				this_cpu_inc(*busy_counter);
			bpf_selem_unlink(selem, true);
			if (busy_counter)
				this_cpu_dec(*busy_counter);
			cond_resched_rcu();
		}
		rcu_read_unlock();
	}
	/* While freeing the storage we may still need to access the map.
	 *
	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map,
	 * which then made the above while ((selem = ...)) loop
	 * exit immediately.
	 *
	 * However, while freeing the storage one still needs to access the
	 * smap->elem_size to do the uncharging in
	 * bpf_selem_unlink_storage_nolock().
	 *
	 * Hence, wait another rcu grace period for the storage to be freed.
	 */
	synchronize_rcu();

	if (smap->bpf_ma) {
		rcu_barrier_tasks_trace();
		if (!rcu_trace_implies_rcu_gp())
			rcu_barrier();
		bpf_mem_alloc_destroy(&smap->selem_ma);
		bpf_mem_alloc_destroy(&smap->storage_ma);
	}
	kvfree(smap->buckets);
	bpf_map_area_free(smap);
}