/* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return * bpf_map_lookup_elem_proto, so that verifier can properly check the arguments * * Different map implementations will rely on rcu in map methods * lookup/update/delete, therefore eBPF programs must run under rcu lock * if program is allowed to access maps, so check rcu_read_lock_held() or * rcu_read_lock_trace_held() in all three functions.
*/
BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
{
	/* Map lookup methods rely on RCU, so complain (once) when the caller
	 * holds none of the RCU read-side locks checked here.
	 */
	WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
		     !rcu_read_lock_bh_held());

	/* Hand back whatever the map's own lookup op produced, as an integer */
	return (unsigned long) map->ops->map_lookup_elem(map, key);
}
/* * bpf_bprintf_prepare - Generic pass on format strings for bprintf-like helpers * * Returns a negative value if fmt is an invalid format string or 0 otherwise. * * This can be used in two ways: * - Format string verification only: when data->get_bin_args is false * - Arguments preparation: in addition to the above verification, it writes in * data->bin_args a binary representation of arguments usable by bstr_printf * where pointers from BPF have been sanitized. * * In argument preparation mode, if 0 is returned, safe temporary buffers are * allocated and bpf_bprintf_cleanup should be called to free them after use.
*/ int bpf_bprintf_prepare(constchar *fmt, u32 fmt_size, const u64 *raw_args,
u32 num_args, struct bpf_bprintf_data *data)
{ bool get_buffers = (data->get_bin_args && num_args) || data->get_buf; char *unsafe_ptr = NULL, *tmp_buf = NULL, *tmp_buf_end, *fmt_end; struct bpf_bprintf_buffers *buffers = NULL;
size_t sizeof_cur_arg, sizeof_cur_ip; int err, i, num_spec = 0;
u64 cur_arg; char fmt_ptype, cur_ip[16], ip_spec[] = "%pXX";
/* hack: bstr_printf expects IP addresses to be * pre-formatted as strings, ironically, the easiest way * to do that is to call snprintf.
*/
ip_spec[2] = fmt[i - 1];
ip_spec[3] = fmt[i];
err = snprintf(tmp_buf, tmp_buf_end - tmp_buf,
ip_spec, &cur_ip);
/* ARG_PTR_TO_CONST_STR guarantees that fmt is zero-terminated so we * can safely give an unbounded size.
*/
err = bpf_bprintf_prepare(fmt, UINT_MAX, args, num_args, &data); if (err < 0) return err;
/* BPF timers and the maps that own them:
 *
 * A map element may embed a 'struct bpf_timer'; the map owns every BPF timer
 * stored in its elements.
 * 'struct bpf_timer' is allocated together with the map element and starts
 * out zero initialized. That space is used to keep 'struct bpf_async_kern'.
 *
 * bpf_timer_init() allocates 'struct bpf_hrtimer', inits hrtimer, and
 * remembers the 'struct bpf_map *' pointer it's part of.
 * bpf_timer_set_callback() increments prog refcnt and assigns bpf callback_fn.
 * bpf_timer_start() arms the timer.
 * If the user space reference to a map goes to zero at this point, the
 * ops->map_release_uref callback is responsible for cancelling the timers,
 * freeing their memory, and decrementing prog's refcnts.
 * bpf_timer_cancel() cancels the timer and decrements prog's refcnt.
 *
 * Inner maps can contain bpf timers as well. ops->map_release_uref is
 * freeing the timers when an inner map is replaced or deleted by user space.
 */
struct bpf_hrtimer {
	/* Shared async callback state. It is the first member; other code in
	 * this file casts a 'struct bpf_async_cb *' to 'struct bpf_hrtimer *'.
	 */
	struct bpf_async_cb cb;
	/* The hrtimer that is armed and cancelled on behalf of the BPF timer */
	struct hrtimer timer;
	/* Bumped by bpf_timer_cancel() while a cancellation of this timer,
	 * issued from a timer callback, is in flight.
	 */
	atomic_t cancelling;
};
/* Kernel-side layout that lives inside the uapi 'struct bpf_timer' and
 * 'struct bpf_wq' storage.
 */
struct bpf_async_kern {
	/* Three typed views of the same pointer slot */
	union {
		struct bpf_async_cb *cb;
		struct bpf_hrtimer *timer;
		struct bpf_work *work;
	};
	/* A bpf_spin_lock rather than a spinlock_t: it must always fit in the
	 * space reserved by struct bpf_timer, regardless of LOCKDEP and
	 * spinlock debug flags.
	 */
	struct bpf_spin_lock lock;
} __attribute__((aligned(8)));
BTF_TYPE_EMIT(struct bpf_timer);
callback_fn = rcu_dereference_check(t->cb.callback_fn, rcu_read_lock_bh_held()); if (!callback_fn) goto out;
/* bpf_timer_cb() runs in hrtimer_run_softirq. It doesn't migrate and * cannot be preempted by another bpf_timer_cb() on the same cpu. * Remember the timer this callback is servicing to prevent * deadlock if callback_fn() calls bpf_timer_cancel() or * bpf_map_delete_elem() on the same timer.
*/
this_cpu_write(hrtimer_running, t); if (map->map_type == BPF_MAP_TYPE_ARRAY) { struct bpf_array *array = container_of(map, struct bpf_array, map);
/* Cancel the timer and wait for callback to complete if it was running. * If hrtimer_cancel() can be safely called it's safe to call * kfree_rcu(t) right after for both preallocated and non-preallocated * maps. The async->cb = NULL was already done and no code path can see * address 't' anymore. Timer if armed for existing bpf_hrtimer before * bpf_timer_cancel_and_free will have been cancelled.
*/
hrtimer_cancel(&t->timer);
kfree_rcu(t, cb.rcu);
}
__bpf_spin_lock_irqsave(&async->lock);
t = async->timer; if (t) {
ret = -EBUSY; goto out;
}
/* Allocate via bpf_map_kmalloc_node() for memcg accounting. Until * kmalloc_nolock() is available, avoid locking issues by using * __GFP_HIGH (GFP_ATOMIC & ~__GFP_RECLAIM).
*/
cb = bpf_map_kmalloc_node(map, size, __GFP_HIGH, map->numa_node); if (!cb) {
ret = -ENOMEM; goto out;
}
switch (type) { case BPF_ASYNC_TYPE_TIMER:
clockid = flags & (MAX_CLOCKS - 1);
t = (struct bpf_hrtimer *)cb;
WRITE_ONCE(async->cb, cb); /* Guarantee the order between async->cb and map->usercnt. So * when there are concurrent uref release and bpf timer init, either * bpf_timer_cancel_and_free() called by uref release reads a no-NULL * timer or atomic64_read() below returns a zero usercnt.
*/
smp_mb(); if (!atomic64_read(&map->usercnt)) { /* maps with timers must be either held by user space * or pinned in bpffs.
*/
WRITE_ONCE(async->cb, NULL);
kfree(cb);
ret = -EPERM;
}
out:
__bpf_spin_unlock_irqrestore(&async->lock); return ret;
}
if (in_nmi()) return -EOPNOTSUPP;
__bpf_spin_lock_irqsave(&async->lock);
cb = async->cb; if (!cb) {
ret = -EINVAL; goto out;
} if (!atomic64_read(&cb->map->usercnt)) { /* maps with timers must be either held by user space * or pinned in bpffs. Otherwise timer might still be * running even when bpf prog is detached and user space * is gone, since map_release_uref won't ever be called.
*/
ret = -EPERM; goto out;
}
prev = cb->prog; if (prev != prog) { /* Bump prog refcnt once. Every bpf_timer_set_callback() * can pick different callback_fn-s within the same prog.
*/
prog = bpf_prog_inc_not_zero(prog); if (IS_ERR(prog)) {
ret = PTR_ERR(prog); goto out;
} if (prev) /* Drop prev prog refcnt when swapping with new prog */
bpf_prog_put(prev);
cb->prog = prog;
}
rcu_assign_pointer(cb->callback_fn, callback_fn);
out:
__bpf_spin_unlock_irqrestore(&async->lock); return ret;
}
/* Helper: cancel a BPF timer and wait for its callback to finish.
 *
 * Returns -EOPNOTSUPP in NMI context, -EINVAL when the timer was never
 * initialized, -EDEADLK when waiting could deadlock, and otherwise the
 * result of hrtimer_cancel().
 */
BPF_CALL_1(bpf_timer_cancel, struct bpf_async_kern *, timer)
{
	struct bpf_hrtimer *target, *running;
	bool accounted = false;
	int ret = 0;

	if (in_nmi())
		return -EOPNOTSUPP;

	rcu_read_lock();
	__bpf_spin_lock_irqsave(&timer->lock);

	target = timer->timer;
	if (!target) {
		ret = -EINVAL;
		goto out;
	}

	running = this_cpu_read(hrtimer_running);
	if (running == target) {
		/* A callback_fn cancelling its own timer would make
		 * hrtimer_cancel() wait for that very callback_fn to
		 * return: a guaranteed deadlock.
		 */
		ret = -EDEADLK;
		goto out;
	}

	/* In-flight cancellations are only accounted when we are called from
	 * a timer callback: the wait must be avoided only if other
	 * _callbacks_ may be waiting on us. Non-callback paths are fine,
	 * since nobody synchronously waits for their completion.
	 */
	if (running) {
		atomic_inc(&target->cancelling);
		/* Need full barrier after relaxed atomic_inc */
		smp_mb__after_atomic();
		accounted = true;

		if (atomic_read(&running->cancelling)) {
			/* Some other timer callback is trying to cancel the
			 * timer we are running in, while we cancel 'target'.
			 * 'target' may belong to that callback, or to one
			 * that (transitively) waits on it; synchronously
			 * waiting here could then deadlock, because the other
			 * side might do the same. Bail!
			 */
			ret = -EDEADLK;
			goto out;
		}
	}

	drop_prog_refcnt(&target->cb);
out:
	__bpf_spin_unlock_irqrestore(&timer->lock);

	/* On the success path only: cancel the timer and wait for the
	 * associated callback to finish if it was running.
	 */
	if (!ret)
		ret = hrtimer_cancel(&target->timer);

	if (accounted)
		atomic_dec(&target->cancelling);

	rcu_read_unlock();
	return ret;
}
/* Performance optimization: read async->cb without lock first. */ if (!READ_ONCE(async->cb)) return NULL;
__bpf_spin_lock_irqsave(&async->lock); /* re-read it under lock */
cb = async->cb; if (!cb) goto out;
drop_prog_refcnt(cb); /* The subsequent bpf_timer_start/cancel() helpers won't be able to use * this timer, since it won't be initialized.
*/
WRITE_ONCE(async->cb, NULL);
out:
__bpf_spin_unlock_irqrestore(&async->lock); return cb;
}
/* Called by map_delete/update_elem for an individual element, and by
 * ops->map_release_uref once the user space reference to a map drops to zero.
 */
void bpf_timer_cancel_and_free(void *val)
{
	struct bpf_hrtimer *hrt;

	hrt = (struct bpf_hrtimer *)__bpf_async_cancel_and_free(val);
	if (!hrt)
		return;

	/* First check whether bpf_map_delete/update_elem() was invoked from a
	 * timer callback_fn. In that case hrtimer_cancel() must not be called
	 * (it would deadlock), and neither should hrtimer_try_to_cancel() (it
	 * would just return -1). Even though callback_fn is still running on
	 * this cpu, freeing the bpf_hrtimer is safe: bpf_timer_cb() has read
	 * everything it needed from it, and the bpf subprog callback_fn can no
	 * longer reach it because async->cb = NULL was already done. The timer
	 * is effectively cancelled since bpf_timer_cb() will return
	 * HRTIMER_NORESTART.
	 *
	 * However, the calling callback_fn may have armed the timer _before_
	 * calling us. Failing to cancel it here could then lead to struct
	 * hrtimer being used after bpf_hrtimer is freed. So outstanding
	 * timers _must_ be cancelled before kfree_rcu, even though no more
	 * timers can be armed.
	 *
	 * Moreover, work must be scheduled even when the timer does not
	 * belong to the calling callback_fn. On two different CPUs both
	 * sides can run in parallel and try to cancel one another, each
	 * waiting in hrtimer_cancel without forward progress, because
	 * timer1 depends on the timer2 callback finishing and vice versa:
	 *
	 * CPU 1 (timer1_cb)			CPU 2 (timer2_cb)
	 * bpf_timer_cancel_and_free(timer2)	bpf_timer_cancel_and_free(timer1)
	 *
	 * To avoid these issues, punt to workqueue context whenever we are
	 * inside a timer callback.
	 */
	if (this_cpu_read(hrtimer_running)) {
		queue_work(system_unbound_wq, &hrt->cb.delete_work);
		return;
	}

	if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
		bpf_timer_delete_work(&hrt->cb.delete_work);
		return;
	}

	/* PREEMPT_RT: if the timer is running on another CPU, let a kworker
	 * wait for its completion instead of trying to acquire a sleepable
	 * lock in hrtimer_cancel() here.
	 */
	if (hrtimer_try_to_cancel(&hrt->timer) < 0) {
		queue_work(system_unbound_wq, &hrt->cb.delete_work);
		return;
	}

	kfree_rcu(hrt, cb.rcu);
}
/* Called by map_delete/update_elem for an individual element, and by
 * ops->map_release_uref once the user space reference to a map drops to zero.
 */
void bpf_wq_cancel_and_free(void *val)
{
	struct bpf_work *w;

	BTF_TYPE_EMIT(struct bpf_wq);

	w = (struct bpf_work *)__bpf_async_cancel_and_free(val);

	/* Only trigger cancellation of the sleepable work; *do not* wait for
	 * it to finish if it was running, as we might not be in a sleepable
	 * context. kfree will be called once the work has finished.
	 */
	if (w)
		schedule_work(&w->delete_work);
}
conststruct bpf_func_proto *
bpf_base_func_proto(enum bpf_func_id func_id, conststruct bpf_prog *prog)
{ switch (func_id) { case BPF_FUNC_map_lookup_elem: return &bpf_map_lookup_elem_proto; case BPF_FUNC_map_update_elem: return &bpf_map_update_elem_proto; case BPF_FUNC_map_delete_elem: return &bpf_map_delete_elem_proto; case BPF_FUNC_map_push_elem: return &bpf_map_push_elem_proto; case BPF_FUNC_map_pop_elem: return &bpf_map_pop_elem_proto; case BPF_FUNC_map_peek_elem: return &bpf_map_peek_elem_proto; case BPF_FUNC_map_lookup_percpu_elem: return &bpf_map_lookup_percpu_elem_proto; case BPF_FUNC_get_prandom_u32: return &bpf_get_prandom_u32_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_raw_smp_processor_id_proto; case BPF_FUNC_get_numa_node_id: return &bpf_get_numa_node_id_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; case BPF_FUNC_ktime_get_boot_ns: return &bpf_ktime_get_boot_ns_proto; case BPF_FUNC_ktime_get_tai_ns: return &bpf_ktime_get_tai_ns_proto; case BPF_FUNC_ringbuf_output: return &bpf_ringbuf_output_proto; case BPF_FUNC_ringbuf_reserve: return &bpf_ringbuf_reserve_proto; case BPF_FUNC_ringbuf_submit: return &bpf_ringbuf_submit_proto; case BPF_FUNC_ringbuf_discard: return &bpf_ringbuf_discard_proto; case BPF_FUNC_ringbuf_query: return &bpf_ringbuf_query_proto; case BPF_FUNC_strncmp: return &bpf_strncmp_proto; case BPF_FUNC_strtol: return &bpf_strtol_proto; case BPF_FUNC_strtoul: return &bpf_strtoul_proto; case BPF_FUNC_get_current_pid_tgid: return &bpf_get_current_pid_tgid_proto; case BPF_FUNC_get_ns_current_pid_tgid: return &bpf_get_ns_current_pid_tgid_proto; case BPF_FUNC_get_current_uid_gid: return &bpf_get_current_uid_gid_proto; default: break;
}
if (!bpf_token_capable(prog->aux->token, CAP_BPF)) return NULL;
switch (func_id) { case BPF_FUNC_spin_lock: return &bpf_spin_lock_proto; case BPF_FUNC_spin_unlock: return &bpf_spin_unlock_proto; case BPF_FUNC_jiffies64: return &bpf_jiffies64_proto; case BPF_FUNC_per_cpu_ptr: return &bpf_per_cpu_ptr_proto; case BPF_FUNC_this_cpu_ptr: return &bpf_this_cpu_ptr_proto; case BPF_FUNC_timer_init: return &bpf_timer_init_proto; case BPF_FUNC_timer_set_callback: return &bpf_timer_set_callback_proto; case BPF_FUNC_timer_start: return &bpf_timer_start_proto; case BPF_FUNC_timer_cancel: return &bpf_timer_cancel_proto; case BPF_FUNC_kptr_xchg: return &bpf_kptr_xchg_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; case BPF_FUNC_loop: return &bpf_loop_proto; case BPF_FUNC_user_ringbuf_drain: return &bpf_user_ringbuf_drain_proto; case BPF_FUNC_ringbuf_reserve_dynptr: return &bpf_ringbuf_reserve_dynptr_proto; case BPF_FUNC_ringbuf_submit_dynptr: return &bpf_ringbuf_submit_dynptr_proto; case BPF_FUNC_ringbuf_discard_dynptr: return &bpf_ringbuf_discard_dynptr_proto; case BPF_FUNC_dynptr_from_mem: return &bpf_dynptr_from_mem_proto; case BPF_FUNC_dynptr_read: return &bpf_dynptr_read_proto; case BPF_FUNC_dynptr_write: return &bpf_dynptr_write_proto; case BPF_FUNC_dynptr_data: return &bpf_dynptr_data_proto; #ifdef CONFIG_CGROUPS case BPF_FUNC_cgrp_storage_get: return &bpf_cgrp_storage_get_proto; case BPF_FUNC_cgrp_storage_delete: return &bpf_cgrp_storage_delete_proto; case BPF_FUNC_get_current_cgroup_id: return &bpf_get_current_cgroup_id_proto; case BPF_FUNC_get_current_ancestor_cgroup_id: return &bpf_get_current_ancestor_cgroup_id_proto; case BPF_FUNC_current_task_under_cgroup: return &bpf_current_task_under_cgroup_proto; #endif #ifdef CONFIG_CGROUP_NET_CLASSID case BPF_FUNC_get_cgroup_classid: return &bpf_get_cgroup_classid_curr_proto; #endif case BPF_FUNC_task_storage_get: if (bpf_prog_check_recur(prog)) return &bpf_task_storage_get_recur_proto; return &bpf_task_storage_get_proto; case BPF_FUNC_task_storage_delete: if 
(bpf_prog_check_recur(prog)) return &bpf_task_storage_delete_recur_proto; return &bpf_task_storage_delete_proto; default: break;
}
if (!bpf_token_capable(prog->aux->token, CAP_PERFMON)) return NULL;
switch (func_id) { case BPF_FUNC_trace_printk: return bpf_get_trace_printk_proto(); case BPF_FUNC_get_current_task: return &bpf_get_current_task_proto; case BPF_FUNC_get_current_task_btf: return &bpf_get_current_task_btf_proto; case BPF_FUNC_get_current_comm: return &bpf_get_current_comm_proto; case BPF_FUNC_probe_read_user: return &bpf_probe_read_user_proto; case BPF_FUNC_probe_read_kernel: return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_kernel_proto; case BPF_FUNC_probe_read_user_str: return &bpf_probe_read_user_str_proto; case BPF_FUNC_probe_read_kernel_str: return security_locked_down(LOCKDOWN_BPF_READ_KERNEL) < 0 ?
NULL : &bpf_probe_read_kernel_str_proto; case BPF_FUNC_copy_from_user: return &bpf_copy_from_user_proto; case BPF_FUNC_copy_from_user_task: return &bpf_copy_from_user_task_proto; case BPF_FUNC_snprintf_btf: return &bpf_snprintf_btf_proto; case BPF_FUNC_snprintf: return &bpf_snprintf_proto; case BPF_FUNC_task_pt_regs: return &bpf_task_pt_regs_proto; case BPF_FUNC_trace_vprintk: return bpf_get_trace_vprintk_proto(); case BPF_FUNC_perf_event_read_value: return bpf_get_perf_event_read_value_proto(); case BPF_FUNC_perf_event_read: return &bpf_perf_event_read_proto; case BPF_FUNC_send_signal: return &bpf_send_signal_proto; case BPF_FUNC_send_signal_thread: return &bpf_send_signal_thread_proto; case BPF_FUNC_get_task_stack: return prog->sleepable ? &bpf_get_task_stack_sleepable_proto
: &bpf_get_task_stack_proto; case BPF_FUNC_get_branch_snapshot: return &bpf_get_branch_snapshot_proto; case BPF_FUNC_find_vma: return &bpf_find_vma_proto; default: return NULL;
}
}
EXPORT_SYMBOL_GPL(bpf_base_func_proto);
/* Do the actual list draining outside the lock to not hold the lock for * too long, and also prevent deadlocks if tracing programs end up * executing on entry/exit of functions called inside the critical * section, and end up doing map ops that call bpf_list_head_free for * the same map value again.
*/
__bpf_spin_lock_irqsave(spin_lock); if (!head->next || list_empty(head)) goto unlock;
head = head->next;
unlock:
INIT_LIST_HEAD(orig_head);
__bpf_spin_unlock_irqrestore(spin_lock);
while (head != orig_head) { void *obj = head;
obj -= field->graph_root.node_offset;
head = head->next; /* The contained type can also have resources, including a * bpf_list_head which needs to be freed.
*/
__bpf_obj_drop_impl(obj, field->graph_root.value_rec, false);
}
}
/* Like rbtree_postorder_for_each_entry_safe, but 'pos' and 'n' are
 * 'rb_node *', so field name of rb_node within containing struct is not
 * needed.
 *
 * Since bpf_rb_tree's node type has a corresponding struct btf_field with
 * graph_root.node_offset, it's not necessary to know field name
 * or type of node struct
 *
 * 'n' is loaded with the successor of 'pos' before each run of the loop
 * body, and the loop advances through 'n' rather than re-reading 'pos'.
 */
#define bpf_rbtree_postorder_for_each_entry_safe(pos, n, root) \
	for (pos = rb_first_postorder(root); \
	    pos && ({ n = rb_next_postorder(pos); 1; }); \
	    pos = n)
/* The verifier has ensured that meta__ign must be NULL */ return bpf_mem_alloc(&bpf_global_percpu_ma, size);
}
/* Must be called under migrate_disable(), as required by bpf_mem_free
 *
 * Drop one reference to (or outright free) the object at @p.
 * @p:      start of the object
 * @rec:    btf_record describing the object's special fields, or NULL
 * @percpu: true to free through the global percpu allocator, false to free
 *          through the global (non-percpu) allocator
 */
void __bpf_obj_drop_impl(void *p, const struct btf_record *rec, bool percpu)
{
	struct bpf_mem_alloc *ma;

	if (rec && rec->refcount_off >= 0 &&
	    !refcount_dec_and_test((refcount_t *)(p + rec->refcount_off))) {
		/* Object is refcounted and refcount_dec didn't result in 0
		 * refcount. Return without freeing the object
		 */
		return;
	}

	/* Release the special fields described by the record first */
	if (rec)
		bpf_obj_free_fields(rec, p);

	if (percpu)
		ma = &bpf_global_percpu_ma;
	else
		ma = &bpf_global_ma;

	bpf_mem_free_rcu(ma, p);
}
__bpf_obj_drop_impl(p, meta ? meta->record : NULL, false);
}
/* kfunc: free a percpu object through the global percpu allocator. */
__bpf_kfunc void bpf_percpu_obj_drop_impl(void *p__alloc, void *meta__ign)
{
	struct bpf_mem_alloc *percpu_ma = &bpf_global_percpu_ma;

	/* meta__ign is not consulted: the verifier has ensured that it must
	 * be NULL.
	 */
	bpf_mem_free_rcu(percpu_ma, p__alloc);
}
/* Could just cast directly to refcount_t *, but need some code using * bpf_refcount type so that it is emitted in vmlinux BTF
*/
ref = (struct bpf_refcount *)(p__refcounted_kptr + meta->record->refcount_off); if (!refcount_inc_not_zero((refcount_t *)ref)) return NULL;
/* Verifier strips KF_RET_NULL if input is owned ref, see is_kfunc_ret_null * in verifier.c
*/ return (void *)p__refcounted_kptr;
}
/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't * called on its fields, so init here
*/ if (unlikely(!h->next))
INIT_LIST_HEAD(h);
/* node->owner != NULL implies !list_empty(n), no need to separately * check the latter
*/ if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { /* Only called from BPF prog, no need to migrate_disable */
__bpf_obj_drop_impl((void *)n - off, rec, false); return -EINVAL;
}
/* If list_head was 0-initialized by map, bpf_obj_init_field wasn't * called on its fields, so init here
*/ if (unlikely(!h->next))
INIT_LIST_HEAD(h); if (list_empty(h)) return NULL;
n = tail ? h->prev : h->next;
node = container_of(n, struct bpf_list_node_kern, list_head); if (WARN_ON_ONCE(READ_ONCE(node->owner) != head)) return NULL;
/* node_internal->owner != root implies either RB_EMPTY_NODE(n) or * n is owned by some other tree. No need to check RB_EMPTY_NODE(n)
*/ if (READ_ONCE(node_internal->owner) != root) return NULL;
/* Need to copy rbtree_add_cached's logic here because our 'less' is a BPF * program
*/ staticint __bpf_rbtree_add(struct bpf_rb_root *root, struct bpf_rb_node_kern *node, void *less, struct btf_record *rec, u64 off)
{ struct rb_node **link = &((struct rb_root_cached *)root)->rb_root.rb_node; struct rb_node *parent = NULL, *n = &node->rb_node;
bpf_callback_t cb = (bpf_callback_t)less; bool leftmost = true;
/* node->owner != NULL implies !RB_EMPTY_NODE(n), no need to separately * check the latter
*/ if (cmpxchg(&node->owner, NULL, BPF_PTR_POISON)) { /* Only called from BPF prog, no need to migrate_disable */
__bpf_obj_drop_impl((void *)n - off, rec, false); return -EINVAL;
}
while (*link) {
parent = *link; if (cb((uintptr_t)node, (uintptr_t)parent, 0, 0, 0)) {
link = &parent->rb_left;
} else {
link = &parent->rb_right;
leftmost = false;
}
}
/** * bpf_task_acquire - Acquire a reference to a task. A task acquired by this * kfunc which is not stored in a map as a kptr, must be released by calling * bpf_task_release(). * @p: The task on which a reference is being acquired.
*/
__bpf_kfunc struct task_struct *bpf_task_acquire(struct task_struct *p)
{
	/* The reference is taken only if rcu_users has not already hit zero */
	return refcount_inc_not_zero(&p->rcu_users) ? p : NULL;
}
/** * bpf_task_release - Release the reference acquired on a task. * @p: The task on which a reference is being released.
*/
__bpf_kfunc void bpf_task_release(struct task_struct *p)
{
	/* Counterpart of the rcu_users increment done by bpf_task_acquire() */
	put_task_struct_rcu_user(p);
}
#ifdef CONFIG_CGROUPS /** * bpf_cgroup_acquire - Acquire a reference to a cgroup. A cgroup acquired by * this kfunc which is not stored in a map as a kptr, must be released by * calling bpf_cgroup_release(). * @cgrp: The cgroup on which a reference is being acquired.
*/
__bpf_kfunc struct cgroup *bpf_cgroup_acquire(struct cgroup *cgrp)
{
	/* cgroup_tryget() failing means no reference was taken */
	if (!cgroup_tryget(cgrp))
		return NULL;

	return cgrp;
}
/** * bpf_cgroup_release - Release the reference acquired on a cgroup. * If this kfunc is invoked in an RCU read region, the cgroup is guaranteed to * not be freed until the current grace period has ended, even if its refcount * drops to 0. * @cgrp: The cgroup on which a reference is being released.
*/
__bpf_kfunc void bpf_cgroup_release(struct cgroup *cgrp)
{
	/* Drop the reference the caller holds on cgrp */
	cgroup_put(cgrp);
}
/** * bpf_cgroup_ancestor - Perform a lookup on an entry in a cgroup's ancestor * array. A cgroup returned by this kfunc which is not subsequently stored in a * map, must be released by calling bpf_cgroup_release(). * @cgrp: The cgroup for which we're performing a lookup. * @level: The level of ancestor to look up.
*/
__bpf_kfunc struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level)
{
	struct cgroup *found;

	/* Reject levels outside [0, cgrp->level] */
	if (level < 0 || level > cgrp->level)
		return NULL;

	/* cgrp's refcnt could be 0 here, but ancestors can still be accessed */
	found = cgrp->ancestors[level];

	/* Only hand the ancestor out if a reference on it could be taken */
	return cgroup_tryget(found) ? found : NULL;
}
/** * bpf_cgroup_from_id - Find a cgroup from its ID. A cgroup returned by this * kfunc which is not subsequently stored in a map, must be released by calling * bpf_cgroup_release(). * @cgid: cgroup id.
*/
__bpf_kfunc struct cgroup *bpf_cgroup_from_id(u64 cgid)
{
	struct cgroup *found = __cgroup_get_from_id(cgid);

	/* Lookup failures come back as ERR_PTR values; report them as NULL */
	return IS_ERR(found) ? NULL : found;
}
/** * bpf_task_under_cgroup - wrap task_under_cgroup_hierarchy() as a kfunc, test * task's membership of cgroup ancestry. * @task: the task to be tested * @ancestor: possible ancestor of @task's cgroup * * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. * It follows all the same rules as cgroup_is_descendant, and only applies * to the default hierarchy.
*/
__bpf_kfunc long bpf_task_under_cgroup(struct task_struct *task,
				       struct cgroup *ancestor)
{
	long is_under;

	/* The hierarchy test is performed inside an RCU read-side section */
	rcu_read_lock();
	is_under = task_under_cgroup_hierarchy(task, ancestor);
	rcu_read_unlock();

	return is_under;
}
/**
 * bpf_task_get_cgroup1 - Acquires the associated cgroup of a task within a
 * specific cgroup1 hierarchy. The cgroup1 hierarchy is identified by its
 * hierarchy ID.
 * @task: The target task
 * @hierarchy_id: The ID of a cgroup1 hierarchy
 *
 * On success, the cgroup is returned. On failure, NULL is returned.
*/
__bpf_kfunc struct cgroup *
bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id)
{ struct cgroup *cgrp = task_get_cgroup1(task, hierarchy_id);
if (IS_ERR(cgrp)) return NULL; return cgrp;
} #endif/* CONFIG_CGROUPS */
/** * bpf_task_from_pid - Find a struct task_struct from its pid by looking it up * in the root pid namespace idr. If a task is returned, it must either be * stored in a map, or released with bpf_task_release(). * @pid: The pid of the task being looked up.
*/
__bpf_kfunc struct task_struct *bpf_task_from_pid(s32 pid)
{
	struct task_struct *found, *acquired = NULL;

	/* Look the pid up in init_pid_ns and take the reference while still
	 * inside the RCU read-side section.
	 */
	rcu_read_lock();
	found = find_task_by_pid_ns(pid, &init_pid_ns);
	if (found)
		acquired = bpf_task_acquire(found);
	rcu_read_unlock();

	/* NULL when no task was found or the acquire failed */
	return acquired;
}
/** * bpf_task_from_vpid - Find a struct task_struct from its vpid by looking it up * in the pid namespace of the current task. If a task is returned, it must * either be stored in a map, or released with bpf_task_release(). * @vpid: The vpid of the task being looked up.
*/
__bpf_kfunc struct task_struct *bpf_task_from_vpid(s32 vpid)
{
	struct task_struct *found, *acquired = NULL;

	/* Look the vpid up and take the reference while still inside the RCU
	 * read-side section.
	 */
	rcu_read_lock();
	found = find_task_by_vpid(vpid);
	if (found)
		acquired = bpf_task_acquire(found);
	rcu_read_unlock();

	/* NULL when no task was found or the acquire failed */
	return acquired;
}
/**
 * bpf_dynptr_slice() - Obtain a read-only pointer to the dynptr data.
 * @p: The dynptr whose data slice to retrieve
 * @offset: Offset into the dynptr
 * @buffer__opt: User-provided buffer to copy contents into. May be NULL
 * @buffer__szk: Size (in bytes) of the buffer if present. This is the
 *               length of the requested slice. This must be a constant.
 *
 * For non-skb and non-xdp type dynptrs, there is no difference between
 * bpf_dynptr_slice and bpf_dynptr_data.
 *
 * If buffer__opt is NULL, the call will fail if buffer__opt was needed.
 *
 * If the intention is to write to the data slice, please use
 * bpf_dynptr_slice_rdwr.
 *
 * The user must check that the returned pointer is not null before using it.
 *
 * Please note that in the case of skb and xdp dynptrs, bpf_dynptr_slice
 * does not change the underlying packet data pointers, so a call to
 * bpf_dynptr_slice will not invalidate any ctx->data/data_end pointers in
 * the bpf program.
 *
 * Return: NULL if the call failed (eg invalid dynptr), pointer to a read-only
 * data slice (can be either direct pointer to the data or a pointer to the user
 * provided buffer, with its contents containing the data, if unable to obtain
 * direct pointer)
*/
__bpf_kfunc void *bpf_dynptr_slice(conststruct bpf_dynptr *p, u32 offset, void *buffer__opt, u32 buffer__szk)
{ conststruct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p; enum bpf_dynptr_type type;
u32 len = buffer__szk; int err;
if (!ptr->data) return NULL;
err = bpf_dynptr_check_off_len(ptr, offset, len); if (err) return NULL;
type = bpf_dynptr_get_type(ptr);
switch (type) { case BPF_DYNPTR_TYPE_LOCAL: case BPF_DYNPTR_TYPE_RINGBUF: return ptr->data + ptr->offset + offset; case BPF_DYNPTR_TYPE_SKB: if (buffer__opt) return skb_header_pointer(ptr->data, ptr->offset + offset, len, buffer__opt); else return skb_pointer_if_linear(ptr->data, ptr->offset + offset, len); case BPF_DYNPTR_TYPE_XDP:
{ void *xdp_ptr = bpf_xdp_pointer(ptr->data, ptr->offset + offset, len); if (!IS_ERR_OR_NULL(xdp_ptr)) return xdp_ptr;
/**
 * bpf_dynptr_slice_rdwr() - Obtain a writable pointer to the dynptr data.
 * @p: The dynptr whose data slice to retrieve
 * @offset: Offset into the dynptr
 * @buffer__opt: User-provided buffer to copy contents into. May be NULL
 * @buffer__szk: Size (in bytes) of the buffer if present. This is the
 *               length of the requested slice. This must be a constant.
 *
 * For non-skb and non-xdp type dynptrs, there is no difference between
 * bpf_dynptr_slice and bpf_dynptr_data.
 *
 * If buffer__opt is NULL, the call will fail if buffer_opt was needed.
 *
 * The returned pointer is writable. It points either directly at the dynptr
 * data at the requested offset, or at the buffer when a direct data pointer
 * could not be obtained (example: the requested slice is in the paged area
 * of an skb packet). When the returned pointer is the buffer, the user is
 * responsible for persisting writes by calling bpf_dynptr_write(). This
 * usually looks something like this pattern:
 *
 * struct eth_hdr *eth = bpf_dynptr_slice_rdwr(&dynptr, 0, buffer, sizeof(buffer));
 * if (!eth)
 *	return TC_ACT_SHOT;
 *
 * // mutate eth header //
 *
 * if (eth == buffer)
 *	bpf_dynptr_write(&dynptr, 0, buffer, sizeof(buffer), 0);
 *
 * Please note that, as in the example above, the user must check that the
 * returned pointer is not null before using it.
 *
 * Please also note that in the case of skb and xdp dynptrs,
 * bpf_dynptr_slice_rdwr does not change the underlying packet data pointers,
 * so a call to bpf_dynptr_slice_rdwr will not invalidate any
 * ctx->data/data_end pointers in the bpf program.
 *
 * Return: NULL if the call failed (eg invalid or read-only dynptr), pointer
 * to a data slice (can be either direct pointer to the data or a pointer to
 * the user provided buffer, with its contents containing the data, if unable
 * to obtain direct pointer)
 */
__bpf_kfunc void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
					void *buffer__opt, u32 buffer__szk)
{
	const struct bpf_dynptr_kern *kptr = (struct bpf_dynptr_kern *)p;

	/* An uninitialized dynptr cannot be sliced at all. */
	if (!kptr->data)
		return NULL;

	/* A read-only dynptr must never hand out a writable slice. */
	if (__bpf_dynptr_is_rdonly(kptr))
		return NULL;

	/* From here on the logic is the same as bpf_dynptr_slice.
	 *
	 * For skb-type dynptrs, it is safe to write into the returned pointer
	 * if the bpf program allows skb data writes. Two things may happen
	 * when calling bpf_dynptr_slice_rdwr:
	 *
	 * 1) The requested slice is in the head of the skb. The returned
	 * pointer is then directly to skb data, and if the skb is cloned, the
	 * verifier will have uncloned it (see bpf_unclone_prologue()) already.
	 * The pointer can be directly written into.
	 *
	 * 2) Some portion of the requested slice is in the paged buffer area.
	 * The requested data is then copied out into the buffer and the
	 * returned pointer is a pointer to the buffer. The skb will not be
	 * pulled. To persist the write, the user will need to call
	 * bpf_dynptr_write(), which will pull the skb and commit the write.
	 *
	 * Similarly for xdp programs, if the requested slice is not across xdp
	 * fragments, then a direct pointer will be returned, otherwise the
	 * data will be copied out into the buffer and the user will need to
	 * call bpf_dynptr_write() to commit changes.
	 */
	return bpf_dynptr_slice(p, offset, buffer__opt, buffer__szk);
}
if (!ptr->data) {
bpf_dynptr_set_null(clone); return -EINVAL;
}
*clone = *ptr;
return 0;
}
/** * bpf_dynptr_copy() - Copy data from one dynptr to another. * @dst_ptr: Destination dynptr - where data should be copied to * @dst_off: Offset into the destination dynptr * @src_ptr: Source dynptr - where data should be copied from * @src_off: Offset into the source dynptr * @size: Length of the data to copy from source to destination * * Copies data from source dynptr to destination dynptr. * Returns 0 on success; negative error, otherwise.
*/
__bpf_kfunc int bpf_dynptr_copy(struct bpf_dynptr *dst_ptr, u32 dst_off, struct bpf_dynptr *src_ptr, u32 src_off, u32 size)
{ struct bpf_dynptr_kern *dst = (struct bpf_dynptr_kern *)dst_ptr; struct bpf_dynptr_kern *src = (struct bpf_dynptr_kern *)src_ptr; void *src_slice, *dst_slice; char buf[256];
u32 off;
/** * bpf_dynptr_memset() - Fill dynptr memory with a constant byte. * @p: Destination dynptr - where data will be filled * @offset: Offset into the dynptr to start filling from * @size: Number of bytes to fill * @val: Constant byte to fill the memory with * * Fills the @size bytes of the memory area pointed to by @p * at @offset with the constant byte @val. * Returns 0 on success; negative error, otherwise.
*/
__bpf_kfunc int bpf_dynptr_memset(struct bpf_dynptr *p, u32 offset, u32 size, u8 val)
{ struct bpf_dynptr_kern *ptr = (struct bpf_dynptr_kern *)p;
u32 chunk_sz, write_off; char buf[256]; void* slice; int err;
/* * The RCU read lock is held to safely traverse the latch tree, but we * don't need its protection when accessing the prog, since it has an * active stack frame on the current stack trace, and won't disappear.
*/
rcu_read_lock();
prog = bpf_prog_ksym_find(ip);
rcu_read_unlock(); if (!prog) return !ctx->cnt;
ctx->cnt++; if (bpf_is_subprog(prog)) returntrue;
ctx->aux = prog->aux;
ctx->sp = sp;
ctx->bp = bp; returnfalse;
}
arch_bpf_stack_walk(bpf_stack_walker, &ctx);
WARN_ON_ONCE(!ctx.aux); if (ctx.aux)
WARN_ON_ONCE(!ctx.aux->exception_boundary);
WARN_ON_ONCE(!ctx.bp);
WARN_ON_ONCE(!ctx.cnt); /* Prevent KASAN false positives for CONFIG_KASAN_STACK by unpoisoning * deeper stack depths than ctx.sp as we do not return from bpf_throw, * which skips compiler generated instrumentation to do the same.
*/
kasan_unpoison_task_stack_below((void *)(long)ctx.sp);
ctx.aux->bpf_exception_cb(cookie, ctx.sp, ctx.bp, 0, 0);
WARN(1, "A call to BPF exception callback should never return\n");
}
/*
 * Kernel-side state of a bits iterator. The bits iterator kfuncs obtain it by
 * casting the struct bpf_iter_bits pointer they are handed.
 *
 * The anonymous union holds the bits to scan: bpf_iter_bits_next() reads
 * @bits_copy in place when @nr_bits is 64 and follows the @bits pointer
 * otherwise; bpf_iter_bits_destroy() passes @bits to bpf_mem_free() only when
 * @nr_bits is above 64.
 *
 * @nr_bits: number of bits to scan; 0 makes bpf_iter_bits_next() return NULL.
 * @bit: result of the last search; the next search starts at @bit + 1.
 */
struct bpf_iter_bits_kern { union {
__u64 *bits;
__u64 bits_copy;
}; int nr_bits; int bit;
} __aligned(8);
/* Make an array of u64 safe to scan as an array of unsigned long.
 *
 * On 64-bit hosts, unsigned long and u64 have the same size, so passing
 * a u64 pointer and an unsigned long pointer to find_next_bit() will
 * return the same result, as both point to the same 8-byte area.
 *
 * For 32-bit little-endian hosts, using a u64 pointer or unsigned long
 * pointer also makes no difference: the first iterated unsigned long is
 * composed of bits 0-31 of the u64 and the second unsigned long is composed
 * of bits 32-63 of the u64.
 *
 * However, for 32-bit big-endian hosts, this is not the case. The first
 * iterated unsigned long will be bits 32-63 of the u64, so swap these two
 * ulong values within each of the @nr u64 words of @bits. On every other
 * configuration this function compiles to nothing.
 */
static void swap_ulong_in_u64(u64 *bits, unsigned int nr)
{
#if (BITS_PER_LONG == 32) && defined(__BIG_ENDIAN)
	unsigned int idx;

	for (idx = 0; idx < nr; idx++) {
		u64 word = bits[idx];

		/* Exchange the upper and the lower 32-bit halves. */
		bits[idx] = (word >> 32) | ((u64)(u32)word << 32);
	}
#endif
}
/** * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area * @it: The new bpf_iter_bits to be created * @unsafe_ptr__ign: A pointer pointing to a memory area to be iterated over * @nr_words: The size of the specified memory area, measured in 8-byte units. * The maximum value of @nr_words is @BITS_ITER_NR_WORDS_MAX. This limit may be * further reduced by the BPF memory allocator implementation. * * This function initializes a new bpf_iter_bits structure for iterating over * a memory area which is specified by the @unsafe_ptr__ign and @nr_words. It * copies the data of the memory area to the newly created bpf_iter_bits @it for * subsequent iteration operations. * * On success, 0 is returned. On failure, ERR is returned.
*/
__bpf_kfunc int
bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, u32 nr_words)
{ struct bpf_iter_bits_kern *kit = (void *)it;
u32 nr_bytes = nr_words * sizeof(u64);
u32 nr_bits = BYTES_TO_BITS(nr_bytes); int err;
/**
 * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits
 * @it: The bpf_iter_bits to be checked
 *
 * Searches for the next set bit after the current position, records the
 * search result as the new position, and returns a pointer to that position.
 *
 * Return: pointer to the number of the next set bit, or NULL if there are no
 * further bits available.
 */
__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
{
	struct bpf_iter_bits_kern *kit = (void *)it;
	int total = kit->nr_bits;
	int pos = kit->bit;
	const void *words;

	/* Empty iterator, or an earlier call already ran off the end. */
	if (!total || pos >= total)
		return NULL;

	/* Exactly 64 bits are read from the inline copy, not via the pointer. */
	if (total == 64)
		words = &kit->bits_copy;
	else
		words = kit->bits;

	/* The position is stored even when the search runs past the end. */
	kit->bit = find_next_bit(words, total, pos + 1);

	return kit->bit < total ? &kit->bit : NULL;
}
/**
 * bpf_iter_bits_destroy() - Destroy a bpf_iter_bits
 * @it: The bpf_iter_bits to be destroyed
 *
 * Destroy the resource associated with the bpf_iter_bits. Only iterators
 * covering more than 64 bits have their bits buffer freed; for anything
 * smaller this is a no-op.
 */
__bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
{
	struct bpf_iter_bits_kern *kit = (void *)it;

	if (kit->nr_bits > 64)
		bpf_mem_free(&bpf_global_ma, kit->bits);
}
/** * bpf_copy_from_user_str() - Copy a string from an unsafe user address * @dst: Destination address, in kernel space. This buffer must be * at least @dst__sz bytes long. * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL. * @unsafe_ptr__ign: Source address, in user space. * @flags: The only supported flag is BPF_F_PAD_ZEROS * * Copies a NUL-terminated string from userspace to BPF space. If user string is * too long this will still ensure zero termination in the dst buffer unless * buffer size is 0. * * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst to 0 on success and * memset all of @dst on failure.
*/
__bpf_kfunc int bpf_copy_from_user_str(void *dst, u32 dst__sz, constvoid __user *unsafe_ptr__ign, u64 flags)
{ int ret;
if (unlikely(flags & ~BPF_F_PAD_ZEROS)) return -EINVAL;
if (unlikely(!dst__sz)) return 0;
ret = strncpy_from_user(dst, unsafe_ptr__ign, dst__sz - 1); if (ret < 0) { if (flags & BPF_F_PAD_ZEROS)
memset((char *)dst, 0, dst__sz);
/** * bpf_copy_from_user_task_str() - Copy a string from an task's address space * @dst: Destination address, in kernel space. This buffer must be * at least @dst__sz bytes long. * @dst__sz: Maximum number of bytes to copy, includes the trailing NUL. * @unsafe_ptr__ign: Source address in the task's address space. * @tsk: The task whose address space will be used * @flags: The only supported flag is BPF_F_PAD_ZEROS * * Copies a NUL terminated string from a task's address space to @dst__sz * buffer. If user string is too long this will still ensure zero termination * in the @dst__sz buffer unless buffer size is 0. * * If BPF_F_PAD_ZEROS flag is set, memset the tail of @dst__sz to 0 on success * and memset all of @dst__sz on failure. * * Return: The number of copied bytes on success including the NUL terminator. * A negative error code on failure.
*/
__bpf_kfunc int bpf_copy_from_user_task_str(void *dst, u32 dst__sz, constvoid __user *unsafe_ptr__ign, struct task_struct *tsk, u64 flags)
{ int ret;
if (unlikely(flags & ~BPF_F_PAD_ZEROS)) return -EINVAL;
if (unlikely(dst__sz == 0)) return 0;
ret = copy_remote_vm_str(tsk, (unsignedlong)unsafe_ptr__ign, dst, dst__sz, 0); if (ret < 0) { if (flags & BPF_F_PAD_ZEROS)
memset(dst, 0, dst__sz); return ret;
}
/* Save the local IRQ flags into *@flags__irq_flag via local_irq_save().
 *
 * Keep unsigned long in the prototype so that the kfunc is usable when
 * emitted to vmlinux.h in BPF programs directly. Note that while in a BPF
 * prog the unsigned long always points to an 8-byte region on the stack, the
 * kernel may only read and write 4 of those bytes on 32-bit.
 */
__bpf_kfunc void bpf_local_irq_save(unsigned long *flags__irq_flag)
{
	local_irq_save(*flags__irq_flag);
}
/* * Kfuncs for string operations. * * Since strings are not necessarily %NUL-terminated, we cannot directly call * in-kernel implementations. Instead, we open-code the implementations using * __get_kernel_nofault instead of plain dereference to make them safe.
*/
/** * bpf_strcmp - Compare two strings * @s1__ign: One string * @s2__ign: Another string * * Return: * * %0 - Strings are equal * * %-1 - @s1__ign is smaller * * %1 - @s2__ign is smaller * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of strings is too large * * %-ERANGE - One of strings is outside of kernel address space
*/
__bpf_kfunc int bpf_strcmp(constchar *s1__ign, constchar *s2__ign)
{ char c1, c2; int i;
if (!copy_from_kernel_nofault_allowed(s1__ign, 1) ||
!copy_from_kernel_nofault_allowed(s2__ign, 1)) { return -ERANGE;
}
guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) {
__get_kernel_nofault(&c1, s1__ign, char, err_out);
__get_kernel_nofault(&c2, s2__ign, char, err_out); if (c1 != c2) return c1 < c2 ? -1 : 1; if (c1 == '\0') return 0;
s1__ign++;
s2__ign++;
} return -E2BIG;
err_out: return -EFAULT;
}
/**
 * bpf_strnchr - Find a character in a length limited string
 * @s__ign: The string to be searched
 * @count: The number of characters to be searched
 * @c: The character to search for
 *
 * Note that the %NUL-terminator is considered part of the string, and can
 * be searched for. The scan never goes beyond XATTR_SIZE_MAX characters,
 * whatever @count says.
 *
 * Return:
 * * >=0      - Index of the first occurrence of @c within @s__ign
 * * %-ENOENT - @c not found in the first @count characters of @s__ign
 * * %-EFAULT - Cannot read @s__ign
 * * %-E2BIG  - @s__ign is too large
 * * %-ERANGE - @s__ign is outside of kernel address space
 */
__bpf_kfunc int bpf_strnchr(const char *s__ign, size_t count, char c)
{
	char cur;
	int idx;

	if (!copy_from_kernel_nofault_allowed(s__ign, 1))
		return -ERANGE;

	guard(pagefault)();
	for (idx = 0; idx < count && idx < XATTR_SIZE_MAX; idx++) {
		__get_kernel_nofault(&cur, s__ign + idx, char, err_out);

		/* Match is tested first so that searching for NUL succeeds. */
		if (cur == c)
			return idx;
		if (cur == '\0')
			return -ENOENT;
	}

	/* Stopped by the hard limit rather than by @count or the string end. */
	if (idx == XATTR_SIZE_MAX)
		return -E2BIG;
	return -ENOENT;

err_out:
	return -EFAULT;
}
/** * bpf_strchr - Find the first occurrence of a character in a string * @s__ign: The string to be searched * @c: The character to search for * * Note that the %NUL-terminator is considered part of the string, and can * be searched for. * * Return: * * >=0 - The index of the first occurrence of @c within @s__ign * * %-ENOENT - @c not found in @s__ign * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space
*/
__bpf_kfunc int bpf_strchr(constchar *s__ign, char c)
{ return bpf_strnchr(s__ign, XATTR_SIZE_MAX, c);
}
/** * bpf_strchrnul - Find and return a character in a string, or end of string * @s__ign: The string to be searched * @c: The character to search for * * Return: * * >=0 - Index of the first occurrence of @c within @s__ign or index of * the null byte at the end of @s__ign when @c is not found * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space
*/
__bpf_kfunc int bpf_strchrnul(constchar *s__ign, char c)
{ char sc; int i;
if (!copy_from_kernel_nofault_allowed(s__ign, 1)) return -ERANGE;
guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) {
__get_kernel_nofault(&sc, s__ign, char, err_out); if (sc == '\0' || sc == c) return i;
s__ign++;
} return -E2BIG;
err_out: return -EFAULT;
}
/** * bpf_strrchr - Find the last occurrence of a character in a string * @s__ign: The string to be searched * @c: The character to search for * * Return: * * >=0 - Index of the last occurrence of @c within @s__ign * * %-ENOENT - @c not found in @s__ign * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space
*/
__bpf_kfunc int bpf_strrchr(constchar *s__ign, int c)
{ char sc; int i, last = -ENOENT;
if (!copy_from_kernel_nofault_allowed(s__ign, 1)) return -ERANGE;
guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) {
__get_kernel_nofault(&sc, s__ign, char, err_out); if (sc == c)
last = i; if (sc == '\0') return last;
s__ign++;
} return -E2BIG;
err_out: return -EFAULT;
}
/**
 * bpf_strnlen - Calculate the length of a length-limited string
 * @s__ign: The string
 * @count: The maximum number of characters to count
 *
 * The scan never goes beyond XATTR_SIZE_MAX characters, whatever @count
 * says.
 *
 * Return:
 * * >=0      - The length of @s__ign
 * * %-EFAULT - Cannot read @s__ign
 * * %-E2BIG  - @s__ign is too large
 * * %-ERANGE - @s__ign is outside of kernel address space
 */
__bpf_kfunc int bpf_strnlen(const char *s__ign, size_t count)
{
	char cur;
	int n;

	if (!copy_from_kernel_nofault_allowed(s__ign, 1))
		return -ERANGE;

	guard(pagefault)();
	for (n = 0; n < count && n < XATTR_SIZE_MAX; n++) {
		__get_kernel_nofault(&cur, s__ign + n, char, err_out);
		if (cur == '\0')
			return n;
	}

	/* Stopped by the hard limit rather than by @count. */
	if (n == XATTR_SIZE_MAX)
		return -E2BIG;
	return n;

err_out:
	return -EFAULT;
}
/** * bpf_strlen - Calculate the length of a string * @s__ign: The string * * Return: * * >=0 - The length of @s__ign * * %-EFAULT - Cannot read @s__ign * * %-E2BIG - @s__ign is too large * * %-ERANGE - @s__ign is outside of kernel address space
*/
__bpf_kfunc int bpf_strlen(constchar *s__ign)
{ return bpf_strnlen(s__ign, XATTR_SIZE_MAX);
}
/** * bpf_strspn - Calculate the length of the initial substring of @s__ign which * only contains letters in @accept__ign * @s__ign: The string to be searched * @accept__ign: The string to search for * * Return: * * >=0 - The length of the initial substring of @s__ign which only * contains letters from @accept__ign * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of the strings is too large * * %-ERANGE - One of the strings is outside of kernel address space
*/
__bpf_kfunc int bpf_strspn(constchar *s__ign, constchar *accept__ign)
{ char cs, ca; int i, j;
if (!copy_from_kernel_nofault_allowed(s__ign, 1) ||
!copy_from_kernel_nofault_allowed(accept__ign, 1)) { return -ERANGE;
}
guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) {
__get_kernel_nofault(&cs, s__ign, char, err_out); if (cs == '\0') return i; for (j = 0; j < XATTR_SIZE_MAX; j++) {
__get_kernel_nofault(&ca, accept__ign + j, char, err_out); if (cs == ca || ca == '\0') break;
} if (j == XATTR_SIZE_MAX) return -E2BIG; if (ca == '\0') return i;
s__ign++;
} return -E2BIG;
err_out: return -EFAULT;
}
/** * bpf_strcspn - Calculate the length of the initial substring of @s__ign which * does not contain letters in @reject__ign * @s__ign: The string to be searched * @reject__ign: The string to search for * * Return: * * >=0 - The length of the initial substring of @s__ign which does not * contain letters from @reject__ign * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of the strings is too large * * %-ERANGE - One of the strings is outside of kernel address space
*/
__bpf_kfunc int bpf_strcspn(constchar *s__ign, constchar *reject__ign)
{ char cs, cr; int i, j;
if (!copy_from_kernel_nofault_allowed(s__ign, 1) ||
!copy_from_kernel_nofault_allowed(reject__ign, 1)) { return -ERANGE;
}
guard(pagefault)(); for (i = 0; i < XATTR_SIZE_MAX; i++) {
__get_kernel_nofault(&cs, s__ign, char, err_out); if (cs == '\0') return i; for (j = 0; j < XATTR_SIZE_MAX; j++) {
__get_kernel_nofault(&cr, reject__ign + j, char, err_out); if (cs == cr || cr == '\0') break;
} if (j == XATTR_SIZE_MAX) return -E2BIG; if (cr != '\0') return i;
s__ign++;
} return -E2BIG;
err_out: return -EFAULT;
}
/**
 * bpf_strnstr - Find the first substring in a length-limited string
 * @s1__ign: The string to be searched
 * @s2__ign: The string to search for
 * @len: the maximum number of characters to search
 *
 * Every candidate start position in @s1__ign is tried in turn, comparing
 * @s2__ign against it character by character. At most XATTR_SIZE_MAX start
 * positions and XATTR_SIZE_MAX characters of @s2__ign are examined.
 *
 * Return:
 * * >=0      - Index of the first character of the first occurrence of @s2__ign
 *              within the first @len characters of @s1__ign
 * * %-ENOENT - @s2__ign not found in the first @len characters of @s1__ign
 * * %-EFAULT - Cannot read one of the strings
 * * %-E2BIG  - One of the strings is too large
 * * %-ERANGE - One of the strings is outside of kernel address space
 */
__bpf_kfunc int bpf_strnstr(const char *s1__ign, const char *s2__ign, size_t len)
{
	char hay, pat;
	int start, k;

	if (!copy_from_kernel_nofault_allowed(s1__ign, 1))
		return -ERANGE;
	if (!copy_from_kernel_nofault_allowed(s2__ign, 1))
		return -ERANGE;

	guard(pagefault)();
	for (start = 0; start < XATTR_SIZE_MAX; start++) {
		for (k = 0; start + k <= len && k < XATTR_SIZE_MAX; k++) {
			__get_kernel_nofault(&pat, s2__ign + k, char, err_out);

			/* Whole of s2 matched at this start position. */
			if (pat == '\0')
				return start;

			/*
			 * We allow reading an extra byte from s2 (note the
			 * `start + k <= len` above) to cover the case when s2
			 * is a suffix of the first len chars of s1.
			 */
			if (start + k == len)
				break;

			__get_kernel_nofault(&hay, s1__ign + start + k, char, err_out);

			/* s1 ended before s2 did: no later start can match. */
			if (hay == '\0')
				return -ENOENT;
			if (hay != pat)
				break;
		}
		if (k == XATTR_SIZE_MAX)
			return -E2BIG;

		/* Ran into the @len limit while s2 still had characters left. */
		if (start + k == len)
			return -ENOENT;
	}
	return -E2BIG;

err_out:
	return -EFAULT;
}
/** * bpf_strstr - Find the first substring in a string * @s1__ign: The string to be searched * @s2__ign: The string to search for * * Return: * * >=0 - Index of the first character of the first occurrence of @s2__ign * within @s1__ign * * %-ENOENT - @s2__ign is not a substring of @s1__ign * * %-EFAULT - Cannot read one of the strings * * %-E2BIG - One of the strings is too large * * %-ERANGE - One of the strings is outside of kernel address space
*/
__bpf_kfunc int bpf_strstr(constchar *s1__ign, constchar *s2__ign)
{ return bpf_strnstr(s1__ign, s2__ign, XATTR_SIZE_MAX);
}
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &generic_kfunc_set);
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_CGROUP_SKB, &generic_kfunc_set);
ret = ret ?: register_btf_id_dtor_kfuncs(generic_dtors,
ARRAY_SIZE(generic_dtors),
THIS_MODULE); return ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &common_kfunc_set);
}
/* Hook kfunc_init() up through late_initcall() so the kfunc sets get registered. */
late_initcall(kfunc_init);
/* Get a pointer to dynptr data up to len bytes for read only access. If
 * the dynptr doesn't have continuous data up to len bytes, return NULL.
 *
 * Implemented as a bpf_dynptr_slice() call at offset 0 with no fallback
 * buffer.
 */
const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len)
{
	return bpf_dynptr_slice((const struct bpf_dynptr *)ptr, 0, NULL, len);
}
/* Get a pointer to dynptr data up to len bytes for read write access. If
 * the dynptr doesn't have continuous data up to len bytes, or the dynptr
 * is read only, return NULL.
 *
 * The read-only check comes first; a writable dynptr is then resolved
 * through __bpf_dynptr_data() and the const qualifier is dropped.
 */
void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len)
{
	return __bpf_dynptr_is_rdonly(ptr) ? NULL
					   : (void *)__bpf_dynptr_data(ptr, len);
}
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.76 Sekunden
(vorverarbeitet am 2026-04-27)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.