/* bpf_check() is a static code analyzer that walks eBPF program * instruction by instruction and updates register/stack state. * All paths of conditional branches are analyzed until 'bpf_exit' insn. * * The first pass is depth-first-search to check that the program is a DAG. * It rejects the following programs: * - larger than BPF_MAXINSNS insns * - if loop is present (detected via back-edge) * - unreachable insns exist (shouldn't be a forest. program = one function) * - out of bounds or malformed jumps * The second pass is all possible path descent from the 1st insn. * Since it's analyzing all paths through the program, the length of the * analysis is limited to 64k insn, which may be hit even if total number of * insn is less then 4K, but there are too many branches that change stack/regs. * Number of 'branches to be analyzed' is limited to 1k * * On entry to each instruction, each register has a type, and the instruction * changes the types of the registers depending on instruction semantics. * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is * copied to R1. * * All registers are 64-bit. * R0 - return register * R1-R5 argument passing registers * R6-R9 callee saved registers * R10 - frame pointer read-only * * At the start of BPF program the register R1 contains a pointer to bpf_context * and has type PTR_TO_CTX. * * Verifier tracks arithmetic operations on pointers in case: * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20), * 1st insn copies R10 (which has FRAME_PTR) type into R1 * and 2nd arithmetic instruction is pattern matched to recognize * that it wants to construct a pointer to some element within stack. * So after 2nd insn, the register R1 has type PTR_TO_STACK * (and -20 constant is saved for further stack bounds checking). * Meaning that this reg is a pointer to stack plus known immediate constant. 
* * Most of the time the registers have SCALAR_VALUE type, which * means the register has some value, but it's not a valid pointer. * (like pointer plus pointer becomes SCALAR_VALUE type) * * When verifier sees load or store instructions the type of base register * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are * four pointer types recognized by check_mem_access() function. * * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value' * and the range of [ptr, ptr + map's value_size) is accessible. * * registers used to pass values to function calls are checked against * function argument constraints. * * ARG_PTR_TO_MAP_KEY is one of such argument constraints. * It means that the register type passed to this function must be * PTR_TO_STACK and it will be used inside the function as * 'pointer to map element key' * * For example the argument constraints for bpf_map_lookup_elem(): * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, * .arg1_type = ARG_CONST_MAP_PTR, * .arg2_type = ARG_PTR_TO_MAP_KEY, * * ret_type says that this function returns 'pointer to map elem value or null' * function expects 1st argument to be a const pointer to 'struct bpf_map' and * 2nd argument should be a pointer to stack, which will be used inside * the helper function as a pointer to map element key. * * On the kernel side the helper function looks like: * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) * { * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; * void *key = (void *) (unsigned long) r2; * void *value; * * here kernel can access 'key' and 'map' pointers safely, knowing that * [key, key + map->key_size) bytes are valid and were initialized on * the stack of eBPF program. 
* } * * Corresponding eBPF program may look like: * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), * here verifier looks at prototype of map_lookup_elem() and sees: * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok, * Now verifier knows that this map has key of R1->map_ptr->key_size bytes * * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far, * Now verifier checks that [R2, R2 + map's key_size) are within stack limits * and were initialized prior to this call. * If it's ok, then verifier allows this BPF_CALL insn and looks at * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function * returns either pointer to map value or NULL. * * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off' * insn, the register holding that pointer in the true branch changes state to * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false * branch. See check_cond_jmp_op(). * * After the call R0 is set to return type of the function and registers R1-R5 * are set to NOT_INIT to indicate that they are no longer readable. * * The following reference types represent a potential reference to a kernel * resource which, after first being allocated, must be checked and freed by * the BPF program: * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET * * When the verifier sees a helper call return a reference type, it allocates a * pointer id for the reference and stores it in the current function state. 
* Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type * passes through a NULL-check conditional. For the branch wherein the state is * changed to CONST_IMM, the verifier releases the reference. * * For each helper function that allocates a reference, such as * bpf_sk_lookup_tcp(), there is a corresponding release function, such as * bpf_sk_release(). When a reference type passes into the release function, * the verifier also releases the reference. If any unchecked or unreleased * reference remains at the end of the program, the verifier rejects it.
*/
/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
	/* verifier state is 'st'
	 * before processing instruction 'insn_idx'
	 * and after processing instruction 'prev_insn_idx'
	 */
	struct bpf_verifier_state st;
	int insn_idx;
	int prev_insn_idx;
	/* singly-linked list forming the DFS stack */
	struct bpf_verifier_stack_elem *next;
	/* length of verifier log at the time this state was pushed on stack */
	u32 log_pos;
};
/* arg_{btf,btf_id,owning_ref} are used by kfunc-specific handling, * generally to pass info about user-defined local kptr types to later * verification logic * bpf_obj_drop/bpf_percpu_obj_drop * Record the local kptr type to be drop'd * bpf_refcount_acquire (via KF_ARG_PTR_TO_REFCOUNTED_KPTR arg type) * Record the local kptr type to be refcount_incr'd and use * arg_owning_ref to determine whether refcount_acquire should be * fallible
*/ struct btf *arg_btf;
u32 arg_btf_id; bool arg_owning_ref; bool arg_prog;
if (is_ptr_cast_function(func_id))
ref_obj_uses++; if (is_acquire_function(func_id, map))
ref_obj_uses++; if (is_dynptr_ref_function(func_id))
ref_obj_uses++;
staticbool is_spi_bounds_valid(struct bpf_func_state *state, int spi, int nr_slots)
{ int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
/* We need to check that slots between [spi - nr_slots + 1, spi] are * within [0, allocated_stack). * * Please note that the spi grows downwards. For example, a dynptr * takes the size of two stack slots; the first slot will be at * spi and the second slot will be at spi - 1.
*/ return spi - nr_slots + 1 >= 0 && spi < allocated_slots;
}
staticint stack_slot_obj_get_spi(struct bpf_verifier_env *env, struct bpf_reg_state *reg, constchar *obj_kind, int nr_slots)
{ int off, spi;
if (!tnum_is_const(reg->var_off)) {
verbose(env, "%s has to be at a constant offset\n", obj_kind); return -EINVAL;
}
off = reg->off + reg->var_off.value; if (off % BPF_REG_SIZE) {
verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off); return -EINVAL;
}
spi = __get_spi(off); if (spi + 1 < nr_slots) {
verbose(env, "cannot pass in %s at an offset=%d\n", obj_kind, off); return -EINVAL;
}
if (!is_spi_bounds_valid(func(env, reg), spi, nr_slots)) return -ERANGE; return spi;
}
/* Forward declaration: needed by mark_stack_slots_dynptr() below. */
static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
					struct bpf_func_state *state, int spi);
/* Mark the two stack slots referenced by @reg as holding a dynptr of the
 * type implied by @arg_type, destroying any unreferenced dynptr previously
 * occupying (part of) those slots.
 */
static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
				   enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
{
	struct bpf_func_state *state = func(env, reg);
	enum bpf_dynptr_type type;
	int spi, i, err;

	spi = dynptr_get_spi(env, reg);
	if (spi < 0)
		return spi;

	/* We cannot assume both spi and spi - 1 belong to the same dynptr,
	 * hence we need to call destroy_if_dynptr_stack_slot twice for both,
	 * to ensure that for the following example:
	 * [d1][d1][d2][d2]
	 * spi    3   2   1   0
	 * So marking spi = 2 should lead to destruction of both d1 and d2. In
	 * case they do belong to same dynptr, second call won't see slot_type
	 * as STACK_DYNPTR and will simply skip destruction.
	 */
	err = destroy_if_dynptr_stack_slot(env, state, spi);
	if (err)
		return err;
	err = destroy_if_dynptr_stack_slot(env, state, spi - 1);
	if (err)
		return err;

	for (i = 0; i < BPF_REG_SIZE; i++) {
		state->stack[spi].slot_type[i] = STACK_DYNPTR;
		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
	}

	type = arg_to_dynptr_type(arg_type);
	if (type == BPF_DYNPTR_TYPE_INVALID)
		return -EINVAL;

	/* Set REG_LIVE_WRITTEN so that liveness read-marker walks done later
	 * (e.g. by check_stack_range_initialized or mark_dynptr_read) are
	 * screened off at this write and do not propagate read marks through
	 * a still-live parentage chain, which would unnecessarily mark
	 * registers in parent verifier states.
	 */
	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
	state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;

	/* NOTE(review): in the original, control fell off the end of this
	 * non-void function (undefined behavior); the statements recording the
	 * dynptr type/ref state in spilled_ptr also appear to be missing here.
	 * Returning 0 keeps the error paths above meaningful — confirm the
	 * lost lines against the upstream implementation.
	 */
	return 0;
}
/* If the dynptr has a ref_obj_id, then we need to invalidate * two things: * * 1) Any dynptrs with a matching ref_obj_id (clones) * 2) Any slices derived from this dynptr.
*/
/* Invalidate any slices associated with this dynptr */
WARN_ON_ONCE(release_reference(env, ref_obj_id));
/* Invalidate any dynptr clones */ for (i = 1; i < state->allocated_stack / BPF_REG_SIZE; i++) { if (state->stack[i].spilled_ptr.ref_obj_id != ref_obj_id) continue;
/* it should always be the case that if the ref obj id * matches then the stack slot also belongs to a * dynptr
*/ if (state->stack[i].slot_type[0] != STACK_DYNPTR) {
verifier_bug(env, "misconfigured ref_obj_id"); return -EFAULT;
} if (state->stack[i].spilled_ptr.dynptr.first_slot)
invalidate_dynptr(env, state, i);
}
staticint destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env, struct bpf_func_state *state, int spi)
{ struct bpf_func_state *fstate; struct bpf_reg_state *dreg; int i, dynptr_id;
/* We always ensure that STACK_DYNPTR is never set partially, * hence just checking for slot_type[0] is enough. This is * different for STACK_SPILL, where it may be only set for * 1 byte, so code has to use is_spilled_reg.
*/ if (state->stack[spi].slot_type[0] != STACK_DYNPTR) return 0;
/* Reposition spi to first slot */ if (!state->stack[spi].spilled_ptr.dynptr.first_slot)
spi = spi + 1;
/* Writing partially to one dynptr stack slot destroys both. */ for (i = 0; i < BPF_REG_SIZE; i++) {
state->stack[spi].slot_type[i] = STACK_INVALID;
state->stack[spi - 1].slot_type[i] = STACK_INVALID;
}
dynptr_id = state->stack[spi].spilled_ptr.id; /* Invalidate any slices associated with this dynptr */
bpf_for_each_reg_in_vstate(env->cur_state, fstate, dreg, ({ /* Dynptr slices are only PTR_TO_MEM_OR_NULL and PTR_TO_MEM */ if (dreg->type != (PTR_TO_MEM | PTR_MAYBE_NULL) && dreg->type != PTR_TO_MEM) continue; if (dreg->dynptr_id == dynptr_id)
mark_reg_invalid(env, dreg);
}));
/* Do not release reference state, we are destroying dynptr on stack, * not using some helper to release it. Just reset register.
*/
__mark_reg_not_init(env, &state->stack[spi].spilled_ptr);
__mark_reg_not_init(env, &state->stack[spi - 1].spilled_ptr);
/* Same reason as unmark_stack_slots_dynptr above */
state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
state->stack[spi - 1].spilled_ptr.live |= REG_LIVE_WRITTEN;
return 0;
}
staticbool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{ int spi;
if (reg->type == CONST_PTR_TO_DYNPTR) returnfalse;
spi = dynptr_get_spi(env, reg);
/* -ERANGE (i.e. spi not falling into allocated stack slots) isn't an * error because this just means the stack state hasn't been updated yet. * We will do check_mem_access to check and update stack bounds later.
*/ if (spi < 0 && spi != -ERANGE) returnfalse;
/* We don't need to check if the stack slots are marked by previous * dynptr initializations because we allow overwriting existing unreferenced * STACK_DYNPTR slots, see mark_stack_slots_dynptr which calls * destroy_if_dynptr_stack_slot to ensure dynptr objects at the slots we are * touching are completely destructed before we reinitialize them for a new * one. For referenced ones, destroy_if_dynptr_stack_slot returns an error early * instead of delaying it until the end where the user will get "Unreleased * reference" error.
*/ returntrue;
}
staticbool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
{ struct bpf_func_state *state = func(env, reg); int i, spi;
/* This already represents first slot of initialized bpf_dynptr. * * CONST_PTR_TO_DYNPTR already has fixed and var_off as 0 due to * check_func_arg_reg_off's logic, so we don't need to check its * offset and alignment.
*/ if (reg->type == CONST_PTR_TO_DYNPTR) returntrue;
spi = dynptr_get_spi(env, reg); if (spi < 0) returnfalse; if (!state->stack[spi].spilled_ptr.dynptr.first_slot) returnfalse;
for (i = 0; i < BPF_REG_SIZE; i++) { if (state->stack[spi].slot_type[i] != STACK_DYNPTR ||
state->stack[spi - 1].slot_type[i] != STACK_DYNPTR) returnfalse;
}
staticbool is_iter_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int nr_slots)
{ struct bpf_func_state *state = func(env, reg); int spi, i, j;
/* For -ERANGE (i.e. spi not falling into allocated stack slots), we * will do check_mem_access to check and update stack bounds later, so * return true for that case.
*/
spi = iter_get_spi(env, reg, nr_slots); if (spi == -ERANGE) returntrue; if (spi < 0) returnfalse;
for (i = 0; i < nr_slots; i++) { struct bpf_stack_state *slot = &state->stack[spi - i];
for (j = 0; j < BPF_REG_SIZE; j++) if (slot->slot_type[j] == STACK_ITER) returnfalse;
}
returntrue;
}
staticint is_iter_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg, struct btf *btf, u32 btf_id, int nr_slots)
{ struct bpf_func_state *state = func(env, reg); int spi, i, j;
for (i = 0; i < nr_slots; i++) { struct bpf_stack_state *slot = &state->stack[spi - i]; struct bpf_reg_state *st = &slot->spilled_ptr;
if (st->type & PTR_UNTRUSTED) return -EPROTO; /* only main (first) slot has ref_obj_id set */ if (i == 0 && !st->ref_obj_id) return -EINVAL; if (i != 0 && st->ref_obj_id) return -EINVAL; if (st->iter.btf != btf || st->iter.btf_id != btf_id) return -EINVAL;
for (j = 0; j < BPF_REG_SIZE; j++) if (slot->slot_type[j] != STACK_ITER) return -EINVAL;
}
return 0;
}
/* Forward declarations: needed by the irq-flag stack slot helpers below. */
static int acquire_irq_state(struct bpf_verifier_env *env, int insn_idx);
static int release_irq_state(struct bpf_verifier_state *state, int id);
/* Mark the stack slot referenced by @reg as holding saved IRQ flags,
 * acquiring a new IRQ reference to enforce properly nested restore.
 * Returns 0 on success or a negative error.
 */
static int mark_stack_slot_irq_flag(struct bpf_verifier_env *env,
				    struct bpf_kfunc_call_arg_meta *meta,
				    struct bpf_reg_state *reg, int insn_idx,
				    int kfunc_class)
{
	struct bpf_func_state *state = func(env, reg);
	struct bpf_stack_state *slot;
	struct bpf_reg_state *st;
	int spi, i, id;

	spi = irq_flag_get_spi(env, reg);
	if (spi < 0)
		return spi;

	id = acquire_irq_state(env, insn_idx);
	if (id < 0)
		return id;

	slot = &state->stack[spi];
	st = &slot->spilled_ptr;

	__mark_reg_known_zero(st);
	st->type = PTR_TO_STACK; /* we don't have dedicated reg type */
	st->live |= REG_LIVE_WRITTEN;
	st->ref_obj_id = id;
	/* remember which kfunc family saved the flags, so restore can match */
	st->irq.kfunc_class = kfunc_class;

	for (i = 0; i < BPF_REG_SIZE; i++)
		slot->slot_type[i] = STACK_IRQ_FLAG;

	mark_stack_slot_scratched(env, spi);
	return 0;
}
staticint unmark_stack_slot_irq_flag(struct bpf_verifier_env *env, struct bpf_reg_state *reg, int kfunc_class)
{ struct bpf_func_state *state = func(env, reg); struct bpf_stack_state *slot; struct bpf_reg_state *st; int spi, i, err;
spi = irq_flag_get_spi(env, reg); if (spi < 0) return spi;
slot = &state->stack[spi];
st = &slot->spilled_ptr;
verbose(env, "irq flag acquired by %s kfuncs cannot be restored with %s kfuncs\n",
flag_kfunc, used_kfunc); return -EINVAL;
}
err = release_irq_state(env->cur_state, st->ref_obj_id);
WARN_ON_ONCE(err && err != -EACCES); if (err) { int insn_idx = 0;
for (int i = 0; i < env->cur_state->acquired_refs; i++) { if (env->cur_state->refs[i].id == env->cur_state->active_irq_id) {
insn_idx = env->cur_state->refs[i].insn_idx; break;
}
}
verbose(env, "cannot restore irq state out of order, expected id=%d acquired at insn_idx=%d\n",
env->cur_state->active_irq_id, insn_idx); return err;
}
__mark_reg_not_init(env, st);
/* see unmark_stack_slots_dynptr() for why we need to set REG_LIVE_WRITTEN */
st->live |= REG_LIVE_WRITTEN;
for (i = 0; i < BPF_REG_SIZE; i++)
slot->slot_type[i] = STACK_INVALID;
/* For -ERANGE (i.e. spi not falling into allocated stack slots), we * will do check_mem_access to check and update stack bounds later, so * return true for that case.
*/
spi = irq_flag_get_spi(env, reg); if (spi == -ERANGE) returntrue; if (spi < 0) returnfalse;
slot = &state->stack[spi];
for (i = 0; i < BPF_REG_SIZE; i++) if (slot->slot_type[i] == STACK_IRQ_FLAG) returnfalse; returntrue;
}
spi = irq_flag_get_spi(env, reg); if (spi < 0) return -EINVAL;
slot = &state->stack[spi];
st = &slot->spilled_ptr;
if (!st->ref_obj_id) return -EINVAL;
for (i = 0; i < BPF_REG_SIZE; i++) if (slot->slot_type[i] != STACK_IRQ_FLAG) return -EINVAL; return 0;
}
/* Check if given stack slot is "special": * - spilled register state (STACK_SPILL); * - dynptr state (STACK_DYNPTR); * - iter state (STACK_ITER). * - irq flag state (STACK_IRQ_FLAG)
*/ staticbool is_stack_slot_special(conststruct bpf_stack_state *stack)
{ enum bpf_stack_slot_type type = stack->slot_type[BPF_REG_SIZE - 1];
switch (type) { case STACK_SPILL: case STACK_DYNPTR: case STACK_ITER: case STACK_IRQ_FLAG: returntrue; case STACK_INVALID: case STACK_MISC: case STACK_ZERO: returnfalse; default:
WARN_ONCE(1, "unknown stack slot type %d\n", type); returntrue;
}
}
/* The reg state of a pointer or a bounded scalar was saved when * it was spilled to the stack.
*/ staticbool is_spilled_reg(conststruct bpf_stack_state *stack)
{ return stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL;
}
/* Mark stack slot as STACK_MISC, unless it is already STACK_INVALID, in which
 * case they are equivalent, or it's STACK_ZERO, in which case we preserve
 * more precise STACK_ZERO.
 * Regardless of allow_ptr_leaks setting (i.e., privileged or unprivileged
 * mode), we won't promote STACK_INVALID to STACK_MISC. In privileged case it is
 * unnecessary as both are considered equivalent when loading data and pruning,
 * in case of unprivileged mode it will be incorrect to allow reads of invalid
 * slots.
 */
static void mark_stack_slot_misc(struct bpf_verifier_env *env, u8 *stype)
{
	if (*stype == STACK_ZERO)
		return;
	if (*stype == STACK_INVALID)
		return;
	*stype = STACK_MISC;
}
/* copy array src of length n * size bytes to dst. dst is reallocated if it's too * small to hold src. This is different from krealloc since we don't want to preserve * the contents of dst. * * Leaves dst untouched if src is NULL or length is zero. Returns NULL if memory could * not be allocated.
*/ staticvoid *copy_array(void *dst, constvoid *src, size_t n, size_t size, gfp_t flags)
{
size_t alloc_bytes; void *orig = dst;
size_t bytes;
if (ZERO_OR_NULL_PTR(src)) goto out;
if (unlikely(check_mul_overflow(n, size, &bytes))) return NULL;
/* resize an array from old_n items to new_n items. the array is reallocated if it's too * small to hold new_n items. new items are zeroed out if the array grows. * * Contrary to krealloc_array, does not free arr if new_n is zero.
*/ staticvoid *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
{
size_t alloc_size; void *new_arr;
/* Resize state->refs to hold @n reference entries.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int resize_reference_state(struct bpf_verifier_state *state, size_t n)
{
	state->refs = realloc_array(state->refs, state->acquired_refs, n,
				    sizeof(struct bpf_reference_state));
	if (!state->refs)
		return -ENOMEM;

	state->acquired_refs = n;
	return 0;
}
/* Possibly update state->allocated_stack to be at least size bytes. Also
 * possibly update the function's high-water mark in its bpf_subprog_info.
 */
static int grow_stack_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int size)
{
	size_t old_n = state->allocated_stack / BPF_REG_SIZE, n;

	/* The stack size is always a multiple of BPF_REG_SIZE. */
	size = round_up(size, BPF_REG_SIZE);
	n = size / BPF_REG_SIZE;

	if (old_n >= n)
		return 0;

	state->stack = realloc_array(state->stack, old_n, n, sizeof(struct bpf_stack_state));
	if (!state->stack)
		return -ENOMEM;

	state->allocated_stack = size;

	/* update known max for given subprogram */
	if (env->subprog_info[state->subprogno].stack_depth < size)
		env->subprog_info[state->subprogno].stack_depth = size;

	return 0;
}
/* Acquire a pointer id from the env and update the state->refs to include * this new pointer reference. * On success, returns a valid pointer id to associate with the register * On failure, returns a negative errno.
*/ staticstruct bpf_reference_state *acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{ struct bpf_verifier_state *state = env->cur_state; int new_ofs = state->acquired_refs; int err;
s = acquire_reference_state(env, insn_idx); if (!s) return -ENOMEM;
s->type = REF_TYPE_IRQ;
s->id = ++env->id_gen;
state->active_irq_id = s->id; return s->id;
}
/* Remove entry @idx from state->refs, preserving the relative order of the
 * remaining entries.
 */
static void release_reference_state(struct bpf_verifier_state *state, int idx)
{
	int last_idx;
	size_t rem;

	/* IRQ state requires the relative ordering of elements remaining the
	 * same, since it relies on the refs array to behave as a stack, so that
	 * it can detect out-of-order IRQ restore. Hence use memmove to shift
	 * the array instead of swapping the final element into the deleted idx.
	 */
	last_idx = state->acquired_refs - 1;
	rem = state->acquired_refs - idx - 1;
	if (last_idx && idx != last_idx)
		memmove(&state->refs[idx], &state->refs[idx + 1], sizeof(*state->refs) * rem);
	memset(&state->refs[last_idx], 0, sizeof(*state->refs));
	state->acquired_refs--;
}
staticbool find_reference_state(struct bpf_verifier_state *state, int ptr_id)
{ int i;
for (i = 0; i < state->acquired_refs; i++) if (state->refs[i].id == ptr_id) returntrue;
returnfalse;
}
/* Release the lock reference matching (@type, @id, @ptr) from state->refs
 * and make the most recently taken remaining lock the active one.
 * Returns -EINVAL if no matching entry exists.
 */
static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
{
	void *prev_ptr = NULL;
	u32 prev_id = 0;
	int i;

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].type == type && state->refs[i].id == id &&
		    state->refs[i].ptr == ptr) {
			release_reference_state(state, i);
			state->active_locks--;
			/* Reassign active lock (id, ptr). */
			state->active_lock_id = prev_id;
			state->active_lock_ptr = prev_ptr;
			return 0;
		}
		/* Track the most recent lock seen before the match. */
		if (state->refs[i].type & REF_TYPE_LOCK_MASK) {
			prev_id = state->refs[i].id;
			prev_ptr = state->refs[i].ptr;
		}
	}
	return -EINVAL;
}
/* Release the IRQ reference @id. IRQ save/restore must nest properly, so @id
 * must be the currently active (most recently acquired) one; otherwise return
 * -EACCES. The previous IRQ reference becomes active again.
 */
static int release_irq_state(struct bpf_verifier_state *state, int id)
{
	u32 prev_id = 0;
	int i;

	if (id != state->active_irq_id)
		return -EACCES;

	for (i = 0; i < state->acquired_refs; i++) {
		if (state->refs[i].type != REF_TYPE_IRQ)
			continue;
		if (state->refs[i].id == id) {
			release_reference_state(state, i);
			state->active_irq_id = prev_id;
			return 0;
		} else {
			prev_id = state->refs[i].id;
		}
	}
	return -EINVAL;
}
staticstruct bpf_reference_state *find_lock_state(struct bpf_verifier_state *state, enum ref_state_type type, int id, void *ptr)
{ int i;
for (i = 0; i < state->acquired_refs; i++) { struct bpf_reference_state *s = &state->refs[i];
if (!(s->type & type)) continue;
if (s->id == id && s->ptr == ptr) return s;
} return NULL;
}
/* Free all frames, acquired references and jump history of @state; free
 * @state itself as well when @free_self is true.
 */
static void free_verifier_state(struct bpf_verifier_state *state, bool free_self)
{
	int i;

	for (i = 0; i <= state->curframe; i++) {
		free_func_state(state->frame[i]);
		state->frame[i] = NULL;
	}
	kfree(state->refs);
	clear_jmp_history(state);
	if (free_self)
		kfree(state);
}
/* struct bpf_verifier_state->parent refers to states * that are in either of env->{expored_states,free_list}. * In both cases the state is contained in struct bpf_verifier_state_list.
*/ staticstruct bpf_verifier_state_list *state_parent_as_list(struct bpf_verifier_state *st)
{ if (st->parent) return container_of(st->parent, struct bpf_verifier_state_list, state); return NULL;
}
/* A state can be freed if it is no longer referenced:
 * - is in the env->free_list;
 * - has no children states;
 */
static void maybe_free_verifier_state(struct bpf_verifier_env *env,
				      struct bpf_verifier_state_list *sl)
{
	if (!sl->in_free_list ||
	    sl->state.branches != 0 ||
	    incomplete_read_marks(env, &sl->state))
		return;
	list_del(&sl->node);
	free_verifier_state(&sl->state, false);
	kfree(sl);
	env->free_list_size--;
}
/* copy verifier state from src to dst growing dst stack space * when necessary to accommodate larger src stack
*/ staticint copy_func_state(struct bpf_func_state *dst, conststruct bpf_func_state *src)
{
memcpy(dst, src, offsetof(struct bpf_func_state, stack)); return copy_stack_state(dst, src);
}
staticint copy_verifier_state(struct bpf_verifier_state *dst_state, conststruct bpf_verifier_state *src)
{ struct bpf_func_state *dst; int i, err;
staticbool same_callsites(struct bpf_verifier_state *a, struct bpf_verifier_state *b)
{ int fr;
if (a->curframe != b->curframe) returnfalse;
for (fr = a->curframe; fr >= 0; fr--) if (a->frame[fr]->callsite != b->frame[fr]->callsite) returnfalse;
returntrue;
}
/* Return IP for a given frame in a call stack: the current insn for the
 * topmost frame, otherwise the call site recorded in the next frame.
 */
static u32 frame_insn_idx(struct bpf_verifier_state *st, u32 frame)
{
	if (frame == st->curframe)
		return st->insn_idx;

	return st->frame[frame + 1]->callsite;
}
/* For state @st look for a topmost frame with frame_insn_idx() in some SCC, * if such frame exists form a corresponding @callchain as an array of * call sites leading to this frame and SCC id. * E.g.: * * void foo() { A: loop {... SCC#1 ...}; } * void bar() { B: loop { C: foo(); ... SCC#2 ... } * D: loop { E: foo(); ... SCC#3 ... } } * void main() { F: bar(); } * * @callchain at (A) would be either (F,SCC#2) or (F,SCC#3) depending * on @st frame call sites being (F,C,A) or (F,E,A).
*/ staticbool compute_scc_callchain(struct bpf_verifier_env *env, struct bpf_verifier_state *st, struct bpf_scc_callchain *callchain)
{
u32 i, scc, insn_idx;
memset(callchain, 0, sizeof(*callchain)); for (i = 0; i <= st->curframe; i++) {
insn_idx = frame_insn_idx(st, i);
scc = env->insn_aux_data[insn_idx].scc; if (scc) {
callchain->scc = scc; break;
} elseif (i < st->curframe) {
callchain->callsites[i] = insn_idx;
} else { returnfalse;
}
} returntrue;
}
if (!info) return NULL; for (i = 0; i < info->num_visits; i++) if (memcmp(callchain, &visits[i].callchain, sizeof(*callchain)) == 0) return &visits[i]; return NULL;
}
/* Allocate a new bpf_scc_visit instance corresponding to @callchain. * Allocated instances are alive for a duration of the do_check_common() * call and are freed by free_states().
*/ staticstruct bpf_scc_visit *scc_visit_alloc(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
{ struct bpf_scc_visit *visit; struct bpf_scc_info *info;
u32 scc, num_visits;
u64 new_sz;
/* Form a string '(callsite#1,callsite#2,...,scc)' in env->tmp_str_buf */ staticchar *format_callchain(struct bpf_verifier_env *env, struct bpf_scc_callchain *callchain)
{ char *buf = env->tmp_str_buf; int i, delta = 0;
/* If callchain for @st exists (@st is in some SCC), ensure that
 * bpf_scc_visit instance for this callchain exists.
 * If instance does not exist or is empty, assign visit->entry_state to @st.
 */
static int maybe_enter_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, callchain))
		return 0;
	visit = scc_visit_lookup(env, callchain);
	visit = visit ?: scc_visit_alloc(env, callchain);
	if (!visit)
		return -ENOMEM;
	if (!visit->entry_state) {
		visit->entry_state = st;
		if (env->log.level & BPF_LOG_LEVEL2)
			verbose(env, "SCC enter %s\n", format_callchain(env, callchain));
	}
	return 0;
}
/* If callchain for @st exists (@st is in some SCC), make it empty:
 * - set visit->entry_state to NULL;
 * - flush accumulated backedges.
 */
static int maybe_exit_scc(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, callchain))
		return 0;
	visit = scc_visit_lookup(env, callchain);
	if (!visit) {
		/*
		 * If path traversal stops inside an SCC, corresponding bpf_scc_visit
		 * must exist for non-speculative paths. For non-speculative paths
		 * traversal stops when:
		 * a. Verification error is found, maybe_exit_scc() is not called.
		 * b. Top level BPF_EXIT is reached. Top level BPF_EXIT is not a member
		 *    of any SCC.
		 * c. A checkpoint is reached and matched. Checkpoints are created by
		 *    is_state_visited(), which calls maybe_enter_scc(), which allocates
		 *    bpf_scc_visit instances for checkpoints within SCCs.
		 * (c) is the only case that can reach this point.
		 */
		if (!st->speculative) {
			verifier_bug(env, "scc exit: no visit info for call chain %s",
				     format_callchain(env, callchain));
			return -EFAULT;
		}
		return 0;
	}
	if (visit->entry_state != st)
		return 0;
	if (env->log.level & BPF_LOG_LEVEL2)
		verbose(env, "SCC exit %s\n", format_callchain(env, callchain));
	visit->entry_state = NULL;
	env->num_backedges -= visit->num_backedges;
	visit->num_backedges = 0;
	update_peak_states(env);
	return propagate_backedges(env, visit);
}
/* Lookup an bpf_scc_visit instance corresponding to @st callchain
 * and add @backedge to visit->backedges. @st callchain must exist.
 */
static int add_scc_backedge(struct bpf_verifier_env *env,
			    struct bpf_verifier_state *st,
			    struct bpf_scc_backedge *backedge)
{
	struct bpf_scc_callchain *callchain = &env->callchain_buf;
	struct bpf_scc_visit *visit;

	if (!compute_scc_callchain(env, st, callchain)) {
		verifier_bug(env, "add backedge: no SCC in verification path, insn_idx %d",
			     st->insn_idx);
		return -EFAULT;
	}
	visit = scc_visit_lookup(env, callchain);
	if (!visit) {
		verifier_bug(env, "add backedge: no visit info for call chain %s",
			     format_callchain(env, callchain));
		return -EFAULT;
	}
	if (env->log.level & BPF_LOG_LEVEL2)
		verbose(env, "SCC backedge %s\n", format_callchain(env, callchain));
	/* push onto the visit's singly-linked backedge list */
	backedge->next = visit->backedges;
	visit->backedges = backedge;
	visit->num_backedges++;
	env->num_backedges++;
	update_peak_states(env);
	return 0;
}
/* bpf_reg_state->live marks for registers in a state @st are incomplete, * if state @st is in some SCC and not all execution paths starting at this * SCC are fully explored.
*/ staticbool incomplete_read_marks(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{ struct bpf_scc_callchain *callchain = &env->callchain_buf; struct bpf_scc_visit *visit;
if (!compute_scc_callchain(env, st, callchain)) returnfalse;
visit = scc_visit_lookup(env, callchain); if (!visit) returnfalse; return !!visit->backedges;
}
/* verifier_bug_if(br > 1, ...) technically makes sense here, * but see comment in push_stack(), hence:
*/
verifier_bug_if((int)br < 0, env, "%s:branches_to_explore=%d", __func__, br); if (br) break;
err = maybe_exit_scc(env, st); if (err) return err;
parent = st->parent;
parent_sl = state_parent_as_list(st); if (sl)
maybe_free_verifier_state(env, sl);
st = parent;
sl = parent_sl;
} return 0;
}
/* Pop the top element of the exploration stack into env->cur_state.
 * Optionally reports the saved instruction indices through @insn_idx /
 * @prev_insn_idx, and truncates the verifier log back to the position
 * recorded when the element was pushed if @pop_log is set.
 * Returns 0 on success, -ENOENT if the stack is empty, or the error
 * from copy_verifier_state().
 */
static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
		     int *insn_idx, bool pop_log)
{
	struct bpf_verifier_stack_elem *top = env->head;
	struct bpf_verifier_state *dst = env->cur_state;
	int ret;

	if (!top)
		return -ENOENT;

	if (dst) {
		ret = copy_verifier_state(dst, &top->st);
		if (ret)
			return ret;
	}
	if (pop_log)
		bpf_vlog_reset(&env->log, top->log_pos);
	if (insn_idx)
		*insn_idx = top->insn_idx;
	if (prev_insn_idx)
		*prev_insn_idx = top->prev_insn_idx;

	/* Unlink before freeing; top->next is read prior to kfree(). */
	env->head = top->next;
	env->stack_size--;
	free_verifier_state(&top->st, false);
	kfree(top);
	return 0;
}
/* Should only return true for non-fatal errors that are allowed to
 * occur during speculative verification. For these we can insert a
 * nospec and the program might still be accepted. Do not include
 * something like ENOMEM because it is likely to re-occur for the next
 * architectural path once it has been recovered-from in all speculative
 * paths.
 */
static bool error_recoverable_with_nospec(int err)
{
	switch (err) {
	case -EPERM:
	case -EACCES:
	case -EINVAL:
		return true;
	default:
		return false;
	}
}
/* Push a copy of the current verifier state onto the exploration stack so
 * the alternative path starting at @insn_idx can be analyzed later.
 * @speculative is OR-ed into the new state's speculative flag.
 * Returns a pointer to the pushed state, or NULL on allocation failure,
 * state-copy failure, or when the jump-sequence complexity limit is hit.
 */
static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
					     int insn_idx, int prev_insn_idx,
					     bool speculative)
{
	struct bpf_verifier_state *cur = env->cur_state;
	struct bpf_verifier_stack_elem *elem;
	int err;

	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL_ACCOUNT);
	if (!elem)
		return NULL;

	elem->insn_idx = insn_idx;
	elem->prev_insn_idx = prev_insn_idx;
	elem->next = env->head;
	elem->log_pos = env->log.end_pos;
	/* NOTE(review): elem is linked into env->head before the copy below;
	 * on copy failure it presumably remains reachable for later cleanup
	 * rather than being freed here — confirm against the pop/free paths.
	 */
	env->head = elem;
	env->stack_size++;
	err = copy_verifier_state(&elem->st, cur);
	if (err)
		return NULL;
	elem->st.speculative |= speculative;
	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
		verbose(env, "The sequence of %d jumps is too complex.\n",
			env->stack_size);
		return NULL;
	}
	if (elem->st.parent) {
		++elem->st.parent->branches;
		/* WARN_ON(branches > 2) technically makes sense here,
		 * but
		 * 1. speculative states will bump 'branches' for non-branch
		 *    instructions
		 * 2. is_state_visited() heuristics may decide not to create
		 *    a new state for a sequence of branches and all such
		 *    current and cloned states will be pointing to a single
		 *    parent state which might have large 'branches' count.
		 */
	}
	return &elem->st;
}
/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
	/* Clear off and union(map_ptr, range): zero every field between
	 * 'type' and 'var_off' in one memset; 'type' itself and everything
	 * from 'var_off' onwards are left untouched. This relies on the
	 * field layout of struct bpf_reg_state.
	 */
	memset(((u8 *)reg) + sizeof(reg->type), 0,
	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
	reg->id = 0;
	reg->ref_obj_id = 0;
	___mark_reg_known(reg, imm);
}
/* Mark the 'variable offset' part of a register as zero. This should be
 * used only on registers holding a pointer type.
 */
static void __mark_reg_known_zero(struct bpf_reg_state *reg)
{
	/* Known value 0 <=> variable part of the register is exactly zero. */
	__mark_reg_known(reg, 0);
}
/* Mark @reg as a scalar holding the constant value 0. */
static void __mark_reg_const_zero(const struct bpf_verifier_env *env,
				  struct bpf_reg_state *reg)
{
	__mark_reg_known(reg, 0);
	reg->type = SCALAR_VALUE;
	/* all scalars are assumed imprecise initially (unless unprivileged,
	 * in which case everything is forced to be precise)
	 */
	reg->precise = !env->bpf_capable;
}
/* Mark register @regno in @regs as holding the known value zero.
 * An out-of-range @regno indicates an internal verifier error: warn,
 * log, and invalidate every register so nothing stale can be trusted.
 */
static void mark_reg_known_zero(struct bpf_verifier_env *env,
				struct bpf_reg_state *regs, u32 regno)
{
	u32 i;

	if (WARN_ON(regno >= MAX_BPF_REG)) {
		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
		/* Something bad happened, let's kill all regs */
		for (i = 0; i < MAX_BPF_REG; i++)
			__mark_reg_not_init(env, &regs[i]);
		return;
	}

	__mark_reg_known_zero(&regs[regno]);
}
staticvoid __mark_dynptr_reg(struct bpf_reg_state *reg, enum bpf_dynptr_type type, bool first_slot, int dynptr_id)
{ /* reg->type has no meaning for STACK_DYNPTR, but when we set reg for * callback arguments, it does need to be CONST_PTR_TO_DYNPTR, so simply * set it unconditionally as it is ignored for STACK_DYNPTR anyway.
*/
__mark_reg_known_zero(reg);
reg->type = CONST_PTR_TO_DYNPTR;
/* NOTE(review): the remainder of this chunk was non-source extraction
 * residue (a web-page size-limit footer); removed. The preceding
 * __mark_dynptr_reg() definition is truncated and must be restored from
 * the original file.
 */