/* BTF (BPF Type Format) is the metadata format which describes
 * the data types of BPF programs/maps.  Hence, it basically focuses
 * on the C programming language, which modern BPF primarily
 * uses.
 *
 * ELF Section:
 * ~~~~~~~~~~~
 * The BTF data is stored under the ".BTF" ELF section
 *
 * struct btf_type:
 * ~~~~~~~~~~~~~~~
 * Each 'struct btf_type' object describes a C data type.
 * Depending on the type it is describing, a 'struct btf_type'
 * object may be followed by more data.  E.g.
 * to describe an array, 'struct btf_type' is followed by
 * 'struct btf_array'.
 *
 * 'struct btf_type' and any extra data following it are
 * 4-byte aligned.
 *
 * Type section:
 * ~~~~~~~~~~~~~
 * The BTF type section contains a list of 'struct btf_type' objects.
 * Each one describes a C type.  Recall from the above section
 * that a 'struct btf_type' object could be immediately followed by extra
 * data in order to describe some particular C types.
 *
 * type_id:
 * ~~~~~~~
 * Each btf_type object is identified by a type_id.  The type_id
 * is implied by the location of the btf_type object in
 * the BTF type section.  The first one has type_id 1.  The second
 * one has type_id 2...etc.  Hence, an earlier btf_type has
 * a smaller type_id.
 *
 * A btf_type object may refer to another btf_type object by using
 * type_id (i.e. the "type" in the "struct btf_type").
 *
 * NOTE that we cannot assume any reference-order.
 * A btf_type object can refer to an earlier btf_type object
 * but it can also refer to a later btf_type object.
 *
 * For example, to describe "const void *", a btf_type
 * object describing "const" may refer to another btf_type
 * object describing "void *".  This type-reference is done
 * by specifying type_id:
 *
 * [1] CONST (anon) type_id=2
 * [2] PTR (anon) type_id=0
 *
 * The above is the btf_verifier debug log:
 *   - Each line starting with "[?]" is a btf_type object
 *   - [?] is the type_id of the btf_type object.
 *   - CONST/PTR is the BTF_KIND_XXX
 *   - "(anon)" is the name of the type.  It just
 *     happens that CONST and PTR have no name.
 *   - type_id=XXX is the 'u32 type' in btf_type
 *
 * NOTE: "void" has type_id 0
 *
 * String section:
 * ~~~~~~~~~~~~~~
 * The BTF string section contains the names used by the type section.
 * Each string is referred to by an "offset" from the beginning of the
 * string section.
 *
 * Each string is '\0' terminated.
 *
 * The first character in the string section must be '\0'
 * which is used to mean 'anonymous'.  Some btf_type may not
 * have a name.
 */
/* BTF verification:
 *
 * To verify BTF data, two passes are needed.
 *
 * Pass #1
 * ~~~~~~~
 * The first pass is to collect all btf_type objects into
 * an array: "btf->types".
 *
 * Depending on the C type that a btf_type is describing,
 * a btf_type may be followed by extra data.  We don't know
 * how many btf_type objects there are, and more importantly we don't
 * know where each btf_type is located in the type section.
 *
 * Without knowing the location of each type_id, most verifications
 * cannot be done.  E.g. an earlier btf_type may refer to a later
 * btf_type (recall the "const void *" above), so we cannot
 * check this type-reference in the first pass.
 *
 * The first pass still does some verification (e.g.
 * checking that the name is a valid offset into the string section).
 *
 * Pass #2
 * ~~~~~~~
 * The main focus is to resolve a btf_type that is referring
 * to another type.
 *
 * We have to ensure the referred-to type:
 * 1) does exist in the BTF (i.e. in btf->types[])
 * 2) does not cause a loop:
 *	struct A {
 *		struct B b;
 *	};
 *
 *	struct B {
 *		struct A a;
 *	};
 *
 * btf_type_needs_resolve() decides if a btf_type needs
 * to be resolved.
 *
 * The needs_resolve type implements the "resolve()" ops which
 * essentially does a DFS and detects a backedge.
 *
 * During resolve (or DFS), different C types have different
 * "RESOLVED" conditions.
 *
 * When resolving a BTF_KIND_STRUCT, we need to resolve all its
 * members because a member is always referring to another
 * type.  A struct's member can be treated as "RESOLVED" if
 * it is referring to a BTF_KIND_PTR.  Otherwise, the
 * following valid C struct would be rejected:
 *
 *	struct A {
 *		int m;
 *		struct A *a;
 *	};
 *
 * When resolving a BTF_KIND_PTR, it needs to keep resolving if
 * it is referring to another BTF_KIND_PTR.  Otherwise, we cannot
 * detect a pointer loop, e.g.:
 * BTF_KIND_CONST -> BTF_KIND_PTR -> BTF_KIND_CONST -> BTF_KIND_PTR +
 *                        ^                                         |
 *                        +-----------------------------------------+
 *
 */
/* Size budget of 16MB: room for 64k structs with 16 members each,
 * plus a few MB of space for the string section.
 * The hard limit is S32_MAX.
 */
#define BTF_MAX_SIZE (16 * 1024 * 1024)
/*
 * Iterate over the members of @struct_type, starting at index @from.
 *
 * @i:           loop counter (lvalue); runs from @from up to, but not
 *               including, btf_type_vlen(@struct_type)
 * @from:        index of the first member to visit (evaluated twice)
 * @struct_type: type whose members are walked
 * @member:      cursor (lvalue); set to btf_type_member(@struct_type) + @from
 *               and advanced by one on every iteration
 */
#define for_each_member_from(i, from, struct_type, member)		\
	for ((i) = (from),						\
	     (member) = btf_type_member(struct_type) + (from);		\
	     (i) < btf_type_vlen(struct_type);				\
	     (i)++, (member)++)
/*
 * Iterate over the var_secinfo entries of @struct_type, starting at
 * index @from.
 *
 * @i:           loop counter (lvalue); runs from @from up to, but not
 *               including, btf_type_vlen(@struct_type)
 * @from:        index of the first entry to visit (evaluated twice)
 * @struct_type: type whose entries are walked
 * @member:      cursor (lvalue); set to
 *               btf_type_var_secinfo(@struct_type) + @from and advanced by
 *               one on every iteration
 */
#define for_each_vsi_from(i, from, struct_type, member)			\
	for ((i) = (from),						\
	     (member) = btf_type_var_secinfo(struct_type) + (from);	\
	     (i) < btf_type_vlen(struct_type);				\
	     (i)++, (member)++)
/* split BTF support */ struct btf *base_btf;
u32 start_id; /* first type ID in this BTF (0 for base BTF) */
u32 start_str_off; /* first string offset (0 for base BTF) */ char name[MODULE_NAME_LEN]; bool kernel_btf;
__u32 *base_id_map; /* map from distilled base BTF -> vmlinux BTF ids */
};
/* What the current resolve pass is resolving on behalf of. */
enum resolve_mode {
	/* To Be Determined */
	RESOLVE_TBD = 0,
	/* Resolving for Pointer */
	RESOLVE_PTR,
	/* Resolving for struct/union or array */
	RESOLVE_STRUCT_OR_ARRAY,
};
/* Chunk size we use in safe copy of data to be shown. */
#define BTF_SHOW_OBJ_SAFE_SIZE 32

/*
 * This is the maximum size of a base type value (equivalent to a
 * 128-bit int); if we are at the end of our safe buffer and have
 * less than 16 bytes of space left we can't be assured of being able
 * to copy the next type safely, so in such cases we will initiate
 * a new copy.
 */
#define BTF_SHOW_OBJ_BASE_TYPE_SIZE 16

/* Type name size */
#define BTF_SHOW_NAME_SIZE 80

/*
 * The suffix of a type that indicates it cannot alias another type when
 * comparing BTF IDs for kfunc invocations.
 */
#define NOCAST_ALIAS_SUFFIX "___init"
/*
 * Common data to all BTF show operations. Private show functions can add
 * their own data to a structure containing a struct btf_show and consult it
 * in the show callback.  See btf_type_show() below.
 *
 * One challenge with showing nested data is we want to skip 0-valued
 * data, but in order to figure out whether a nested object is all zeros
 * we need to walk through it.  As a result, we need to make two passes
 * when handling structs, unions and arrays; the first pass simply looks
 * for nonzero data, while the second actually does the display.  The first
 * pass is signalled by show->state.depth_check being set, and if we
 * encounter a non-zero value we set show->state.depth_to_show to
 * the depth at which we encountered it.  When we have completed the
 * first pass, we will know if anything needs to be displayed if
 * depth_to_show > depth.  See btf_[struct,array]_show() for the
 * implementation of this.
 *
 * Another problem is we want to ensure the data for display is safe to
 * access.  To support this, the anonymous "struct {} obj" tracks the data
 * object and our safe copy of it.  We copy portions of the data needed
 * to the object "safe" buffer, but because its size is limited to
 * BTF_SHOW_OBJ_SAFE_SIZE bytes, multiple copies may be required as we
 * traverse larger objects for display.
 *
 * The various data type show functions all start with a call to
 * btf_show_start_type() which returns a pointer to the safe copy
 * of the data needed (or if BTF_SHOW_UNSAFE is specified, to the
 * raw data itself).  btf_show_obj_safe() is responsible for
 * using copy_from_kernel_nofault() to update the safe data if necessary
 * as we traverse the object's data.  skbuff-like semantics are
 * used:
 *
 * - obj.head points to the start of the toplevel object for display
 * - obj.size is the size of the toplevel object
 * - obj.data points to the current point in the original data at
 *   which our safe data starts.  obj.data will advance as we copy
 *   portions of the data.
 *
 * In most cases a single copy will suffice, but larger data structures
 * such as "struct task_struct" will require many copies.  The logic in
 * btf_show_obj_safe() determines whether a new
 * copy_from_kernel_nofault() is needed.
 */
struct btf_show {
	u64 flags;
	void *target;	/* target of show operation (seq file, buffer) */
	__printf(2, 0) void (*showfn)(struct btf_show *show, const char *fmt,
				      va_list args);
	const struct btf *btf;
	/* below are used during iteration */
	struct {
		u8 depth;
		u8 depth_to_show;
		u8 depth_check;
		u8 array_member:1,
		   array_terminated:1;
		u16 array_encoding;
		u32 type_id;
		int status;			/* non-zero for error */
		const struct btf_type *type;
		const struct btf_member *member;
		char name[BTF_SHOW_NAME_SIZE];	/* space for member name/type */
	} state;
	struct {
		u32 size;
		void *head;
		void *data;
		u8 safe[BTF_SHOW_OBJ_SAFE_SIZE];
	} obj;
};
staticbool btf_type_is_modifier(conststruct btf_type *t)
{ /* Some of them is not strictly a C modifier * but they are grouped into the same bucket * for BTF concern: * A type (t) that refers to another * type through t->type AND its size cannot * be determined without following the t->type. * * ptr does not fall into this bucket * because its size is always sizeof(void *).
*/ switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_TYPE_TAG: returntrue;
}
returnfalse;
}
bool btf_type_is_void(conststruct btf_type *t)
{ return t == &btf_void;
}
btf = bpf_get_btf_vmlinux(); if (IS_ERR(btf)) return PTR_ERR(btf); if (!btf) return -EINVAL;
ret = btf_find_by_name_kind(btf, name, kind); /* ret is never zero, since btf_find_by_name_kind returns * positive btf_id or negative error.
*/ if (ret > 0) {
btf_get(btf);
*btf_p = btf; return ret;
}
/* If name is not found in vmlinux's BTF then search in module's BTFs */
spin_lock_bh(&btf_idr_lock);
idr_for_each_entry(&btf_idr, btf, id) { if (!btf_is_module(btf)) continue; /* linear search could be slow hence unlock/lock * the IDR to avoiding holding it for too long
*/
btf_get(btf);
spin_unlock_bh(&btf_idr_lock);
ret = btf_find_by_name_kind(btf, name, kind); if (ret > 0) {
*btf_p = btf; return ret;
}
btf_put(btf);
spin_lock_bh(&btf_idr_lock);
}
spin_unlock_bh(&btf_idr_lock); return ret;
}
EXPORT_SYMBOL_GPL(bpf_find_btf_id);
/* Types that act only as a source, not sink or intermediate * type when resolving.
*/ staticbool btf_type_is_resolve_source_only(conststruct btf_type *t)
{ return btf_type_is_var(t) ||
btf_type_is_decl_tag(t) ||
btf_type_is_datasec(t);
}
/* What types need to be resolved? * * btf_type_is_modifier() is an obvious one. * * btf_type_is_struct() because its member refers to * another type (through member->type). * * btf_type_is_var() because the variable refers to * another type. btf_type_is_datasec() holds multiple * btf_type_is_var() types that need resolving. * * btf_type_is_array() because its element (array->type) * refers to another type. Array can be thought of a * special case of struct while array just has the same * member-type repeated by array->nelems of times.
*/ staticbool btf_type_needs_resolve(conststruct btf_type *t)
{ return btf_type_is_modifier(t) ||
btf_type_is_ptr(t) ||
btf_type_is_struct(t) ||
btf_type_is_array(t) ||
btf_type_is_var(t) ||
btf_type_is_func(t) ||
btf_type_is_decl_tag(t) ||
btf_type_is_datasec(t);
}
/* t->size can be used */ staticbool btf_type_has_size(conststruct btf_type *t)
{ switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_INT: case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: case BTF_KIND_DATASEC: case BTF_KIND_FLOAT: case BTF_KIND_ENUM64: returntrue;
}
/* * Check that the type @t is a regular int. This means that @t is not * a bit field and it has the same size as either of u8/u16/u32/u64 * or __int128. If @expected_size is not zero, then size of @t should * be the same. A caller should already have checked that the type @t * is an integer.
*/ staticbool __btf_type_int_is_regular(conststruct btf_type *t, size_t expected_size)
{
u32 int_data = btf_type_int(t);
u8 nr_bits = BTF_INT_BITS(int_data);
u8 nr_bytes = BITS_ROUNDUP_BYTES(nr_bits);
/* if kflag set, int should be a regular int and * bit offset should be at byte boundary.
*/ return !bitfield_size &&
BITS_ROUNDUP_BYTES(bit_offset) == expected_offset &&
BITS_ROUNDUP_BYTES(nr_bits) == expected_size;
}
/* Similar to btf_type_skip_modifiers() but does not skip typedefs. */ staticconststruct btf_type *btf_type_skip_qualifiers(conststruct btf *btf,
u32 id)
{ conststruct btf_type *t = btf_type_by_id(btf, id);
while (btf_type_is_modifier(t) &&
BTF_INFO_KIND(t->info) != BTF_KIND_TYPEDEF) {
t = btf_type_by_id(btf, t->type);
}
return t;
}
/* Maximum nesting of pointers/arrays/typedefs followed when naming a type. */
#define BTF_SHOW_MAX_ITER	10

/*
 * Single-bit mask for BTF kind @kind within a 64-bit set of kinds.
 * The argument is parenthesized so that expressions such as
 * BTF_KIND_BIT(a | b) shift by the whole expression.
 */
#define BTF_KIND_BIT(kind)	(1ULL << (kind))
/* * Populate show->state.name with type name information. * Format of type name is * * [.member_name = ] (type_name)
*/ staticconstchar *btf_show_name(struct btf_show *show)
{ /* BTF_MAX_ITER array suffixes "[]" */ constchar *array_suffixes = "[][][][][][][][][][]"; constchar *array_suffix = &array_suffixes[strlen(array_suffixes)]; /* BTF_MAX_ITER pointer suffixes "*" */ constchar *ptr_suffixes = "**********"; constchar *ptr_suffix = &ptr_suffixes[strlen(ptr_suffixes)]; constchar *name = NULL, *prefix = "", *parens = ""; conststruct btf_member *m = show->state.member; conststruct btf_type *t; conststruct btf_array *array;
u32 id = show->state.type_id; constchar *member = NULL; bool show_member = false;
u64 kinds = 0; int i;
show->state.name[0] = '\0';
/* * Don't show type name if we're showing an array member; * in that case we show the array type so don't need to repeat * ourselves for each member.
*/ if (show->state.array_member) return"";
/* Retrieve member name, if any. */ if (m) {
member = btf_name_by_offset(show->btf, m->name_off);
show_member = strlen(member) > 0;
id = m->type;
}
/* * Start with type_id, as we have resolved the struct btf_type * * via btf_modifier_show() past the parent typedef to the child * struct, int etc it is defined as. In such cases, the type_id * still represents the starting type while the struct btf_type * * in our show->state points at the resolved type of the typedef.
*/
t = btf_type_by_id(show->btf, id); if (!t) return"";
/* * The goal here is to build up the right number of pointer and * array suffixes while ensuring the type name for a typedef * is represented. Along the way we accumulate a list of * BTF kinds we have encountered, since these will inform later * display; for example, pointer types will not require an * opening "{" for struct, we will just display the pointer value. * * We also want to accumulate the right number of pointer or array * indices in the format string while iterating until we get to * the typedef/pointee/array member target type. * * We start by pointing at the end of pointer and array suffix * strings; as we accumulate pointers and arrays we move the pointer * or array string backwards so it will show the expected number of * '*' or '[]' for the type. BTF_SHOW_MAX_ITER of nesting of pointers * and/or arrays and typedefs are supported as a precaution. * * We also want to get typedef name while proceeding to resolve * type it points to so that we can add parentheses if it is a * "typedef struct" etc.
*/ for (i = 0; i < BTF_SHOW_MAX_ITER; i++) {
switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_TYPEDEF: if (!name)
name = btf_name_by_offset(show->btf,
t->name_off);
kinds |= BTF_KIND_BIT(BTF_KIND_TYPEDEF);
id = t->type; break; case BTF_KIND_ARRAY:
kinds |= BTF_KIND_BIT(BTF_KIND_ARRAY);
parens = "["; if (!t) return"";
array = btf_type_array(t); if (array_suffix > array_suffixes)
array_suffix -= 2;
id = array->type; break; case BTF_KIND_PTR:
kinds |= BTF_KIND_BIT(BTF_KIND_PTR); if (ptr_suffix > ptr_suffixes)
ptr_suffix -= 1;
id = t->type; break; default:
id = 0; break;
} if (!id) break;
t = btf_type_skip_qualifiers(show->btf, id);
} /* We may not be able to represent this type; bail to be safe */ if (i == BTF_SHOW_MAX_ITER) return"";
if (!name)
name = btf_name_by_offset(show->btf, t->name_off);
switch (BTF_INFO_KIND(t->info)) { case BTF_KIND_STRUCT: case BTF_KIND_UNION:
prefix = BTF_INFO_KIND(t->info) == BTF_KIND_STRUCT ? "struct" : "union"; /* if it's an array of struct/union, parens is already set */ if (!(kinds & (BTF_KIND_BIT(BTF_KIND_ARRAY))))
parens = "{"; break; case BTF_KIND_ENUM: case BTF_KIND_ENUM64:
prefix = "enum"; break; default: break;
}
/* pointer does not require parens */ if (kinds & BTF_KIND_BIT(BTF_KIND_PTR))
parens = ""; /* typedef does not require struct/union/enum prefix */ if (kinds & BTF_KIND_BIT(BTF_KIND_TYPEDEF))
prefix = "";
if (!name)
name = "";
/* Even if we don't want type name info, we want parentheses etc */ if (show->flags & BTF_SHOW_NONAME)
snprintf(show->state.name, sizeof(show->state.name), "%s",
parens); else
snprintf(show->state.name, sizeof(show->state.name), "%s%s%s(%s%s%s%s%s%s)%s", /* first 3 strings comprise ".member = " */
show_member ? "." : "",
show_member ? member : "",
show_member ? " = " : "", /* ...next is our prefix (struct, enum, etc) */
prefix,
strlen(prefix) > 0 && strlen(name) > 0 ? " " : "", /* ...this is the type name itself */
name, /* ...suffixed by the appropriate '*', '[]' suffixes */
strlen(ptr_suffix) > 0 ? " " : "", ptr_suffix,
array_suffix, parens);
/* Macros are used here as btf_show_type_value[s]() prepends and appends
 * format specifiers to the format specifier passed in; these do the work of
 * adding indentation, delimiters etc while the caller simply has to specify
 * the type value(s) in the format specifier + value(s).
 *
 * @show:  struct btf_show * describing the show operation
 * @fmt:   string literal (it is concatenated with the surrounding "%s%s")
 * @value: value to print; it is evaluated more than once, so it must be
 *         free of side effects
 *
 * The value is emitted when it is non-zero, when BTF_SHOW_ZERO is set in
 * show->flags, or when show->state.depth is 0.  When something is emitted,
 * show->state.depth_to_show is raised to the current depth if that is
 * deeper.
 */
#define btf_show_type_value(show, fmt, value) \
	do { \
		if ((value) != (__typeof__(value))0 || \
		    ((show)->flags & BTF_SHOW_ZERO) || \
		    (show)->state.depth == 0) { \
			btf_show(show, "%s%s" fmt "%s%s", \
				 btf_show_indent(show), \
				 btf_show_name(show), \
				 value, btf_show_delim(show), \
				 btf_show_newline(show)); \
			if ((show)->state.depth > (show)->state.depth_to_show) \
				(show)->state.depth_to_show = \
					(show)->state.depth; \
		} \
	} while (0)
/*
 * How much is left to copy to safe buffer after @data?
 *
 * Returns the number of bytes between @data and the end of the toplevel
 * object (show->obj.head + show->obj.size).
 */
static int btf_show_obj_size_left(struct btf_show *show, void *data)
{
	return show->obj.head + show->obj.size - data;
}
/* Is object pointed to by @data of @size already copied to our safe buffer? */ staticbool btf_show_obj_is_safe(struct btf_show *show, void *data, int size)
{ return data >= show->obj.data &&
(data + size) < (show->obj.data + BTF_SHOW_OBJ_SAFE_SIZE);
}
/*
 * If object pointed to by @data of @size falls within our safe buffer, return
 * the equivalent pointer to the same safe data.  Assumes
 * copy_from_kernel_nofault() has already happened and our safe buffer is
 * populated.
 *
 * Returns NULL when the object is not covered by the safe buffer.
 */
static void *__btf_show_obj_safe(struct btf_show *show, void *data, int size)
{
	if (btf_show_obj_is_safe(show, data, size))
		return show->obj.safe + (data - show->obj.data);
	return NULL;
}
/* * Return a safe-to-access version of data pointed to by @data. * We do this by copying the relevant amount of information * to the struct btf_show obj.safe buffer using copy_from_kernel_nofault(). * * If BTF_SHOW_UNSAFE is specified, just return data as-is; no * safe copy is needed. * * Otherwise we need to determine if we have the required amount * of data (determined by the @data pointer and the size of the * largest base type we can encounter (represented by * BTF_SHOW_OBJ_BASE_TYPE_SIZE). Having that much data ensures * that we will be able to print some of the current object, * and if more is needed a copy will be triggered. * Some objects such as structs will not fit into the buffer; * in such cases additional copies when we iterate over their * members may be needed. * * btf_show_obj_safe() is used to return a safe buffer for * btf_show_start_type(); this ensures that as we recurse into * nested types we always have safe data for the given type. * This approach is somewhat wasteful; it's possible for example * that when iterating over a large union we'll end up copying the * same data repeatedly, but the goal is safety not performance. * We use stack data as opposed to per-CPU buffers because the * iteration over a type can take some time, and preemption handling * would greatly complicate use of the safe buffer.
*/ staticvoid *btf_show_obj_safe(struct btf_show *show, conststruct btf_type *t, void *data)
{ conststruct btf_type *rt; int size_left, size; void *safe = NULL;
/* * Is this toplevel object? If so, set total object size and * initialize pointers. Otherwise check if we still fall within * our safe object data.
*/ if (show->state.depth == 0) {
show->obj.size = size;
show->obj.head = data;
} else { /* * If the size of the current object is > our remaining * safe buffer we _may_ need to do a new copy. However * consider the case of a nested struct; it's size pushes * us over the safe buffer limit, but showing any individual * struct members does not. In such cases, we don't need * to initiate a fresh copy yet; however we definitely need * at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes left * in our buffer, regardless of the current object size. * The logic here is that as we resolve types we will * hit a base type at some point, and we need to be sure * the next chunk of data is safely available to display * that type info safely. We cannot rely on the size of * the current object here because it may be much larger * than our current buffer (e.g. task_struct is 8k). * All we want to do here is ensure that we can print the * next basic type, which we can if either * - the current type size is within the safe buffer; or * - at least BTF_SHOW_OBJ_BASE_TYPE_SIZE bytes are left in * the safe buffer.
*/
safe = __btf_show_obj_safe(show, data,
min(size,
BTF_SHOW_OBJ_BASE_TYPE_SIZE));
}
/* * We need a new copy to our safe object, either because we haven't * yet copied and are initializing safe data, or because the data * we want falls outside the boundaries of the safe object.
*/ if (!safe) {
size_left = btf_show_obj_size_left(show, data); if (size_left > BTF_SHOW_OBJ_SAFE_SIZE)
size_left = BTF_SHOW_OBJ_SAFE_SIZE;
show->state.status = copy_from_kernel_nofault(show->obj.safe,
data, size_left); if (!show->state.status) {
show->obj.data = data;
safe = show->obj.safe;
}
}
return safe;
}
/* * Set the type we are starting to show and return a safe data pointer * to be used for showing the associated data.
*/ staticvoid *btf_show_start_type(struct btf_show *show, conststruct btf_type *t,
u32 type_id, void *data)
{
show->state.type = t;
show->state.type_id = type_id;
show->state.name[0] = '\0';
if (log->level == BPF_LOG_KERNEL) { /* btf verifier prints all types it is processing via * btf_verifier_log_type(..., fmt = NULL). * Skip those prints for in-kernel BTF verification.
*/ if (!fmt) return;
/* Skip logging when loading module BTF with mismatches permitted */ if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) return;
}
if (log->level == BPF_LOG_KERNEL) { if (!fmt) return;
/* Skip logging when loading module BTF with mismatches permitted */ if (env->btf->base_btf && IS_ENABLED(CONFIG_MODULE_ALLOW_BTF_MISMATCH)) return;
}
/* The CHECK_META phase already did a btf dump. * * If member is logged again, it must hit an error in * parsing this member. It is useful to print out which * struct this member belongs to.
*/ if (env->phase != CHECK_META)
btf_verifier_log_type(env, struct_type, NULL);
/* * In map-in-map, calling map_delete_elem() on outer * map will call bpf_map_put on the inner map. * It will then eventually call btf_free_id() * on the inner map. Some of the map_delete_elem() * implementation may have irq disabled, so * we need to use the _irqsave() version instead * of the _bh() version.
*/
spin_lock_irqsave(&btf_idr_lock, flags);
idr_remove(&btf_idr, btf->id);
spin_unlock_irqrestore(&btf_idr_lock, flags);
}
staticbool env_type_is_resolve_sink(conststruct btf_verifier_env *env, conststruct btf_type *next_type)
{ switch (env->resolve_mode) { case RESOLVE_TBD: /* int, enum or void is a sink */ return !btf_type_needs_resolve(next_type); case RESOLVE_PTR: /* int, enum, void, struct, array, func or func_proto is a sink * for ptr
*/ return !btf_type_is_modifier(next_type) &&
!btf_type_is_ptr(next_type); case RESOLVE_STRUCT_OR_ARRAY: /* int, enum, void, ptr, func or func_proto is a sink * for struct and array
*/ return !btf_type_is_modifier(next_type) &&
!btf_type_is_array(next_type) &&
!btf_type_is_struct(next_type); default:
BUG();
}
}
staticbool env_type_is_resolved(conststruct btf_verifier_env *env,
u32 type_id)
{ /* base BTF types should be resolved by now */ if (type_id < env->btf->start_id) returntrue;
type_id -= btf->start_id; /* adjust to local type id */
btf->resolved_sizes[type_id] = resolved_size;
btf->resolved_ids[type_id] = resolved_type_id;
env->visit_states[type_id] = RESOLVED;
}
/* Resolve the size of a passed-in "type" * * type: is an array (e.g. u32 array[x][y]) * return type: type "u32[x][y]", i.e. BTF_KIND_ARRAY, * *type_size: (x * y * sizeof(u32)). Hence, *type_size always * corresponds to the return type. * *elem_type: u32 * *elem_id: id of u32 * *total_nelems: (x * y). Hence, individual elem size is * (*type_size / *total_nelems) * *type_id: id of type if it's changed within the function, 0 if not * * type: is not an array (e.g. const struct X) * return type: type "struct X" * *type_size: sizeof(struct X) * *elem_type: same as return type ("struct X") * *elem_id: 0 * *total_nelems: 1 * *type_id: id of type if it's changed within the function, 0 if not
*/ staticconststruct btf_type *
__btf_resolve_size(conststruct btf *btf, conststruct btf_type *type,
u32 *type_size, conststruct btf_type **elem_type,
u32 *elem_id, u32 *total_nelems, u32 *type_id)
{ conststruct btf_type *array_type = NULL; conststruct btf_array *array = NULL;
u32 i, size, nelems = 1, id = 0;
for (i = 0; i < MAX_RESOLVE_DEPTH; i++) { switch (BTF_INFO_KIND(type->info)) { /* type->size can be used */ case BTF_KIND_INT: case BTF_KIND_STRUCT: case BTF_KIND_UNION: case BTF_KIND_ENUM: case BTF_KIND_FLOAT: case BTF_KIND_ENUM64:
size = type->size; goto resolved;
case BTF_KIND_PTR:
size = sizeof(void *); goto resolved;
/* Modifiers */ case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: case BTF_KIND_RESTRICT: case BTF_KIND_TYPE_TAG:
id = type->type;
type = btf_type_by_id(btf, type->type); break;
case BTF_KIND_ARRAY: if (!array_type)
array_type = type;
array = btf_type_array(type); if (nelems && array->nelems > U32_MAX / nelems) return ERR_PTR(-EINVAL);
nelems *= array->nelems;
type = btf_type_by_id(btf, array->type); break;
/* type without size */ default: return ERR_PTR(-EINVAL);
}
}
/* The input param "type_id" must point to a needs_resolve type */ staticconststruct btf_type *btf_type_id_resolve(conststruct btf *btf,
u32 *type_id)
{
*type_id = btf_resolved_type_id(btf, *type_id); return btf_type_by_id(btf, *type_id);
}
/* Used for ptr, array struct/union and float type members. * int, enum and modifier types have their specific callback functions.
*/ staticint btf_generic_check_kflag_member(struct btf_verifier_env *env, conststruct btf_type *struct_type, conststruct btf_member *member, conststruct btf_type *member_type)
{ if (BTF_MEMBER_BITFIELD_SIZE(member->offset)) {
btf_verifier_log_member(env, struct_type, member, "Invalid member bitfield_size"); return -EINVAL;
}
/* bitfield size is 0, so member->offset represents bit offset only. * It is safe to call non kflag check_member variants.
*/ return btf_type_ops(member_type)->check_member(env, struct_type,
member,
member_type);
}
/* a regular int type is required for the kflag int member */ if (!btf_type_int_is_regular(member_type)) {
btf_verifier_log_member(env, struct_type, member, "Invalid member base type"); return -EINVAL;
}
/* check sanity of bitfield size */
nr_bits = BTF_MEMBER_BITFIELD_SIZE(member->offset);
struct_bits_off = BTF_MEMBER_BIT_OFFSET(member->offset);
nr_int_data_bits = BTF_INT_BITS(int_data); if (!nr_bits) { /* Not a bitfield member, member offset must be at byte * boundary.
*/ if (BITS_PER_BYTE_MASKED(struct_bits_off)) {
btf_verifier_log_member(env, struct_type, member, "Invalid member offset"); return -EINVAL;
}
/* * Only one of the encoding bits is allowed and it * should be sufficient for the pretty print purpose (i.e. decoding). * Multiple bits can be allowed later if it is found * to be insufficient.
*/
encoding = BTF_INT_ENCODING(int_data); if (encoding &&
encoding != BTF_INT_SIGNED &&
encoding != BTF_INT_CHAR &&
encoding != BTF_INT_BOOL) {
btf_verifier_log_type(env, t, "Unsupported encoding"); return -ENOTSUPP;
}
if (!env_type_is_resolve_sink(env, next_type) &&
!env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id);
/* Figure out the resolved next_type_id with size. * They will be stored in the current modifier's * resolved_ids and resolved_sizes such that it can * save us a few type-following when we use it later (e.g. in * pretty print).
*/ if (!btf_type_id_size(btf, &next_type_id, NULL)) { if (env_type_is_resolved(env, next_type_id))
next_type = btf_type_id_resolve(btf, &next_type_id);
/* We must resolve to something concrete at this point, no * forward types or similar that would resolve to size of * zero is allowed.
*/ if (!btf_type_id_size(btf, &next_type_id, NULL)) {
btf_verifier_log_type(env, v->t, "Invalid type_id"); return -EINVAL;
}
if (!env_type_is_resolve_sink(env, next_type) &&
!env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id);
/* If the modifier was RESOLVED during RESOLVE_STRUCT_OR_ARRAY, * the modifier may have stopped resolving when it was resolved * to a ptr (last-resolved-ptr). * * We now need to continue from the last-resolved-ptr to * ensure the last-resolved-ptr will not referring back to * the current ptr (t).
*/ if (btf_type_is_modifier(next_type)) { conststruct btf_type *resolved_type;
u32 resolved_type_id;
safe_data = btf_show_start_type(show, t, type_id, data); if (!safe_data) return;
/* It is a hashed value unless BTF_SHOW_PTR_RAW is specified */ if (show->flags & BTF_SHOW_PTR_RAW)
btf_show_type_value(show, "0x%px", *(void **)safe_data); else
btf_show_type_value(show, "0x%p", *(void **)safe_data);
btf_show_end_type(show);
}
/* fwd type must have a valid name */ if (!t->name_off ||
!btf_name_valid_identifier(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL;
}
/* Array elem type and index type cannot be in type void, * so !array->type and !array->index_type are not allowed.
*/ if (!array->type || !BTF_TYPE_ID_VALID(array->type)) {
btf_verifier_log_type(env, t, "Invalid elem"); return -EINVAL;
}
if (elem_type && btf_type_is_int(elem_type)) {
u32 int_type = btf_type_int(elem_type);
encoding = BTF_INT_ENCODING(int_type);
/* * BTF_INT_CHAR encoding never seems to be set for * char arrays, so if size is 1 and element is * printable as a char, we'll do that.
*/ if (elem_size == 1)
encoding = BTF_INT_CHAR;
}
if (!btf_show_start_array_type(show, t, type_id, encoding, data)) return;
if (!elem_type) goto out;
elem_ops = btf_type_ops(elem_type);
for (i = 0; i < array->nelems; i++) {
btf_show_start_array_member(show);
elem_ops->show(btf, elem_type, elem_type_id, data,
bits_offset, show);
data += elem_size;
btf_show_end_array_member(show);
if (show->state.array_terminated) break;
}
out:
btf_show_end_array_type(show);
}
/* * First check if any members would be shown (are non-zero). * See comments above "struct btf_show" definition for more * details on how this works at a high-level.
*/ if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) { if (!show->state.depth_check) {
show->state.depth_check = show->state.depth + 1;
show->state.depth_to_show = 0;
}
__btf_array_show(btf, t, type_id, data, bits_offset, show);
show->state.member = m;
if (show->state.depth_check != show->state.depth + 1) return;
show->state.depth_check = 0;
if (show->state.depth_to_show <= show->state.depth) return; /* * Reaching here indicates we have recursed and found * non-zero array member(s).
*/
}
__btf_array_show(btf, t, type_id, data, bits_offset, show);
}
/* struct type either no name or a valid one */ if (t->name_off &&
!btf_name_valid_identifier(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL;
}
/* struct member either no name or a valid one */ if (member->name_off &&
!btf_name_valid_identifier(btf, member->name_off)) {
btf_verifier_log_member(env, t, member, "Invalid name"); return -EINVAL;
} /* A member cannot be in type void */ if (!member->type || !BTF_TYPE_ID_VALID(member->type)) {
btf_verifier_log_member(env, t, member, "Invalid type_id"); return -EINVAL;
}
offset = __btf_member_bit_offset(t, member); if (is_union && offset) {
btf_verifier_log_member(env, t, member, "Invalid member bits_offset"); return -EINVAL;
}
/* * ">" instead of ">=" because the last member could be * "char a[0];"
*/ if (last_offset > offset) {
btf_verifier_log_member(env, t, member, "Invalid member bits_offset"); return -EINVAL;
}
if (BITS_ROUNDUP_BYTES(offset) > struct_size) {
btf_verifier_log_member(env, t, member, "Member bits_offset exceeds its struct size"); return -EINVAL;
}
/* Before continue resolving the next_member, * ensure the last member is indeed resolved to a * type with size info.
*/ if (v->next_member) { conststruct btf_type *last_member_type; conststruct btf_member *last_member;
u32 last_member_type_id;
/* Permit modifiers on the pointer itself */ if (btf_type_is_volatile(t))
t = btf_type_by_id(btf, t->type); /* For PTR, sz is always == 8 */ if (!btf_type_is_ptr(t)) return BTF_FIELD_IGNORE;
t = btf_type_by_id(btf, t->type);
is_type_tag = btf_type_is_type_tag(t) && !btf_type_kflag(t); if (!is_type_tag) return BTF_FIELD_IGNORE; /* Reject extra tags */ if (btf_type_is_type_tag(btf_type_by_id(btf, t->type))) return -EINVAL;
tag_value = __btf_name_by_offset(btf, t->name_off); if (!strcmp("kptr_untrusted", tag_value))
type = BPF_KPTR_UNREF; elseif (!strcmp("kptr", tag_value))
type = BPF_KPTR_REF; elseif (!strcmp("percpu_kptr", tag_value))
type = BPF_KPTR_PERCPU; elseif (!strcmp("uptr", tag_value))
type = BPF_UPTR; else return -EINVAL;
if (!(type & field_mask)) return BTF_FIELD_IGNORE;
/* Get the base type */
t = btf_type_skip_modifiers(btf, t->type, &res_id); /* Only pointer to struct is allowed */ if (!__btf_type_is_struct(t)) return -EINVAL;
int btf_find_next_decl_tag(conststruct btf *btf, conststruct btf_type *pt, int comp_idx, constchar *tag_key, int last_id)
{ int len = strlen(tag_key); int i, n;
for (i = last_id + 1, n = btf_nr_types(btf); i < n; i++) { conststruct btf_type *t = btf_type_by_id(btf, i);
if (!btf_type_is_decl_tag(t)) continue; if (pt != btf_type_by_id(btf, t->type)) continue; if (btf_type_decl_tag(t)->component_idx != comp_idx) continue; if (strncmp(__btf_name_by_offset(btf, t->name_off), tag_key, len)) continue; return i;
} return -ENOENT;
}
/* Classify @var_type, by name, as one of the special BPF field types.
 *
 * @btf:        BTF object used to look up the name of @var_type
 * @var_type:   type to classify
 * @field_mask: BPF_* field types the caller wants to detect
 * @seen_mask:  in/out; records which once-only field types were already found
 * @align:      out; set from btf_field_type_align() when a type is matched
 * @sz:         out; set from btf_field_type_size() when a type is matched
 *
 * Returns the matched field type, 0 when nothing matches (*align and *sz
 * are then left untouched), or -E2BIG when bpf_spin_lock,
 * bpf_res_spin_lock, bpf_timer or bpf_wq is encountered a second time.
 */
static int btf_get_field_type(const struct btf *btf, const struct btf_type *var_type,
			      u32 field_mask, u32 *seen_mask, int *align, int *sz)
{
	/* NOTE(review): 'name', 'type' and the 'end' label are kept under
	 * these exact names because field_mask_test_name() is defined outside
	 * this block and presumably refers to them - confirm before renaming.
	 */
	int type = 0;
	const char *name = __btf_name_by_offset(btf, var_type->name_off);

	/* The following four field types may appear at most once. */
	if ((field_mask & BPF_SPIN_LOCK) && !strcmp(name, "bpf_spin_lock")) {
		if (*seen_mask & BPF_SPIN_LOCK)
			return -E2BIG;
		*seen_mask |= BPF_SPIN_LOCK;
		type = BPF_SPIN_LOCK;
		goto end;
	}
	if ((field_mask & BPF_RES_SPIN_LOCK) && !strcmp(name, "bpf_res_spin_lock")) {
		if (*seen_mask & BPF_RES_SPIN_LOCK)
			return -E2BIG;
		*seen_mask |= BPF_RES_SPIN_LOCK;
		type = BPF_RES_SPIN_LOCK;
		goto end;
	}
	if ((field_mask & BPF_TIMER) && !strcmp(name, "bpf_timer")) {
		if (*seen_mask & BPF_TIMER)
			return -E2BIG;
		*seen_mask |= BPF_TIMER;
		type = BPF_TIMER;
		goto end;
	}
	if ((field_mask & BPF_WORKQUEUE) && !strcmp(name, "bpf_wq")) {
		if (*seen_mask & BPF_WORKQUEUE)
			return -E2BIG;
		*seen_mask |= BPF_WORKQUEUE;
		type = BPF_WORKQUEUE;
		goto end;
	}

	field_mask_test_name(BPF_LIST_HEAD, "bpf_list_head");
	field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
	field_mask_test_name(BPF_RB_ROOT,   "bpf_rb_root");
	field_mask_test_name(BPF_RB_NODE,   "bpf_rb_node");
	field_mask_test_name(BPF_REFCOUNT,  "bpf_refcount");

	/* Only return BPF_KPTR when all other types with matchable names fail.
	 * Any non-struct type is reported as BPF_KPTR_REF here, regardless of
	 * which kptr/uptr flavour it turns out to be.
	 */
	if ((field_mask & (BPF_KPTR | BPF_UPTR)) && !__btf_type_is_struct(var_type)) {
		type = BPF_KPTR_REF;
		goto end;
	}
	return 0;

end:
	*sz = btf_field_type_size(type);
	*align = btf_field_type_align(type);
	return type;
}
#undef field_mask_test_name
/* Repeat a number of fields for a specified number of times. * * Copy the fields starting from the first field and repeat them for * repeat_cnt times. The fields are repeated by adding the offset of each * field with * (i + 1) * elem_size * where i is the repeat index and elem_size is the size of an element.
*/ staticint btf_repeat_fields(struct btf_field_info *info, int info_cnt,
u32 field_cnt, u32 repeat_cnt, u32 elem_size)
{
u32 i, j;
u32 cur;
/* Ensure not repeating fields that should not be repeated. */ for (i = 0; i < field_cnt; i++) { switch (info[i].type) { case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: case BPF_UPTR: case BPF_LIST_HEAD: case BPF_RB_ROOT: break; default: return -EINVAL;
}
}
/* The type of struct size or variable size is u32, * so the multiplication will not overflow.
*/ if (field_cnt * (repeat_cnt + 1) > info_cnt) return -E2BIG;
cur = field_cnt; for (i = 0; i < repeat_cnt; i++) {
memcpy(&info[cur], &info[0], field_cnt * sizeof(info[0])); for (j = 0; j < field_cnt; j++)
info[cur++].off += (i + 1) * elem_size;
}
/* Find special fields in the struct type of a field.
 *
 * This function is used to find fields of special types that are not a
 * global variable or a direct field of a struct type. It also handles the
 * repetition if it is the element type of an array.
*/ staticint btf_find_nested_struct(conststruct btf *btf, conststruct btf_type *t,
u32 off, u32 nelems,
u32 field_mask, struct btf_field_info *info, int info_cnt, u32 level)
{ int ret, err, i;
level++; if (level >= MAX_RESOLVE_DEPTH) return -E2BIG;
ret = btf_find_struct_field(btf, t, field_mask, info, info_cnt, level);
if (ret <= 0) return ret;
/* Shift the offsets of the nested struct fields to the offsets * related to the container.
*/ for (i = 0; i < ret; i++)
info[i].off += off;
if (nelems > 1) {
err = btf_repeat_fields(info, info_cnt, ret, nelems - 1, t->size); if (err == 0)
ret *= nelems; else
ret = err;
}
/* Walk into array types to find the element type and the number of * elements in the (flattened) array.
*/ for (i = 0; i < MAX_RESOLVE_DEPTH && btf_type_is_array(var_type); i++) {
array = btf_array(var_type);
nelems *= array->nelems;
var_type = btf_type_by_id(btf, array->type);
} if (i == MAX_RESOLVE_DEPTH) return -E2BIG; if (nelems == 0) return 0;
field_type = btf_get_field_type(btf, var_type,
field_mask, seen_mask, &align, &sz); /* Look into variables of struct types */ if (!field_type && __btf_type_is_struct(var_type)) {
sz = var_type->size; if (expected_size && expected_size != sz * nelems) return 0;
ret = btf_find_nested_struct(btf, var_type, off, nelems, field_mask,
&info[0], info_cnt, level); return ret;
}
if (field_type == 0) return 0; if (field_type < 0) return field_type;
if (expected_size && expected_size != sz * nelems) return 0; if (off % align) return 0;
switch (field_type) { case BPF_SPIN_LOCK: case BPF_RES_SPIN_LOCK: case BPF_TIMER: case BPF_WORKQUEUE: case BPF_LIST_NODE: case BPF_RB_NODE: case BPF_REFCOUNT:
ret = btf_find_struct(btf, var_type, off, sz, field_type,
info_cnt ? &info[0] : &tmp); if (ret < 0) return ret; break; case BPF_KPTR_UNREF: case BPF_KPTR_REF: case BPF_KPTR_PERCPU: case BPF_UPTR:
ret = btf_find_kptr(btf, var_type, off, sz,
info_cnt ? &info[0] : &tmp, field_mask); if (ret < 0) return ret; break; case BPF_LIST_HEAD: case BPF_RB_ROOT:
ret = btf_find_graph_root(btf, var, var_type,
var_idx, off, sz,
info_cnt ? &info[0] : &tmp,
field_type); if (ret < 0) return ret; break; default: return -EFAULT;
}
if (ret == BTF_FIELD_IGNORE) return 0; if (!info_cnt) return -E2BIG; if (nelems > 1) {
ret = btf_repeat_fields(info, info_cnt, 1, nelems - 1, sz); if (ret < 0) return ret;
} return nelems;
}
/* Callers have to ensure the life cycle of btf if it is program BTF */ staticint btf_parse_kptr(conststruct btf *btf, struct btf_field *field, struct btf_field_info *info)
{ struct module *mod = NULL; conststruct btf_type *t; /* If a matching btf type is found in kernel or module BTFs, kptr_ref * is that BTF, otherwise it's program BTF
*/ struct btf *kptr_btf; int ret;
s32 id;
/* Find type in map BTF, and use it to look up the matching type * in vmlinux or module BTFs, by name and kind.
*/
t = btf_type_by_id(btf, info->kptr.type_id);
id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
&kptr_btf); if (id == -ENOENT) { /* btf_parse_kptr should only be called w/ btf = program BTF */
WARN_ON_ONCE(btf_is_kernel(btf));
/* Type exists only in program BTF. Assume that it's a MEM_ALLOC * kptr allocated via bpf_obj_new
*/
field->kptr.dtor = NULL;
id = info->kptr.type_id;
kptr_btf = (struct btf *)btf; goto found_dtor;
} if (id < 0) return id;
/* Find and stash the function pointer for the destruction function that * needs to be eventually invoked from the map free path.
*/ if (info->type == BPF_KPTR_REF) { conststruct btf_type *dtor_func; constchar *dtor_func_name; unsignedlong addr;
s32 dtor_btf_id;
/* This call also serves as a whitelist of allowed objects that * can be used as a referenced pointer and be stored in a map at * the same time.
*/
dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id); if (dtor_btf_id < 0) {
ret = dtor_btf_id; goto end_btf;
}
dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id); if (!dtor_func) {
ret = -ENOENT; goto end_btf;
}
if (btf_is_module(kptr_btf)) {
mod = btf_try_get_module(kptr_btf); if (!mod) {
ret = -ENXIO; goto end_btf;
}
}
/* We already verified dtor_func to be btf_type_is_func * in register_btf_id_dtor_kfuncs.
*/
dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off);
addr = kallsyms_lookup_name(dtor_func_name); if (!addr) {
ret = -EINVAL; goto end_mod;
}
field->kptr.dtor = (void *)addr;
}
t = btf_type_by_id(btf, info->graph_root.value_btf_id); /* We've already checked that value_btf_id is a struct type. We * just need to figure out the offset of the list_node, and * verify its type.
*/
for_each_member(i, t, member) { if (strcmp(info->graph_root.node_name,
__btf_name_by_offset(btf, member->name_off))) continue; /* Invalid BTF, two members with same name */ if (n) return -EINVAL;
n = btf_type_by_id(btf, member->type); if (!__btf_type_is_struct(n)) return -EINVAL; if (strcmp(node_type_name, __btf_name_by_offset(btf, n->name_off))) return -EINVAL;
offset = __btf_member_bit_offset(n, member); if (offset % 8) return -EINVAL;
offset /= 8; if (offset % node_type_align) return -EINVAL;
ret = btf_find_field(btf, t, field_mask, info_arr, ARRAY_SIZE(info_arr)); if (ret < 0) return ERR_PTR(ret); if (!ret) return NULL;
cnt = ret; /* This needs to be kzalloc to zero out padding and unused fields, see * comment in btf_record_equal.
*/
rec = kzalloc(struct_size(rec, fields, cnt), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); if (!rec) return ERR_PTR(-ENOMEM);
rec->spin_lock_off = -EINVAL;
rec->res_spin_lock_off = -EINVAL;
rec->timer_off = -EINVAL;
rec->wq_off = -EINVAL;
rec->refcount_off = -EINVAL; for (i = 0; i < cnt; i++) {
field_type_size = btf_field_type_size(info_arr[i].type); if (info_arr[i].off + field_type_size > value_size) {
WARN_ONCE(1, "verifier bug off %d size %d", info_arr[i].off, value_size);
ret = -EFAULT; goto end;
} if (info_arr[i].off < next_off) {
ret = -EEXIST; goto end;
}
next_off = info_arr[i].off + field_type_size;
int btf_check_and_fixup_fields(conststruct btf *btf, struct btf_record *rec)
{ int i;
/* There are three types that signify ownership of some other type: * kptr_ref, bpf_list_head, bpf_rb_root. * kptr_ref only supports storing kernel types, which can't store * references to program allocated local types. * * Hence we only need to ensure that bpf_{list_head,rb_root} ownership * does not form cycles.
*/ if (IS_ERR_OR_NULL(rec) || !(rec->field_mask & (BPF_GRAPH_ROOT | BPF_UPTR))) return 0; for (i = 0; i < rec->cnt; i++) { struct btf_struct_meta *meta; conststruct btf_type *t;
u32 btf_id;
if (rec->fields[i].type == BPF_UPTR) { /* The uptr only supports pinning one page and cannot * point to a kernel struct
*/ if (btf_is_kernel(rec->fields[i].kptr.btf)) return -EINVAL;
t = btf_type_by_id(rec->fields[i].kptr.btf,
rec->fields[i].kptr.btf_id); if (!t->size) return -EINVAL; if (t->size > PAGE_SIZE) return -E2BIG; continue;
}
if (!(rec->fields[i].type & BPF_GRAPH_ROOT)) continue;
btf_id = rec->fields[i].graph_root.value_btf_id;
meta = btf_find_struct_meta(btf, btf_id); if (!meta) return -EFAULT;
rec->fields[i].graph_root.value_rec = meta->record;
/* We need to set value_rec for all root types, but no need * to check ownership cycle for a type unless it's also a * node type.
*/ if (!(rec->field_mask & BPF_GRAPH_NODE)) continue;
/* We need to ensure ownership acyclicity among all types. The * proper way to do it would be to topologically sort all BTF * IDs based on the ownership edges, since there can be multiple * bpf_{list_head,rb_node} in a type. Instead, we use the * following resaoning: * * - A type can only be owned by another type in user BTF if it * has a bpf_{list,rb}_node. Let's call these node types. * - A type can only _own_ another type in user BTF if it has a * bpf_{list_head,rb_root}. Let's call these root types. * * We ensure that if a type is both a root and node, its * element types cannot be root types. * * To ensure acyclicity: * * When A is an root type but not a node, its ownership * chain can be: * A -> B -> C * Where: * - A is an root, e.g. has bpf_rb_root. * - B is both a root and node, e.g. has bpf_rb_node and * bpf_list_head. * - C is only an root, e.g. has bpf_list_node * * When A is both a root and node, some other type already * owns it in the BTF domain, hence it can not own * another root type through any of the ownership edges. * A -> B * Where: * - A is both an root and node. * - B is only an node.
*/ if (meta->record->field_mask & BPF_GRAPH_ROOT) return -ELOOP;
} return 0;
}
/* * First check if any members would be shown (are non-zero). * See comments above "struct btf_show" definition for more * details on how this works at a high-level.
*/ if (show->state.depth > 0 && !(show->flags & BTF_SHOW_ZERO)) { if (!show->state.depth_check) {
show->state.depth_check = show->state.depth + 1;
show->state.depth_to_show = 0;
}
__btf_struct_show(btf, t, type_id, data, bits_offset, show); /* Restore saved member data here */
show->state.member = m; if (show->state.depth_check != show->state.depth + 1) return;
show->state.depth_check = 0;
if (show->state.depth_to_show <= show->state.depth) return; /* * Reaching here indicates we have recursed and found * non-zero child values.
*/
}
/* enum type either no name or a valid one */ if (t->name_off &&
!btf_name_valid_identifier(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL;
}
btf_verifier_log_type(env, t, NULL);
for (i = 0; i < nr_enums; i++) { if (!btf_name_offset_valid(btf, enums[i].name_off)) {
btf_verifier_log(env, "\tInvalid name_offset:%u",
enums[i].name_off); return -EINVAL;
}
/* enum member must have a valid name */ if (!enums[i].name_off ||
!btf_name_valid_identifier(btf, enums[i].name_off)) {
btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL;
}
/* enum type either no name or a valid one */ if (t->name_off &&
!btf_name_valid_identifier(env->btf, t->name_off)) {
btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL;
}
btf_verifier_log_type(env, t, NULL);
for (i = 0; i < nr_enums; i++) { if (!btf_name_offset_valid(btf, enums[i].name_off)) {
btf_verifier_log(env, "\tInvalid name_offset:%u",
enums[i].name_off); return -EINVAL;
}
/* enum member must have a valid name */ if (!enums[i].name_off ||
!btf_name_valid_identifier(btf, enums[i].name_off)) {
btf_verifier_log_type(env, t, "Invalid name"); return -EINVAL;
}
staticconststruct btf_kind_operations func_proto_ops = {
.check_meta = btf_func_proto_check_meta,
.resolve = btf_df_resolve, /* * BTF_KIND_FUNC_PROTO cannot be directly referred by * a struct's member. * * It should be a function pointer instead. * (i.e. struct's member -> BTF_KIND_PTR -> BTF_KIND_FUNC_PROTO) * * Hence, there is no btf_func_check_member().
*/
.check_member = btf_df_check_member,
.check_kflag_member = btf_df_check_kflag_member,
.log_details = btf_func_proto_log,
.show = btf_df_show,
};
for_each_vsi(i, t, vsi) { /* A var cannot be in type void */ if (!vsi->type || !BTF_TYPE_ID_VALID(vsi->type)) {
btf_verifier_log_vsi(env, t, vsi, "Invalid type_id"); return -EINVAL;
}
/* Different architectures have different alignment requirements, so * here we check only for the reasonable minimum. This way we ensure * that types after CO-RE can pass the kernel BTF verifier.
*/
align_bytes = min_t(u64, sizeof(void *), member_type->size);
align_bits = align_bytes * BITS_PER_BYTE;
div64_u64_rem(member->offset, align_bits, &misalign_bits); if (misalign_bits) {
btf_verifier_log_member(env, struct_type, member, "Member is not properly aligned"); return -EINVAL;
}
if (!env_type_is_resolve_sink(env, next_type) &&
!env_type_is_resolved(env, next_type_id)) return env_stack_push(env, next_type, next_type_id);
component_idx = btf_type_decl_tag(t)->component_idx; if (component_idx != -1) { if (btf_type_is_var(next_type) || btf_type_is_typedef(next_type)) {
btf_verifier_log_type(env, v->t, "Invalid component_idx"); return -EINVAL;
}
if (btf_type_is_struct(next_type)) {
vlen = btf_type_vlen(next_type);
} else { /* next_type should be a function */
next_type = btf_type_by_id(btf, next_type->type);
vlen = btf_type_vlen(next_type);
}
if (btf_type_needs_resolve(ret_type) &&
!env_type_is_resolved(env, ret_type_id)) {
err = btf_resolve(env, ret_type, ret_type_id); if (err) return err;
}
/* Ensure the return type is a type that has a size */ if (!btf_type_id_size(btf, &ret_type_id, NULL)) {
btf_verifier_log_type(env, t, "Invalid return type"); return -EINVAL;
}
}
if (!nr_args) return 0;
/* Last func arg type_id could be 0 if it is a vararg */ if (!args[nr_args - 1].type) { if (args[nr_args - 1].name_off) {
btf_verifier_log_type(env, t, "Invalid arg#%u",
nr_args); return -EINVAL;
}
nr_args--;
}
for (i = 0; i < nr_args; i++) { conststruct btf_type *arg_type;
u32 arg_type_id;
arg_type_id = args[i].type;
arg_type = btf_type_by_id(btf, arg_type_id); if (!arg_type) {
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); return -EINVAL;
}
if (btf_type_is_resolve_source_only(arg_type)) {
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); return -EINVAL;
}
if (args[i].name_off &&
(!btf_name_offset_valid(btf, args[i].name_off) ||
!btf_name_valid_identifier(btf, args[i].name_off))) {
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); return -EINVAL;
}
if (btf_type_needs_resolve(arg_type) &&
!env_type_is_resolved(env, arg_type_id)) {
err = btf_resolve(env, arg_type, arg_type_id); if (err) return err;
}
if (!btf_type_id_size(btf, &arg_type_id, NULL)) {
btf_verifier_log_type(env, t, "Invalid arg#%u", i + 1); return -EINVAL;
}
}
/* Populate the secs from hdr */ for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++)
secs[i] = *(struct btf_sec_info *)((void *)hdr +
btf_sec_info_offset[i]);
/* Check for gaps and overlap among sections */
total = 0;
expected_total = btf_data_size - hdr->hdr_len; for (i = 0; i < ARRAY_SIZE(btf_sec_info_offset); i++) { if (expected_total < secs[i].off) {
btf_verifier_log(env, "Invalid section offset"); return -EINVAL;
} if (total < secs[i].off) { /* gap */
btf_verifier_log(env, "Unsupported section found"); return -EINVAL;
} if (total > secs[i].off) {
btf_verifier_log(env, "Section overlap found"); return -EINVAL;
} if (expected_total - total < secs[i].len) {
btf_verifier_log(env, "Total section length too long"); return -EINVAL;
}
total += secs[i].len;
}
/* There is data other than hdr and known sections */ if (expected_total != total) {
btf_verifier_log(env, "Unsupported section found"); return -EINVAL;
}
for (i = 0; i < ARRAY_SIZE(alloc_obj_fields); i++) { /* Try to find whether this special type exists in user BTF, and * if so remember its ID so we can easily find it among members * of structs that we iterate in the next loop.
*/ struct btf_id_set *new_aof;
id = btf_find_by_name_kind(btf, alloc_obj_fields[i], BTF_KIND_STRUCT); if (id < 0) continue;
n = btf_nr_types(btf); for (i = 1; i < n; i++) { /* Try to find if there are kptrs in user BTF and remember their ID */ struct btf_id_set *new_aof; struct btf_field_info tmp; conststruct btf_type *t;
t = btf_type_by_id(btf, i); if (!t) {
ret = -EINVAL; goto free_aof;
}
ret = btf_find_kptr(btf, t, 0, 0, &tmp, BPF_KPTR); if (ret != BTF_FIELD_FOUND) continue;
if (attr->btf_size > BTF_MAX_SIZE) return ERR_PTR(-E2BIG);
env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN); if (!env) return ERR_PTR(-ENOMEM);
/* user could have requested verbose verifier output * and supplied buffer to store the verification trace
*/
err = bpf_vlog_init(&env->log, attr->btf_log_level,
log_ubuf, attr->btf_log_size); if (err) goto errout_free;
conv_struct = bpf_ctx_convert.t; if (!conv_struct) return NULL; /* prog_type is valid bpf program type. No need for bounds check. */
ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2; /* ctx_type is a pointer to prog_ctx_type in vmlinux. * Like 'struct __sk_buff'
*/ return btf_type_by_id(btf_vmlinux, ctx_type->type);
}
conv_struct = bpf_ctx_convert.t; if (!conv_struct) return -EFAULT; /* prog_type is valid bpf program type. No need for bounds check. */
ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1; /* ctx_type is a pointer to prog_ctx_type in vmlinux. * Like 'struct sk_buff'
*/ return ctx_type->type;
}
/* KPROBE programs allow bpf_user_pt_regs_t typedef, which we need to * check before we skip all the typedef below.
*/ if (prog_type == BPF_PROG_TYPE_KPROBE) { while (btf_type_is_modifier(t) && !btf_type_is_typedef(t))
t = btf_type_by_id(btf, t->type);
if (btf_type_is_typedef(t)) {
tname = btf_name_by_offset(btf, t->name_off); if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0) returntrue;
}
}
while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type); if (!btf_type_is_struct(t)) { /* Only pointer to struct is supported for now. * That means that BPF_PROG_TYPE_TRACEPOINT with BTF * is not supported yet. * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
*/ returnfalse;
}
tname = btf_name_by_offset(btf, t->name_off); if (!tname) {
bpf_log(log, "arg#%d struct doesn't have a name\n", arg); returnfalse;
}
ctx_type = find_canonical_prog_ctx_type(prog_type); if (!ctx_type) {
bpf_log(log, "btf_vmlinux is malformed\n"); /* should not happen */ returnfalse;
}
again:
ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off); if (!ctx_tname) { /* should not happen */
bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n"); returnfalse;
} /* program types without named context types work only with arg:ctx tag */ if (ctx_tname[0] == '\0') returnfalse; /* only compare that prog's ctx type name is the same as * kernel expects. No need to compare field by field. * It's ok for bpf prog to do: * struct __sk_buff {}; * int socket_filter_bpf_prog(struct __sk_buff *skb) * { // no fields of skb are ever used }
*/ if (btf_is_projection_of(ctx_tname, tname)) returntrue; if (strcmp(ctx_tname, tname)) { /* bpf_user_pt_regs_t is a typedef, so resolve it to * underlying struct and check name again
*/ if (!btf_type_is_modifier(ctx_type)) returnfalse; while (btf_type_is_modifier(ctx_type))
ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type); goto again;
} returntrue;
}
/* Forward declarations for arch-specific underlying types of
 * bpf_user_pt_regs_t; this avoids the need for arch-specific #ifdef
 * compilation guards below for BPF_PROG_TYPE_PERF_EVENT checks, but still
 * works correctly with __builtin_types_compatible_p() on respective
 * architectures.
 */
struct user_regs_struct;
struct user_pt_regs;
if (!btf_is_ptr(t)) {
bpf_log(log, "arg#%d type isn't a pointer\n", arg); return -EINVAL;
}
t = btf_type_by_id(btf, t->type);
/* KPROBE and PERF_EVENT programs allow bpf_user_pt_regs_t typedef */ if (prog_type == BPF_PROG_TYPE_KPROBE || prog_type == BPF_PROG_TYPE_PERF_EVENT) { while (btf_type_is_modifier(t) && !btf_type_is_typedef(t))
t = btf_type_by_id(btf, t->type);
if (btf_type_is_typedef(t)) {
tname = btf_name_by_offset(btf, t->name_off); if (tname && strcmp(tname, "bpf_user_pt_regs_t") == 0) return 0;
}
}
/* all other program types don't use typedefs for context type */ while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type);
/* `void *ctx __arg_ctx` is always valid */ if (btf_type_is_void(t)) return 0;
tname = btf_name_by_offset(btf, t->name_off); if (str_is_empty(tname)) {
bpf_log(log, "arg#%d type doesn't have a name\n", arg); return -EINVAL;
}
/* special cases */ switch (prog_type) { case BPF_PROG_TYPE_KPROBE: if (__btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0) return 0; break; case BPF_PROG_TYPE_PERF_EVENT: if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct pt_regs) &&
__btf_type_is_struct(t) && strcmp(tname, "pt_regs") == 0) return 0; if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_pt_regs) &&
__btf_type_is_struct(t) && strcmp(tname, "user_pt_regs") == 0) return 0; if (__builtin_types_compatible_p(bpf_user_pt_regs_t, struct user_regs_struct) &&
__btf_type_is_struct(t) && strcmp(tname, "user_regs_struct") == 0) return 0; break; case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: /* allow u64* as ctx */ if (btf_is_int(t) && t->size == 8) return 0; break; case BPF_PROG_TYPE_TRACING: switch (attach_type) { case BPF_TRACE_RAW_TP: /* tp_btf program is TRACING, so need special case here */ if (__btf_type_is_struct(t) &&
strcmp(tname, "bpf_raw_tracepoint_args") == 0) return 0; /* allow u64* as ctx */ if (btf_is_int(t) && t->size == 8) return 0; break; case BPF_TRACE_ITER: /* allow struct bpf_iter__xxx types only */ if (__btf_type_is_struct(t) &&
strncmp(tname, "bpf_iter__", sizeof("bpf_iter__") - 1) == 0) return 0; break; case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: /* allow u64* as ctx */ if (btf_is_int(t) && t->size == 8) return 0; break; default: break;
} break; case BPF_PROG_TYPE_LSM: case BPF_PROG_TYPE_STRUCT_OPS: /* allow u64* as ctx */ if (btf_is_int(t) && t->size == 8) return 0; break; case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_SYSCALL: case BPF_PROG_TYPE_EXT: return 0; /* anything goes */ default: break;
}
ctx_type = find_canonical_prog_ctx_type(prog_type); if (!ctx_type) { /* should not happen */
bpf_log(log, "btf_vmlinux is malformed\n"); return -EINVAL;
}
/* resolve typedefs and check that underlying structs are matching as well */ while (btf_type_is_modifier(ctx_type))
ctx_type = btf_type_by_id(btf_vmlinux, ctx_type->type);
/* if program type doesn't have distinctly named struct type for * context, then __arg_ctx argument can only be `void *`, which we * already checked above
*/ if (!__btf_type_is_struct(ctx_type)) {
bpf_log(log, "arg#%d should be void pointer\n", arg); return -EINVAL;
}
ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_type->name_off); if (!__btf_type_is_struct(t) || strcmp(ctx_tname, tname) != 0) {
bpf_log(log, "arg#%d should be `struct %s *`\n", arg, ctx_tname); return -EINVAL;
}
conv_struct = bpf_ctx_convert.t; /* get member for kernel ctx type */
kctx_member = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2 + 1;
kctx_type_id = kctx_member->type;
kctx_type = btf_type_by_id(btf_vmlinux, kctx_type_id); if (!btf_type_is_struct(kctx_type)) {
bpf_log(log, "kern ctx type id %u is not a struct\n", kctx_type_id); return -EINVAL;
}
/* If .BTF_ids section was created with distilled base BTF, both base and
 * split BTF ids will need to be mapped to actual base/split ids for
 * BTF now that it has been relocated.
 *
 * Returns @id unchanged when @btf has no base BTF or no base_id_map;
 * otherwise returns base_id_map[id].
 *
 * NOTE(review): @id is used as an index without a bounds check here;
 * presumably callers only pass ids covered by base_id_map - confirm.
 */
static __u32 btf_relocate_id(const struct btf *btf, __u32 id)
{
	if (!btf->base_btf || !btf->base_id_map)
		return id;
	return btf->base_id_map[id];
}
switch (prog->type) { case BPF_PROG_TYPE_TRACING: return atype == BPF_TRACE_RAW_TP || atype == BPF_TRACE_ITER; case BPF_PROG_TYPE_LSM: return bpf_lsm_is_trusted(prog); case BPF_PROG_TYPE_STRUCT_OPS: returntrue; default: returnfalse;
}
}
int btf_ctx_arg_offset(conststruct btf *btf, conststruct btf_type *func_proto,
u32 arg_no)
{ conststruct btf_param *args; conststruct btf_type *t; int off = 0, i;
u32 sz;
args = btf_params(func_proto); for (i = 0; i < arg_no; i++) {
t = btf_type_by_id(btf, args[i].type);
t = btf_resolve_size(btf, t, &sz); if (IS_ERR(t)) return PTR_ERR(t);
off += roundup(sz, 8);
}
if (off % 8) {
bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
tname, off); returnfalse;
}
arg = btf_ctx_arg_idx(btf, t, off);
args = (conststruct btf_param *)(t + 1); /* if (t == NULL) Fall back to default BPF prog with * MAX_BPF_FUNC_REG_ARGS u64 arguments.
*/
nr_args = t ? btf_type_vlen(t) : MAX_BPF_FUNC_REG_ARGS; if (prog->aux->attach_btf_trace) { /* skip first 'void *__data' argument in btf_trace_##name typedef */
args++;
nr_args--;
}
if (arg > nr_args) {
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
tname, arg + 1); returnfalse;
}
if (arg == nr_args) { switch (prog->expected_attach_type) { case BPF_LSM_MAC: /* mark we are accessing the return value */
info->is_retval = true;
fallthrough; case BPF_LSM_CGROUP: case BPF_TRACE_FEXIT: /* When LSM programs are attached to void LSM hooks * they use FEXIT trampolines and when attached to * int LSM hooks, they use MODIFY_RETURN trampolines. * * While the LSM programs are BPF_MODIFY_RETURN-like * the check: * * if (ret_type != 'int') * return -EINVAL; * * is _not_ done here. This is still safe as LSM hooks * have only void and int return types.
*/ if (!t) returntrue;
t = btf_type_by_id(btf, t->type); break; case BPF_MODIFY_RETURN: /* For now the BPF_MODIFY_RETURN can only be attached to * functions that return an int.
*/ if (!t) returnfalse;
t = btf_type_skip_modifiers(btf, t->type, NULL); if (!btf_type_is_small_int(t)) {
bpf_log(log, "ret type %s not allowed for fmod_ret\n",
btf_type_str(t)); returnfalse;
} break; default:
bpf_log(log, "func '%s' doesn't have %d-th argument\n",
tname, arg + 1); returnfalse;
}
} else { if (!t) /* Default prog with MAX_BPF_FUNC_REG_ARGS args */ returntrue;
t = btf_type_by_id(btf, args[arg].type);
}
/* skip modifiers */ while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type); if (btf_type_is_small_int(t) || btf_is_any_enum(t) || __btf_type_is_struct(t)) /* accessing a scalar */ returntrue; if (!btf_type_is_ptr(t)) {
bpf_log(log, "func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
tname, arg,
__btf_name_by_offset(btf, t->name_off),
btf_type_str(t)); returnfalse;
}
if (size != sizeof(u64)) {
bpf_log(log, "func '%s' size %d must be 8\n",
tname, size); returnfalse;
}
/* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { conststruct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
u32 type, flag;
type = base_type(ctx_arg_info->reg_type);
flag = type_flag(ctx_arg_info->reg_type); if (ctx_arg_info->offset == off && type == PTR_TO_BUF &&
(flag & PTR_MAYBE_NULL)) {
info->reg_type = ctx_arg_info->reg_type; returntrue;
}
}
/* * If it's a pointer to void, it's the same as scalar from the verifier * safety POV. Either way, no further pointer walking is allowed.
*/ if (is_void_or_int_ptr(btf, t)) returntrue;
/* this is a pointer to another type */ for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { conststruct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
if (ctx_arg_info->offset == off) { if (!ctx_arg_info->btf_id) {
bpf_log(log,"invalid btf_id for context argument offset %u\n", off); returnfalse;
}
/* BTF lookups cannot fail, return false on error */
t = btf_type_by_id(btf, prog->aux->attach_btf_id); if (!t) returnfalse;
tname = btf_name_by_offset(btf, t->name_off); if (!tname) returnfalse; /* Checked by bpf_check_attach_target */
tname += sizeof("btf_trace_") - 1; for (i = 0; i < ARRAY_SIZE(raw_tp_null_args); i++) { /* Is this a func with potential NULL args? */ if (strcmp(tname, raw_tp_null_args[i].func)) continue; if (raw_tp_null_args[i].mask & (0x1ULL << (arg * 4)))
info->reg_type |= PTR_MAYBE_NULL; /* Is the current arg IS_ERR? */ if (raw_tp_null_args[i].mask & (0x2ULL << (arg * 4)))
ptr_err_raw_tp = true; break;
} /* If we don't know NULL-ness specification and the tracepoint * is coming from a loadable module, be conservative and mark * argument as PTR_MAYBE_NULL.
*/ if (i == ARRAY_SIZE(raw_tp_null_args) && btf_is_module(btf))
info->reg_type |= PTR_MAYBE_NULL;
}
info->btf = btf;
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
tag_value = __btf_name_by_offset(btf, t->name_off); if (strcmp(tag_value, "user") == 0)
info->reg_type |= MEM_USER; if (strcmp(tag_value, "percpu") == 0)
info->reg_type |= MEM_PERCPU;
}
/* skip modifiers */ while (btf_type_is_modifier(t)) {
info->btf_id = t->type;
t = btf_type_by_id(btf, t->type);
} if (!btf_type_is_struct(t)) {
bpf_log(log, "func '%s' arg%d type %s is not a struct\n",
tname, arg, btf_type_str(t)); returnfalse;
}
bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
tname, arg, info->btf_id, btf_type_str(t),
__btf_name_by_offset(btf, t->name_off));
/* Perform all checks on the validity of type for this argument, but if * we know it can be IS_ERR at runtime, scrub pointer type and mark as * scalar.
*/ if (ptr_err_raw_tp) {
bpf_log(log, "marking pointer arg%d as scalar as it may encode error", arg);
info->reg_type = SCALAR_VALUE;
} returntrue;
}
EXPORT_SYMBOL_GPL(btf_ctx_access);
again: if (btf_type_is_modifier(t))
t = btf_type_skip_modifiers(btf, t->type, NULL);
tname = __btf_name_by_offset(btf, t->name_off); if (!btf_type_is_struct(t)) {
bpf_log(log, "Type '%s' is not a struct\n", tname); return -EINVAL;
}
vlen = btf_type_vlen(t); if (BTF_INFO_KIND(t->info) == BTF_KIND_UNION && vlen != 1 && !(*flag & PTR_UNTRUSTED)) /* * walking unions yields untrusted pointers * with exception of __bpf_md_ptr and other * unions with a single member
*/
*flag |= PTR_UNTRUSTED;
if (off + size > t->size) { /* If the last element is a variable size array, we may * need to relax the rule.
*/ struct btf_array *array_elem;
if (vlen == 0) goto error;
member = btf_type_member(t) + vlen - 1;
mtype = btf_type_skip_modifiers(btf, member->type,
NULL); if (!btf_type_is_array(mtype)) goto error;
/* allow structure and integer */
t = btf_type_skip_modifiers(btf, array_elem->type,
NULL);
if (btf_type_is_int(t)) return WALK_SCALAR;
if (!btf_type_is_struct(t)) goto error;
off = (off - moff) % t->size; goto again;
error:
bpf_log(log, "access beyond struct %s at off %u size %u\n",
tname, off, size); return -EACCES;
}
for_each_member(i, t, member) { /* offset of the field in bytes */
moff = __btf_member_bit_offset(t, member) / 8; if (off + size <= moff) /* won't find anything, field is already too far */ break;
/* off <= moff instead of off == moff because clang * does not generate a BTF member for anonymous * bitfield like the ":16" here: * struct { * int :16; * int x:8; * };
*/ if (off <= moff &&
BITS_ROUNDUP_BYTES(end_bit) <= off + size) return WALK_SCALAR;
/* off may be accessing a following member * * or * * Doing partial access at either end of this * bitfield. Continue on this case also to * treat it as not accessing this bitfield * and eventually error out as field not * found to keep it simple. * It could be relaxed if there was a legit * partial access case later.
*/ continue;
}
/* In case of "off" is pointing to holes of a struct */ if (off < moff) break;
/* type of the field */
mid = member->type;
mtype = btf_type_by_id(btf, member->type);
mname = __btf_name_by_offset(btf, member->name_off);
mtrue_end = moff + msize; if (off >= mtrue_end) /* no overlap with member, keep iterating */ continue;
if (btf_type_is_array(mtype)) {
u32 elem_idx;
/* __btf_resolve_size() above helps to * linearize a multi-dimensional array. * * The logic here is treating an array * in a struct as the following way: * * struct outer { * struct inner array[2][2]; * }; * * looks like: * * struct outer { * struct inner array_elem0; * struct inner array_elem1; * struct inner array_elem2; * struct inner array_elem3; * }; * * When accessing outer->array[1][0], it moves * moff to "array_elem2", set mtype to * "struct inner", and msize also becomes * sizeof(struct inner). Then most of the * remaining logic will fall through without * caring the current member is an array or * not. * * Unlike mtype/msize/moff, mtrue_end does not * change. The naming difference ("_true") tells * that it is not always corresponding to * the current mtype/msize/moff. * It is the true end of the current * member (i.e. array in this case). That * will allow an int array to be accessed like * a scratch space, * i.e. allow access beyond the size of * the array's element as long as it is * within the mtrue_end boundary.
*/
/* skip empty array */ if (moff == mtrue_end) continue;
/* the 'off' we're looking for is either equal to start * of this field or inside of this struct
*/ if (btf_type_is_struct(mtype)) { /* our field must be inside that union or struct */
t = mtype;
/* return if the offset matches the member offset */ if (off == moff) {
*next_btf_id = mid; return WALK_STRUCT;
}
/* adjust offset we're looking for */
off -= moff; goto again;
}
if (msize != size || off != moff) {
bpf_log(log, "cannot access ptr member %s with moff %u in struct %s with off %u size %u\n",
mname, moff, tname, off, size); return -EACCES;
}
/* check type tag */
t = btf_type_by_id(btf, mtype->type); if (btf_type_is_type_tag(t) && !btf_type_kflag(t)) {
tag_value = __btf_name_by_offset(btf, t->name_off); /* check __user tag */ if (strcmp(tag_value, "user") == 0)
tmp_flag = MEM_USER; /* check __percpu tag */ if (strcmp(tag_value, "percpu") == 0)
tmp_flag = MEM_PERCPU; /* check __rcu tag */ if (strcmp(tag_value, "rcu") == 0)
tmp_flag = MEM_RCU;
}
/* Allow more flexible access within an int as long as * it is within mtrue_end. * Since mtrue_end could be the end of an array, * that also allows using an array of int as a scratch * space. e.g. skb->cb[].
*/ if (off + size > mtrue_end && !(*flag & PTR_UNTRUSTED)) {
bpf_log(log, "access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n",
mname, mtrue_end, tname, off, size); return -EACCES;
}
return WALK_SCALAR;
}
bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off); return -EINVAL;
}
/* Resolve an access of @size bytes at byte offset @off into the BTF type
 * identified by reg->btf / reg->btf_id.
 *
 * @atype is accepted for interface compatibility and is not used here.
 *
 * Returns:
 *   SCALAR_VALUE  - the access yields a scalar
 *   PTR_TO_BTF_ID - the access yields a pointer; *next_btf_id and *flag are
 *                   filled in from the walk
 *   PTR_TO_MEM    - untrusted pointer; *flag = MEM_RDONLY | PTR_UNTRUSTED
 *   negative errno on failure
 */
int btf_struct_access(struct bpf_verifier_log *log,
		      const struct bpf_reg_state *reg,
		      int off, int size, enum bpf_access_type atype __maybe_unused,
		      u32 *next_btf_id, enum bpf_type_flag *flag,
		      const char **field_name)
{
	const struct btf *btf = reg->btf;
	enum bpf_type_flag walk_flag = 0;
	const struct btf_type *cur;
	u32 type_id = reg->btf_id;
	int res;

	if (type_is_alloc(reg->type)) {
		struct btf_struct_meta *meta = btf_find_struct_meta(btf, type_id);

		if (meta) {
			struct btf_record *rec = meta->record;
			int idx;

			/* Refuse any access that overlaps a field listed in
			 * the struct's record.
			 */
			for (idx = 0; idx < rec->cnt; idx++) {
				struct btf_field *field = &rec->fields[idx];
				u32 offset = field->offset;

				if (off < offset + field->size && offset < off + size) {
					bpf_log(log, "direct access to %s is disallowed\n",
						btf_field_type_name(field->type));
					return -EACCES;
				}
			}
		}
	}

	cur = btf_type_by_id(btf, type_id);
	for (;;) {
		res = btf_struct_walk(log, btf, cur, off, size, &type_id,
				      &walk_flag, field_name);

		if (res == WALK_PTR) {
			/* For local types, the destination register cannot
			 * become a pointer again.
			 */
			if (type_is_alloc(reg->type))
				return SCALAR_VALUE;
			/* The pointer was found at t+off, report it. */
			*next_btf_id = type_id;
			*flag = walk_flag;
			return PTR_TO_BTF_ID;
		}

		if (res == WALK_PTR_UNTRUSTED) {
			*flag = MEM_RDONLY | PTR_UNTRUSTED;
			return PTR_TO_MEM;
		}

		if (res == WALK_SCALAR)
			return SCALAR_VALUE;

		if (res != WALK_STRUCT) {
			/* Either an error or an unknown return value:
			 * warn about the latter and bail out.
			 */
			if (WARN_ONCE(res > 0, "unknown btf_struct_walk return value"))
				return -EINVAL;
			return res;
		}

		/* Nested struct: keep searching inside it. The offset is now
		 * relative to the new type, so restart from 0.
		 */
		cur = btf_type_by_id(btf, type_id);
		off = 0;
		if (!cur)
			break;
	}

	return -EINVAL;
}
/* Tell whether two BTF types, each given as a BTF object plus a type ID,
 * are exactly the same type.
 *
 * Comparing IDs alone is not sufficient because of module BTFs: two different
 * module BTFs may both carry an ID that points to a common type in vmlinux
 * BTF. So IDs must match first, and then either the BTF objects are identical
 * or both lookups must resolve to the very same btf_type object.
 */
bool btf_types_are_same(const struct btf *btf1, u32 id1,
			const struct btf *btf2, u32 id2)
{
	return id1 == id2 &&
	       (btf1 == btf2 ||
		btf_type_by_id(btf1, id1) == btf_type_by_id(btf2, id2));
}
bool btf_struct_ids_match(struct bpf_verifier_log *log, conststruct btf *btf, u32 id, int off, conststruct btf *need_btf, u32 need_type_id, bool strict)
{ conststruct btf_type *type; enum bpf_type_flag flag = 0; int err;
/* Are we already done? */ if (off == 0 && btf_types_are_same(btf, id, need_btf, need_type_id)) returntrue; /* In case of strict type match, we do not walk struct, the top level * type match must succeed. When strict is true, off should have already * been 0.
*/ if (strict) returnfalse;
again:
type = btf_type_by_id(btf, id); if (!type) returnfalse;
err = btf_struct_walk(log, btf, type, off, 1, &id, &flag, NULL); if (err != WALK_STRUCT) returnfalse;
/* We found nested struct object. If it matches * the requested ID, we're done. Otherwise let's * continue the search with offset 0 in the new * type.
*/ if (!btf_types_are_same(btf, id, need_btf, need_type_id)) {
off = 0; goto again;
}
if (__btf_type_is_struct(t))
flags |= BTF_FMODEL_STRUCT_ARG; if (btf_type_is_signed_int(t))
flags |= BTF_FMODEL_SIGNED_ARG;
return flags;
}
int btf_distill_func_proto(struct bpf_verifier_log *log, struct btf *btf, conststruct btf_type *func, constchar *tname, struct btf_func_model *m)
{ conststruct btf_param *args; conststruct btf_type *t;
u32 i, nargs; int ret;
if (!func) { /* BTF function prototype doesn't match the verifier types. * Fall back to MAX_BPF_FUNC_REG_ARGS u64 args.
*/ for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
m->arg_size[i] = 8;
m->arg_flags[i] = 0;
}
m->ret_size = 8;
m->ret_flags = 0;
m->nr_args = MAX_BPF_FUNC_REG_ARGS; return 0;
}
args = (conststruct btf_param *)(func + 1);
nargs = btf_type_vlen(func); if (nargs > MAX_BPF_FUNC_ARGS) {
bpf_log(log, "The function %s has %d arguments. Too many.\n",
tname, nargs); return -EINVAL;
}
ret = __get_type_size(btf, func->type, &t); if (ret < 0 || __btf_type_is_struct(t)) {
bpf_log(log, "The function %s return type %s is unsupported.\n",
tname, btf_type_str(t)); return -EINVAL;
}
m->ret_size = ret;
m->ret_flags = __get_type_fmodel_flags(t);
for (i = 0; i < nargs; i++) { if (i == nargs - 1 && args[i].type == 0) {
bpf_log(log, "The function %s with variable args is unsupported.\n",
tname); return -EINVAL;
}
ret = __get_type_size(btf, args[i].type, &t);
/* No support of struct argument size greater than 16 bytes */ if (ret < 0 || ret > 16) {
bpf_log(log, "The function %s arg%d type %s is unsupported.\n",
tname, i, btf_type_str(t)); return -EINVAL;
} if (ret == 0) {
bpf_log(log, "The function %s has malformed void argument.\n",
tname); return -EINVAL;
}
m->arg_size[i] = ret;
m->arg_flags[i] = __get_type_fmodel_flags(t);
}
m->nr_args = nargs; return 0;
}
/* Compare BTFs of two functions assuming only scalars and pointers to context. * t1 points to BTF_KIND_FUNC in btf1 * t2 points to BTF_KIND_FUNC in btf2 * Returns: * EINVAL - function prototype mismatch * EFAULT - verifier bug * 0 - 99% match. The last 1% is validated by the verifier.
*/ staticint btf_check_func_type_match(struct bpf_verifier_log *log, struct btf *btf1, conststruct btf_type *t1, struct btf *btf2, conststruct btf_type *t2)
{ conststruct btf_param *args1, *args2; constchar *fn1, *fn2, *s1, *s2;
u32 nargs1, nargs2, i;
if (btf_func_linkage(t1) != BTF_FUNC_GLOBAL) {
bpf_log(log, "%s() is not a global function\n", fn1); return -EINVAL;
} if (btf_func_linkage(t2) != BTF_FUNC_GLOBAL) {
bpf_log(log, "%s() is not a global function\n", fn2); return -EINVAL;
}
t1 = btf_type_by_id(btf1, t1->type); if (!t1 || !btf_type_is_func_proto(t1)) return -EFAULT;
t2 = btf_type_by_id(btf2, t2->type); if (!t2 || !btf_type_is_func_proto(t2)) return -EFAULT;
if (nargs1 != nargs2) {
bpf_log(log, "%s() has %d args while %s() has %d args\n",
fn1, nargs1, fn2, nargs2); return -EINVAL;
}
t1 = btf_type_skip_modifiers(btf1, t1->type, NULL);
t2 = btf_type_skip_modifiers(btf2, t2->type, NULL); if (t1->info != t2->info) {
bpf_log(log, "Return type %s of %s() doesn't match type %s of %s()\n",
btf_type_str(t1), fn1,
btf_type_str(t2), fn2); return -EINVAL;
}
for (i = 0; i < nargs1; i++) {
t1 = btf_type_skip_modifiers(btf1, args1[i].type, NULL);
t2 = btf_type_skip_modifiers(btf2, args2[i].type, NULL);
if (t1->info != t2->info) {
bpf_log(log, "arg%d in %s() is %s while %s() has %s\n",
i, fn1, btf_type_str(t1),
fn2, btf_type_str(t2)); return -EINVAL;
} if (btf_type_has_size(t1) && t1->size != t2->size) {
bpf_log(log, "arg%d in %s() has size %d while %s() has %d\n",
i, fn1, t1->size,
fn2, t2->size); return -EINVAL;
}
/* global functions are validated with scalars and pointers * to context only. And only global functions can be replaced. * Hence type check only those types.
*/ if (btf_type_is_int(t1) || btf_is_any_enum(t1)) continue; if (!btf_type_is_ptr(t1)) {
bpf_log(log, "arg%d in %s() has unrecognized type\n",
i, fn1); return -EINVAL;
}
t1 = btf_type_skip_modifiers(btf1, t1->type, NULL);
t2 = btf_type_skip_modifiers(btf2, t2->type, NULL); if (!btf_type_is_struct(t1)) {
bpf_log(log, "arg%d in %s() is not a pointer to context\n",
i, fn1); return -EINVAL;
} if (!btf_type_is_struct(t2)) {
bpf_log(log, "arg%d in %s() is not a pointer to context\n",
i, fn2); return -EINVAL;
} /* This is an optional check to make program writing easier. * Compare names of structs and report an error to the user. * btf_prepare_func_args() already checked that t2 struct * is a context type. btf_prepare_func_args() will check * later that t1 struct is a context type as well.
*/
s1 = btf_name_by_offset(btf1, t1->name_off);
s2 = btf_name_by_offset(btf2, t2->name_off); if (strcmp(s1, s2)) {
bpf_log(log, "arg%d %s(struct %s *) doesn't match %s(struct %s *)\n",
i, fn1, s1, fn2, s2); return -EINVAL;
}
} return 0;
}
/* Compare BTFs of given program with BTF of target program */ int btf_check_type_match(struct bpf_verifier_log *log, conststruct bpf_prog *prog, struct btf *btf2, conststruct btf_type *t2)
{ struct btf *btf1 = prog->aux->btf; conststruct btf_type *t1;
u32 btf_id = 0;
/* Process BTF of a function to produce high-level expectation of function * arguments (like ARG_PTR_TO_CTX, or ARG_PTR_TO_MEM, etc). This information * is cached in subprog info for reuse. * Returns: * EFAULT - there is a verifier bug. Abort verification. * EINVAL - cannot convert BTF. * 0 - Successfully processed BTF and constructed argument expectations.
*/ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
{ bool is_global = subprog_aux(env, subprog)->linkage == BTF_FUNC_GLOBAL; struct bpf_subprog_info *sub = subprog_info(env, subprog); struct bpf_verifier_log *log = &env->log; struct bpf_prog *prog = env->prog; enum bpf_prog_type prog_type = prog->type; struct btf *btf = prog->aux->btf; conststruct btf_param *args; conststruct btf_type *t, *ref_t, *fn_t;
u32 i, nargs, btf_id; constchar *tname;
if (sub->args_cached) return 0;
if (!prog->aux->func_info) {
verifier_bug(env, "func_info undefined"); return -EFAULT;
}
btf_id = prog->aux->func_info[subprog].type_id; if (!btf_id) { if (!is_global) /* not fatal for static funcs */ return -EINVAL;
bpf_log(log, "Global functions need valid BTF\n"); return -EFAULT;
}
fn_t = btf_type_by_id(btf, btf_id); if (!fn_t || !btf_type_is_func(fn_t)) { /* These checks were already done by the verifier while loading * struct bpf_func_info
*/
bpf_log(log, "BTF of func#%d doesn't point to KIND_FUNC\n",
subprog); return -EFAULT;
}
tname = btf_name_by_offset(btf, fn_t->name_off);
if (prog->aux->func_info_aux[subprog].unreliable) {
verifier_bug(env, "unreliable BTF for function %s()", tname); return -EFAULT;
} if (prog_type == BPF_PROG_TYPE_EXT)
prog_type = prog->aux->dst_prog->type;
t = btf_type_by_id(btf, fn_t->type); if (!t || !btf_type_is_func_proto(t)) {
bpf_log(log, "Invalid type of function %s()\n", tname); return -EFAULT;
}
args = (conststruct btf_param *)(t + 1);
nargs = btf_type_vlen(t); if (nargs > MAX_BPF_FUNC_REG_ARGS) { if (!is_global) return -EINVAL;
bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
tname, nargs, MAX_BPF_FUNC_REG_ARGS); return -EINVAL;
} /* check that function returns int, exception cb also requires this */
t = btf_type_by_id(btf, t->type); while (btf_type_is_modifier(t))
t = btf_type_by_id(btf, t->type); if (!btf_type_is_int(t) && !btf_is_any_enum(t)) { if (!is_global) return -EINVAL;
bpf_log(log, "Global function %s() doesn't return scalar. Only those are supported.\n",
tname); return -EINVAL;
} /* Convert BTF function arguments into verifier types. * Only PTR_TO_CTX and SCALAR are supported atm.
*/ for (i = 0; i < nargs; i++) {
u32 tags = 0; int id = 0;
/* 'arg:<tag>' decl_tag takes precedence over derivation of * register type from BTF type itself
*/ while ((id = btf_find_next_decl_tag(btf, fn_t, i, "arg:", id)) > 0) { conststruct btf_type *tag_t = btf_type_by_id(btf, id); constchar *tag = __btf_name_by_offset(btf, tag_t->name_off) + 4;
/* disallow arg tags in static subprogs */ if (!is_global) {
bpf_log(log, "arg#%d type tag is not supported in static functions\n", i); return -EOPNOTSUPP;
}
kern_type_id = btf_get_ptr_to_btf_id(log, i, btf, t); if (kern_type_id < 0) return kern_type_id;
vmlinux_btf = bpf_get_btf_vmlinux();
ref_t = btf_type_by_id(vmlinux_btf, kern_type_id); if (!btf_type_is_struct(ref_t)) {
tname = __btf_name_by_offset(vmlinux_btf, t->name_off);
bpf_log(log, "arg#%d has type %s '%s', but only struct or primitive types are allowed\n",
i, btf_type_str(ref_t), tname); return -EINVAL;
}
sub->args[i].arg_type = ARG_PTR_TO_BTF_ID | PTR_UNTRUSTED;
sub->args[i].btf_id = kern_type_id; continue;
} if (tags & ARG_TAG_ARENA) { if (tags & ~ARG_TAG_ARENA) {
bpf_log(log, "arg#%d arena cannot be combined with any other tags\n", i); return -EINVAL;
}
sub->args[i].arg_type = ARG_PTR_TO_ARENA; continue;
} if (is_global) { /* generic user data pointer */
u32 mem_size;
if (tags & ARG_TAG_NULLABLE) {
bpf_log(log, "arg#%d has invalid combination of tags\n", i); return -EINVAL;
}
t = btf_type_skip_modifiers(btf, t->type, NULL);
ref_t = btf_resolve_size(btf, t, &mem_size); if (IS_ERR(ref_t)) {
bpf_log(log, "arg#%d reference type('%s %s') size cannot be determined: %ld\n",
i, btf_type_str(t), btf_name_by_offset(btf, t->name_off),
PTR_ERR(ref_t)); return -EINVAL;
}
skip_pointer: if (tags) {
bpf_log(log, "arg#%d has pointer tag, but is not a pointer type\n", i); return -EINVAL;
} if (btf_type_is_int(t) || btf_is_any_enum(t)) {
sub->args[i].arg_type = ARG_ANYTHING; continue;
} if (!is_global) return -EINVAL;
bpf_log(log, "Arg#%d type %s in %s() is not supported yet.\n",
i, btf_type_str(t), tname); return -EINVAL;
}
if (uname) { if (uname_len >= name_len + 1) { if (copy_to_user(uname, btf->name, name_len + 1)) return -EFAULT;
} else { char zero = '\0';
if (copy_to_user(uname, btf->name, uname_len - 1)) return -EFAULT; if (put_user(zero, uname + uname_len - 1)) return -EFAULT; /* let user-space know about too short buffer */
ret = -ENOSPC;
}
}
if (copy_to_user(uinfo, &info, info_copy) ||
put_user(info_copy, &uattr->info.info_len)) return -EFAULT;
return ret;
}
int btf_get_fd_by_id(u32 id)
{ struct btf *btf; int fd;
/* We must only consider module whose __init routine has * finished, hence we must check for BTF_MODULE_F_LIVE flag, * which is set from the notifier callback for * MODULE_STATE_LIVE.
*/ if ((btf_mod->flags & BTF_MODULE_F_LIVE) && try_module_get(btf_mod->module))
res = btf_mod->module;
break;
}
mutex_unlock(&btf_module_mutex); #endif
return res;
}
/* Returns struct btf corresponding to the struct module. * This function can return NULL or ERR_PTR.
*/ staticstruct btf *btf_get_module_btf(conststruct module *module)
{ #ifdef CONFIG_DEBUG_INFO_BTF_MODULES struct btf_module *btf_mod, *tmp; #endif struct btf *btf = NULL;
if (!module) {
btf = bpf_get_btf_vmlinux(); if (!IS_ERR_OR_NULL(btf))
btf_get(btf); return btf;
}
/* Validate well-formedness of iter argument type. * On success, return positive BTF ID of iter state's STRUCT type. * On error, negative error is returned.
*/ int btf_check_iter_arg(struct btf *btf, conststruct btf_type *func, int arg_idx)
{ conststruct btf_param *arg; conststruct btf_type *t; constchar *name; int btf_id;
if (btf_type_vlen(func) <= arg_idx) return -EINVAL;
arg = &btf_params(func)[arg_idx];
t = btf_type_skip_modifiers(btf, arg->type, NULL); if (!t || !btf_type_is_ptr(t)) return -EINVAL;
t = btf_type_skip_modifiers(btf, t->type, &btf_id); if (!t || !__btf_type_is_struct(t)) return -EINVAL;
name = btf_name_by_offset(btf, t->name_off); if (!name || strncmp(name, ITER_PREFIX, sizeof(ITER_PREFIX) - 1)) return -EINVAL;
/* sizeof(struct bpf_iter_<type>) should be a multiple of 8 to * fit nicely in stack slots
*/
t = btf_type_by_id(btf, btf_id); if (t->size == 0 || (t->size % 8)) return -EINVAL;
if (hook >= BTF_KFUNC_HOOK_MAX) {
ret = -EINVAL; goto end;
}
if (!add_set->cnt) return 0;
tab = btf->kfunc_set_tab;
if (tab && add_filter) {
u32 i;
hook_filter = &tab->hook_filters[hook]; for (i = 0; i < hook_filter->nr_filters; i++) { if (hook_filter->filters[i] == kset->filter) {
add_filter = false; break;
}
}
if (add_filter && hook_filter->nr_filters == BTF_KFUNC_FILTER_MAX_CNT) {
ret = -E2BIG; goto end;
}
}
if (!tab) {
tab = kzalloc(sizeof(*tab), GFP_KERNEL | __GFP_NOWARN); if (!tab) return -ENOMEM;
btf->kfunc_set_tab = tab;
}
set = tab->sets[hook]; /* Warn when register_btf_kfunc_id_set is called twice for the same hook * for module sets.
*/ if (WARN_ON_ONCE(set && !vmlinux_set)) {
ret = -EINVAL; goto end;
}
/* In case of vmlinux sets, there may be more than one set being * registered per hook. To create a unified set, we allocate a new set * and concatenate all individual sets being registered. While each set * is individually sorted, they may become unsorted when concatenated, * hence re-sorting the final set again is required to make binary * searching the set using btf_id_set8_contains function work. * * For module sets, we need to allocate as we may need to relocate * BTF ids.
*/
set_cnt = set ? set->cnt : 0;
if (set_cnt > U32_MAX - add_set->cnt) {
ret = -EOVERFLOW; goto end;
}
if (set_cnt + add_set->cnt > BTF_KFUNC_SET_MAX_CNT) {
ret = -E2BIG; goto end;
}
/* Grow set */
set = krealloc(tab->sets[hook],
struct_size(set, pairs, set_cnt + add_set->cnt),
GFP_KERNEL | __GFP_NOWARN); if (!set) {
ret = -ENOMEM; goto end;
}
/* For newly allocated set, initialize set->cnt to 0 */ if (!tab->sets[hook])
set->cnt = 0;
tab->sets[hook] = set;
/* Concatenate the two sets */
memcpy(set->pairs + set->cnt, add_set->pairs, add_set->cnt * sizeof(set->pairs[0])); /* Now that the set is copied, update with relocated BTF ids */ for (i = set->cnt; i < set->cnt + add_set->cnt; i++)
set->pairs[i].id = btf_relocate_id(btf, set->pairs[i].id);
if (hook >= BTF_KFUNC_HOOK_MAX) return NULL; if (!btf->kfunc_set_tab) return NULL;
hook_filter = &btf->kfunc_set_tab->hook_filters[hook]; for (i = 0; i < hook_filter->nr_filters; i++) { if (hook_filter->filters[i](prog, kfunc_btf_id)) return NULL;
}
set = btf->kfunc_set_tab->sets[hook]; if (!set) return NULL;
id = btf_id_set8_contains(set, kfunc_btf_id); if (!id) return NULL; /* The flags for BTF ID are located next to it */ return id + 1;
}
staticint bpf_prog_type_to_kfunc_hook(enum bpf_prog_type prog_type)
{ switch (prog_type) { case BPF_PROG_TYPE_UNSPEC: return BTF_KFUNC_HOOK_COMMON; case BPF_PROG_TYPE_XDP: return BTF_KFUNC_HOOK_XDP; case BPF_PROG_TYPE_SCHED_CLS: return BTF_KFUNC_HOOK_TC; case BPF_PROG_TYPE_STRUCT_OPS: return BTF_KFUNC_HOOK_STRUCT_OPS; case BPF_PROG_TYPE_TRACING: case BPF_PROG_TYPE_TRACEPOINT: case BPF_PROG_TYPE_PERF_EVENT: case BPF_PROG_TYPE_LSM: return BTF_KFUNC_HOOK_TRACING; case BPF_PROG_TYPE_SYSCALL: return BTF_KFUNC_HOOK_SYSCALL; case BPF_PROG_TYPE_CGROUP_SKB: case BPF_PROG_TYPE_CGROUP_SOCK: case BPF_PROG_TYPE_CGROUP_DEVICE: case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: case BPF_PROG_TYPE_CGROUP_SOCKOPT: case BPF_PROG_TYPE_CGROUP_SYSCTL: case BPF_PROG_TYPE_SOCK_OPS: return BTF_KFUNC_HOOK_CGROUP; case BPF_PROG_TYPE_SCHED_ACT: return BTF_KFUNC_HOOK_SCHED_ACT; case BPF_PROG_TYPE_SK_SKB: return BTF_KFUNC_HOOK_SK_SKB; case BPF_PROG_TYPE_SOCKET_FILTER: return BTF_KFUNC_HOOK_SOCKET_FILTER; case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_XMIT: case BPF_PROG_TYPE_LWT_SEG6LOCAL: return BTF_KFUNC_HOOK_LWT; case BPF_PROG_TYPE_NETFILTER: return BTF_KFUNC_HOOK_NETFILTER; case BPF_PROG_TYPE_KPROBE: return BTF_KFUNC_HOOK_KPROBE; default: return BTF_KFUNC_HOOK_MAX;
}
}
/* Caution: * Reference to the module (obtained using btf_try_get_module) corresponding to * the struct btf *MUST* be held when calling this function from verifier * context. This is usually true as we stash references in prog's kfunc_btf_tab; * keeping the reference for the duration of the call provides the necessary * protection for looking up a well-formed btf->kfunc_set_tab.
*/
u32 *btf_kfunc_id_set_contains(conststruct btf *btf,
u32 kfunc_btf_id, conststruct bpf_prog *prog)
{ enum bpf_prog_type prog_type = resolve_prog_type(prog); enum btf_kfunc_hook hook;
u32 *kfunc_flags;
kfunc_flags = __btf_kfunc_id_set_contains(btf, BTF_KFUNC_HOOK_COMMON, kfunc_btf_id, prog); if (kfunc_flags) return kfunc_flags;
btf = btf_get_module_btf(kset->owner); if (!btf) return check_btf_kconfigs(kset->owner, "kfunc"); if (IS_ERR(btf)) return PTR_ERR(btf);
for (i = 0; i < kset->set->cnt; i++) {
ret = btf_check_kfunc_protos(btf, btf_relocate_id(btf, kset->set->pairs[i].id),
kset->set->pairs[i].flags); if (ret) goto err_out;
}
ret = btf_populate_kfunc_set(btf, hook, kset);
err_out:
btf_put(btf); return ret;
}
/* This function must be invoked only from initcalls/module init functions */ int register_btf_kfunc_id_set(enum bpf_prog_type prog_type, conststruct btf_kfunc_id_set *kset)
{ enum btf_kfunc_hook hook;
/* All kfuncs need to be tagged as such in BTF. * WARN() for initcall registrations that do not check errors.
*/ if (!(kset->set->flags & BTF_SET8_KFUNCS)) {
WARN_ON(!kset->owner); return -EINVAL;
}
/* This function must be invoked only from initcalls/module init functions */ int register_btf_fmodret_id_set(conststruct btf_kfunc_id_set *kset)
{ return __register_btf_kfunc_id_set(BTF_KFUNC_HOOK_FMODRET, kset);
}
EXPORT_SYMBOL_GPL(register_btf_fmodret_id_set);
if (!tab) return -ENOENT; /* Even though the size of tab->dtors[0] is > sizeof(u32), we only need * to compare the first u32 with btf_id, so we can reuse btf_id_cmp_func.
*/
BUILD_BUG_ON(offsetof(struct btf_id_dtor_kfunc, btf_id) != 0);
dtor = bsearch(&btf_id, tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func); if (!dtor) return -ENOENT; return dtor->kfunc_btf_id;
}
for (i = 0; i < cnt; i++) {
dtor_btf_id = btf_relocate_id(btf, dtors[i].kfunc_btf_id);
dtor_func = btf_type_by_id(btf, dtor_btf_id); if (!dtor_func || !btf_type_is_func(dtor_func)) return -EINVAL;
dtor_func_proto = btf_type_by_id(btf, dtor_func->type); if (!dtor_func_proto || !btf_type_is_func_proto(dtor_func_proto)) return -EINVAL;
/* Make sure the prototype of the destructor kfunc is 'void func(type *)' */
t = btf_type_by_id(btf, dtor_func_proto->type); if (!t || !btf_type_is_void(t)) return -EINVAL;
nr_args = btf_type_vlen(dtor_func_proto); if (nr_args != 1) return -EINVAL;
args = btf_params(dtor_func_proto);
t = btf_type_by_id(btf, args[0].type); /* Allow any pointer type, as width on targets Linux supports * will be same for all pointer types (i.e. sizeof(void *))
*/ if (!t || !btf_type_is_ptr(t)) return -EINVAL;
} return 0;
}
/* Register @add_cnt destructor kfunc entries from @dtors in the BTF object
 * obtained for @owner via btf_get_module_btf().
 *
 * The entries are validated, appended to btf->dtor_kfunc_tab, have their BTF
 * ids relocated, and the whole table is re-sorted so that it can be searched
 * with bsearch() keyed on btf_id.
 *
 * Returns 0 on success or a negative errno:
 *   -E2BIG     - the resulting table would reach BTF_DTOR_KFUNC_MAX_CNT
 *   -EOVERFLOW - the entry count would overflow u32
 *   -EINVAL    - bad destructor prototype, or a second registration for a
 *                module BTF
 *   -ENOMEM    - table allocation failed
 * or whatever check_btf_kconfigs()/btf_get_module_btf() report when no BTF is
 * available. On any failure after the BTF was obtained, the destructor table
 * of that BTF is freed.
 *
 * This function must be invoked only from initcalls/module init functions.
 */
int register_btf_id_dtor_kfuncs(const struct btf_id_dtor_kfunc *dtors, u32 add_cnt,
				struct module *owner)
{
	struct btf_id_dtor_kfunc_tab *tab;
	struct btf *btf;
	u32 tab_cnt, i;
	int ret;

	btf = btf_get_module_btf(owner);
	if (!btf)
		return check_btf_kconfigs(owner, "dtor kfuncs");
	if (IS_ERR(btf))
		return PTR_ERR(btf);

	if (add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
		pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
		ret = -E2BIG;
		goto end;
	}

	/* Ensure that the prototype of dtor kfuncs being registered is sane */
	ret = btf_check_dtor_kfuncs(btf, dtors, add_cnt);
	if (ret < 0)
		goto end;

	tab = btf->dtor_kfunc_tab;
	/* Only one call allowed for modules */
	if (WARN_ON_ONCE(tab && btf_is_module(btf))) {
		ret = -EINVAL;
		goto end;
	}

	tab_cnt = tab ? tab->cnt : 0;
	if (tab_cnt > U32_MAX - add_cnt) {
		ret = -EOVERFLOW;
		goto end;
	}
	if (tab_cnt + add_cnt >= BTF_DTOR_KFUNC_MAX_CNT) {
		pr_err("cannot register more than %d kfunc destructors\n", BTF_DTOR_KFUNC_MAX_CNT);
		ret = -E2BIG;
		goto end;
	}

	tab = krealloc(btf->dtor_kfunc_tab,
		       struct_size(tab, dtors, tab_cnt + add_cnt),
		       GFP_KERNEL | __GFP_NOWARN);
	if (!tab) {
		ret = -ENOMEM;
		goto end;
	}

	/* For a newly allocated table, initialize tab->cnt to 0 */
	if (!btf->dtor_kfunc_tab)
		tab->cnt = 0;
	btf->dtor_kfunc_tab = tab;

	/* Append the new entries after the existing ones */
	memcpy(tab->dtors + tab->cnt, dtors, add_cnt * sizeof(tab->dtors[0]));

	/* The caller's ids are pre-relocation (btf_check_dtor_kfuncs() also
	 * relocates before looking them up), so remap the copied entries.
	 */
	for (i = tab_cnt; i < tab_cnt + add_cnt; i++) {
		tab->dtors[i].btf_id = btf_relocate_id(btf, tab->dtors[i].btf_id);
		tab->dtors[i].kfunc_btf_id = btf_relocate_id(btf, tab->dtors[i].kfunc_btf_id);
	}

	tab->cnt += add_cnt;

	/* Lookups use bsearch() on btf_id, so keep the table sorted */
	sort(tab->dtors, tab->cnt, sizeof(tab->dtors[0]), btf_id_cmp_func, NULL);

end:
	if (ret)
		btf_free_dtor_kfunc_tab(btf);
	btf_put(btf);
	return ret;
}
EXPORT_SYMBOL_GPL(register_btf_id_dtor_kfuncs);
#define MAX_TYPES_ARE_COMPAT_DEPTH 2
/* Decide whether a local type and a target type are compatible for the
 * purposes of type-based CO-RE relocations. The rules differ slightly from
 * those used for field-based relocations. Root types are assumed to have
 * already passed a name match; past that root-level check, names play no
 * role at all. The rules:
 *   - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs/ENUM64s are compatible, provided
 *     the kind is the same on both sides (i.e., STRUCT is not compatible
 *     with UNION);
 *   - ENUMs/ENUM64s: size is ignored;
 *   - INT: size and signedness are ignored;
 *   - ARRAY: dimensionality is ignored, element types are checked for
 *     compatibility recursively;
 *   - CONST/VOLATILE/RESTRICT modifiers are ignored;
 *   - TYPEDEFs/PTRs are compatible when the types they point to are
 *     compatible;
 *   - FUNC_PROTOs are compatible when their signatures are: same number of
 *     input args and compatible return and argument types.
 * These rules are not set in stone and will probably be adjusted as more
 * experience with BPF CO-RE relocations is gained.
 *
 * The actual check is delegated to __bpf_core_types_are_compat() with
 * MAX_TYPES_ARE_COMPAT_DEPTH passed as its last argument.
 */
int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,
			      const struct btf *targ_btf, __u32 targ_id)
{
	int compat;

	compat = __bpf_core_types_are_compat(local_btf, local_id,
					     targ_btf, targ_id,
					     MAX_TYPES_ARE_COMPAT_DEPTH);
	return compat;
}
if (*cc) {
bpf_free_cands_from_cache(*cc);
*cc = NULL;
}
new_cands = kmemdup(cands, sizeof_cands(cands->cnt), GFP_KERNEL_ACCOUNT); if (!new_cands) {
bpf_free_cands(cands); return ERR_PTR(-ENOMEM);
} /* strdup the name, since it will stay in cache. * the cands->name points to strings in prog's BTF and the prog can be unloaded.
*/
new_cands->name = kmemdup_nul(cands->name, cands->name_len, GFP_KERNEL_ACCOUNT);
bpf_free_cands(cands); if (!new_cands->name) {
kfree(new_cands); return ERR_PTR(-ENOMEM);
}
*cc = new_cands; return new_cands;
}
#ifdef CONFIG_DEBUG_INFO_BTF_MODULES staticvoid __purge_cand_cache(struct btf *btf, struct bpf_cand_cache **cache, int cache_size)
{ struct bpf_cand_cache *cc; int i, j;
for (i = 0; i < cache_size; i++) {
cc = cache[i]; if (!cc) continue; if (!btf) { /* when new module is loaded purge all of module_cand_cache, * since new module might have candidates with the name * that matches cached cands.
*/
bpf_free_cands_from_cache(cc);
cache[i] = NULL; continue;
} /* when module is unloaded purge cache entries * that match module's btf
*/ for (j = 0; j < cc->cnt; j++) if (cc->cands[j].btf == btf) {
bpf_free_cands_from_cache(cc);
cache[i] = NULL; break;
}
}
staticstruct bpf_cand_cache *
bpf_core_add_cands(struct bpf_cand_cache *cands, conststruct btf *targ_btf, int targ_start_id)
{ struct bpf_cand_cache *new_cands; conststruct btf_type *t; constchar *targ_name;
size_t targ_essent_len; int n, i;
n = btf_nr_types(targ_btf); for (i = targ_start_id; i < n; i++) {
t = btf_type_by_id(targ_btf, i); if (btf_kind(t) != cands->kind) continue;
targ_name = btf_name_by_offset(targ_btf, t->name_off); if (!targ_name) continue;
/* the resched point is before strncmp to make sure that search * for non-existing name will have a chance to schedule().
*/
cond_resched();
if (strncmp(cands->name, targ_name, cands->name_len) != 0) continue;
targ_essent_len = bpf_core_essential_name_len(targ_name); if (targ_essent_len != cands->name_len) continue;
/* most of the time there is only one candidate for a given kind+name pair */
new_cands = kmalloc(sizeof_cands(cands->cnt + 1), GFP_KERNEL_ACCOUNT); if (!new_cands) {
bpf_free_cands(cands); return ERR_PTR(-ENOMEM);
}
cc = check_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); /* cands is a pointer to stack here */ if (cc) { if (cc->cnt) return cc; goto check_modules;
}
/* Attempt to find target candidates in vmlinux BTF first */
cands = bpf_core_add_cands(cands, main_btf, 1); if (IS_ERR(cands)) return ERR_CAST(cands);
/* cands is a pointer to kmalloced memory here if cands->cnt > 0 */
/* populate cache even when cands->cnt == 0 */
cc = populate_cand_cache(cands, vmlinux_cand_cache, VMLINUX_CAND_CACHE_SIZE); if (IS_ERR(cc)) return ERR_CAST(cc);
/* if vmlinux BTF has any candidate, don't go for module BTFs */ if (cc->cnt) return cc;
check_modules: /* cands is a pointer to stack here and cands->cnt == 0 */
cc = check_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE); if (cc) /* if cache has it return it even if cc->cnt == 0 */ return cc;
/* If candidate is not found in vmlinux's BTF then search in module's BTFs */
spin_lock_bh(&btf_idr_lock);
idr_for_each_entry(&btf_idr, mod_btf, id) { if (!btf_is_module(mod_btf)) continue; /* linear search could be slow hence unlock/lock * the IDR to avoid holding it for too long
*/
btf_get(mod_btf);
spin_unlock_bh(&btf_idr_lock);
cands = bpf_core_add_cands(cands, mod_btf, btf_nr_types(main_btf));
btf_put(mod_btf); if (IS_ERR(cands)) return ERR_CAST(cands);
spin_lock_bh(&btf_idr_lock);
}
spin_unlock_bh(&btf_idr_lock); /* cands is a pointer to kmalloced memory here if cands->cnt > 0 * or pointer to stack if cands->cnt == 0. * Copy it into the cache even when cands->cnt == 0 and * return the result.
*/ return populate_cand_cache(cands, module_cand_cache, MODULE_CAND_CACHE_SIZE);
}
/* ~4k of temp memory necessary to convert LLVM spec like "0:1:0:5" * into arrays of btf_ids of struct fields and array indices.
*/
specs = kcalloc(3, sizeof(*specs), GFP_KERNEL_ACCOUNT); if (!specs) return -ENOMEM;
type = btf_type_by_id(ctx->btf, relo->type_id); if (!type) {
bpf_log(ctx->log, "relo #%u: bad type id %u\n",
relo_idx, relo->type_id);
kfree(specs); return -EINVAL;
}
if (need_cands) { struct bpf_cand_cache *cc; int i;
mutex_lock(&cand_cache_mutex);
cc = bpf_core_find_cands(ctx, relo->type_id); if (IS_ERR(cc)) {
bpf_log(ctx->log, "target candidate search failed for %d\n",
relo->type_id);
err = PTR_ERR(cc); goto out;
} if (cc->cnt) {
cands.cands = kcalloc(cc->cnt, sizeof(*cands.cands), GFP_KERNEL_ACCOUNT); if (!cands.cands) {
err = -ENOMEM; goto out;
}
} for (i = 0; i < cc->cnt; i++) {
bpf_log(ctx->log, "CO-RE relocating %s %s: found target candidate [%d]\n",
btf_kind_str[cc->kind], cc->name, cc->cands[i].id);
cands.cands[i].btf = cc->cands[i].btf;
cands.cands[i].id = cc->cands[i].id;
}
cands.len = cc->cnt; /* cand_cache_mutex needs to span the cache lookup and * copy of btf pointer into bpf_core_cand_list, * since module can be unloaded while bpf_core_calc_relo_insn * is working with module's btf.
*/
}
btf_type_skip_modifiers(btf, mtype->type, &id); /* If we match on both type and name, the field is considered trusted. */ if (btf_id == id && !strcmp(field_name, m_name)) returntrue;
}
/* Exactly one of the two type names may be suffixed with ___init, so * if the strings are the same size, they can't possibly be no-cast * aliases of one another. If you have two of the same type names, e.g. * they're both nf_conn___init, it would be improper to return true * because they are _not_ no-cast aliases, they are the same type.
*/ if (reg_len == arg_len) returnfalse;
/* Either of the two names must be the other name, suffixed with ___init. */ if ((reg_len != arg_len + pattern_len) &&
(arg_len != reg_len + pattern_len)) returnfalse;
/* In the future, this can be ported to use BTF tagging */
param_name = btf_name_by_offset(btf, arg->name_off); if (str_is_empty(param_name)) returnfalse;
len = strlen(param_name); if (len <= suffix_len) returnfalse;
param_name += len - suffix_len; return !strncmp(param_name, suffix, suffix_len);
}
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.223 Sekunden
(vorverarbeitet am 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.