/*
 * The following structure describes the stack layout of the loader program.
 * In addition R6 contains the pointer to context.
 * R7 contains the result of the last sys_bpf command (typically error or FD).
 * R9 contains the result of the last sys_close command.
 *
 * Naming convention:
 * ctx - bpf program context
 * stack - bpf program stack
 * blob - bpf_attr-s, strings, insns, map data.
 *        All the bytes that loader prog will use for read/write.
 *
 * Every member is a 4-byte slot. Do not reorder the members: offsets into
 * this structure are used as stack offsets by the emitted instructions.
 */
struct loader_stack {
	/* BTF FD saved on the stack and later copied into the prog load attr */
	__u32 btf_fd;
	/* NOTE(review): no user of this slot is visible here; presumably a
	 * temporary FD of an inner map - confirm against the map creation code.
	 */
	__u32 inner_map_fd;
	/* FDs of successfully loaded programs, indexed in load order */
	__u32 prog_fd[MAX_USED_PROGS];
};
/* amount of stack actually used, only used to calculate iterations, not stack offset */
nr_progs_sz = offsetof(struct loader_stack, prog_fd[nr_progs]); /* jump over cleanup code */
emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, /* size of cleanup code below (including map fd cleanup) */
(nr_progs_sz / 4) * 3 + 2 + /* 6 insns for emit_sys_close_blob, * 6 insns for debug_regs in emit_sys_close_blob
*/
nr_maps * (6 + (gen->log_level ? 6 : 0))));
/* remember the label where all error branches will jump to */
gen->cleanup_label = gen->insn_cur - gen->insn_start; /* emit cleanup code: close all temp FDs */ for (i = 0; i < nr_progs_sz; i += 4) {
emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -stack_sz + i));
emit(gen, BPF_JMP_IMM(BPF_JSLE, BPF_REG_1, 0, 1));
emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_close));
} for (i = 0; i < nr_maps; i++)
emit_sys_close_blob(gen, blob_fd_array_off(gen, i)); /* R7 contains the error code from sys_bpf. Copy it into R0 and exit. */
emit(gen, BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
emit(gen, BPF_EXIT_INSN());
}
/* Get index for map_fd/btf_fd slot in reserved fd_array, or in data relative * to start of fd_array. Caller can decide if it is usable or not.
*/ staticint add_map_fd(struct bpf_gen *gen)
{ if (gen->nr_maps == MAX_USED_MAPS) {
pr_warn("Total maps exceeds %d\n", MAX_USED_MAPS);
gen->error = -E2BIG; return 0;
} return gen->nr_maps++;
}
staticint add_kfunc_btf_fd(struct bpf_gen *gen)
{ int cur;
/*
 * Emit instructions that copy one value from the blob into the context.
 *
 * @gen:      generator state the instructions are appended to
 * @ctx_off:  destination offset, relative to the context pointer held in R6
 * @size:     width of the value in bytes, translated to a BPF access size by
 *            insn_bytes_to_bpf_size()
 * @blob_off: source offset inside the blob
 *
 * The emitted code uses R1 for the blob address and R0 for the value, so
 * both registers are overwritten.
 */
static void move_blob2ctx(struct bpf_gen *gen, int ctx_off, int size, int blob_off)
{
	/* R1 = address of blob + blob_off */
	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
					 0, 0, 0, blob_off));
	/* R0 = *(size *)R1 */
	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_1, 0));
	/* *(size *)(R6 + ctx_off) = R0 */
	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off));
}
/*
 * Emit instructions that copy one value from the context into the blob.
 *
 * @gen:            generator state the instructions are appended to
 * @off:            destination offset inside the blob
 * @size:           width of the value in bytes, translated to a BPF access
 *                  size by insn_bytes_to_bpf_size()
 * @ctx_off:        source offset, relative to the context pointer held in R6
 * @check_non_zero: when true, the blob is left untouched if the value read
 *                  from the context is zero
 *
 * The emitted code uses R0 for the value and R1 for the blob address, so
 * both registers are overwritten.
 */
static void move_ctx2blob(struct bpf_gen *gen, int off, int size, int ctx_off,
			  bool check_non_zero)
{
	/* R0 = *(size *)(R6 + ctx_off) */
	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_6, ctx_off));
	if (check_non_zero) {
		/* If value in ctx is zero don't update the blob.
		 * For example: when ctx->map.max_entries == 0, keep default
		 * max_entries from bpf.c
		 *
		 * The jump distance of 3 covers the two-slot LD_IMM64 and the
		 * store emitted right below.
		 */
		emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3));
	}
	/* R1 = address of blob + off */
	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
					 0, 0, 0, off));
	/* *(size *)R1 = R0 */
	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
}
/*
 * Emit instructions that copy one value from the program stack into the blob.
 *
 * @gen:       generator state the instructions are appended to
 * @off:       destination offset inside the blob
 * @size:      width of the value in bytes, translated to a BPF access size by
 *             insn_bytes_to_bpf_size()
 * @stack_off: source offset, relative to the frame pointer R10
 *
 * The emitted code uses R0 for the value and R1 for the blob address, so
 * both registers are overwritten.
 */
static void move_stack2blob(struct bpf_gen *gen, int off, int size, int stack_off)
{
	/* R0 = *(size *)(R10 + stack_off) */
	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off));
	/* R1 = address of blob + off */
	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_1, BPF_PSEUDO_MAP_IDX_VALUE,
					 0, 0, 0, off));
	/* *(size *)R1 = R0 */
	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_1, BPF_REG_0, 0));
}
/*
 * Emit instructions that copy one value from the program stack into the
 * context.
 *
 * @gen:       generator state the instructions are appended to
 * @ctx_off:   destination offset, relative to the context pointer held in R6
 * @size:      width of the value in bytes, translated to a BPF access size by
 *             insn_bytes_to_bpf_size()
 * @stack_off: source offset, relative to the frame pointer R10
 *
 * The emitted code carries the value in R0, which is therefore overwritten.
 */
static void move_stack2ctx(struct bpf_gen *gen, int ctx_off, int size, int stack_off)
{
	/* R0 = *(size *)(R10 + stack_off) */
	emit(gen, BPF_LDX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_0, BPF_REG_10, stack_off));
	/* *(size *)(R6 + ctx_off) = R0 */
	emit(gen, BPF_STX_MEM(insn_bytes_to_bpf_size(size), BPF_REG_6, BPF_REG_0, ctx_off));
}
/*
 * Emit a call to the sys_bpf helper.
 *
 * @gen:       generator state the instructions are appended to
 * @cmd:       bpf command number, passed in R1
 * @attr:      offset of the attribute record inside the blob; its address is
 *             passed in R2
 * @attr_size: number of attribute bytes, passed in R3
 *
 * The helper's return value (R0) is copied into R7 so that later emitted
 * code can test the result of the most recent sys_bpf command.
 */
static void emit_sys_bpf(struct bpf_gen *gen, int cmd, int attr, int attr_size)
{
	emit(gen, BPF_MOV64_IMM(BPF_REG_1, cmd));
	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_2, BPF_PSEUDO_MAP_IDX_VALUE,
					 0, 0, 0, attr));
	emit(gen, BPF_MOV64_IMM(BPF_REG_3, attr_size));
	emit(gen, BPF_EMIT_CALL(BPF_FUNC_sys_bpf));
	/* remember the result in R7 */
	emit(gen, BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
}
/*
 * Tell whether @value fits into a signed 16-bit immediate.
 *
 * The value is narrowed to 16 bits and widened again; it fits exactly when
 * that round trip leaves it unchanged.
 *
 * Returns true for values in [-32768, 32767], false otherwise.
 */
static bool is_simm16(__s64 value)
{
	return value == (__s64)(__s16)value;
}
/* R7 contains result of last sys_bpf command. * if (R7 < 0) goto cleanup;
*/ if (is_simm16(off)) {
emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, off));
} else {
gen->error = -ERANGE;
emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, -1));
}
}
/* reg1 and reg2 should not be R1 - R5. They can be R0, R6 - R10 */ staticvoid emit_debug(struct bpf_gen *gen, int reg1, int reg2, constchar *fmt, va_list args)
{ char buf[1024]; int addr, len, ret;
if (!gen->log_level) return;
ret = vsnprintf(buf, sizeof(buf), fmt, args); if (ret < 1024 - 7 && reg1 >= 0 && reg2 < 0) /* The special case to accommodate common debug_ret(): * to avoid specifying BPF_REG_7 and adding " r=%%d" to * prints explicitly.
*/
strcat(buf, " r=%d");
len = strlen(buf) + 1;
addr = add_data(gen, buf, len);
/*
 * Fields of bpf_attr are set to values in native byte-order before being
 * written to the target-bound data blob, and may need endian conversion.
 * This macro allows providing the correct value in situ more simply than
 * writing a separate converter for *all fields* of *all records* included
 * in union bpf_attr. Note that sizeof(rval) should match the assignment
 * target to avoid runtime problems.
 *
 * The macro reads gen->swapped_endian, so a variable named 'gen' must be in
 * scope wherever it is expanded. 'rval' is evaluated exactly once. Values of
 * 2, 4 and 8 bytes are byte-swapped when gen->swapped_endian is set; 1-byte
 * values pass through unchanged; any other size triggers a warning and the
 * value is returned as is.
 */
#define tgt_endian(rval) ({						\
	__typeof__(rval) _val = (rval);					\
	if (gen->swapped_endian) {					\
		switch (sizeof(_val)) {					\
		case 1: break;						\
		case 2: _val = bswap_16(_val); break;			\
		case 4: _val = bswap_32(_val); break;			\
		case 8: _val = bswap_64(_val); break;			\
		default: pr_warn("unsupported bswap size!\n");		\
		}							\
	}								\
	_val;								\
})
void bpf_gen__load_btf(struct bpf_gen *gen, constvoid *btf_raw_data,
__u32 btf_raw_size)
{ int attr_size = offsetofend(union bpf_attr, btf_log_level); int btf_data, btf_load_attr; union bpf_attr attr;
/* returns existing ksym_desc with ref incremented, or inserts a new one */ staticstruct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo)
{ struct ksym_desc *kdesc; int i;
/* Expects: * BPF_REG_8 - pointer to instruction * * We need to reuse BTF fd for same symbol otherwise each relocation takes a new * index, while kernel limits total kfunc BTFs to 256. For duplicate symbols, * this would mean a new BTF fd index for each entry. By pairing symbol name * with index, we get the insn->imm, insn->off pairing that kernel uses for * kfunc_tab, which becomes the effective limit even though all of them may * share same index in fd_array (such that kfunc_btf_tab has 1 element).
*/ staticvoid emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insn)
{ struct ksym_desc *kdesc; int btf_fd_idx;
kdesc = get_ksym_desc(gen, relo); if (!kdesc) return; /* try to copy from existing bpf_insn */ if (kdesc->ref > 1) {
move_blob2blob(gen, insn + offsetof(struct bpf_insn, imm), 4,
kdesc->insn + offsetof(struct bpf_insn, imm));
move_blob2blob(gen, insn + offsetof(struct bpf_insn, off), 2,
kdesc->insn + offsetof(struct bpf_insn, off)); goto log;
} /* remember insn offset, so we can copy BTF ID and FD later */
kdesc->insn = insn;
emit_bpf_find_by_name_kind(gen, relo); if (!relo->is_weak)
emit_check_err(gen); /* get index in fd_array to store BTF FD at */
btf_fd_idx = add_kfunc_btf_fd(gen); if (btf_fd_idx > INT16_MAX) {
pr_warn("BTF fd off %d for kfunc %s exceeds INT16_MAX, cannot process relocation\n",
btf_fd_idx, relo->name);
gen->error = -E2BIG; return;
}
kdesc->off = btf_fd_idx; /* jump to success case */
emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); /* set value for imm, off as 0 */
emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0));
emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); /* skip success case for ret < 0 */
emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10)); /* store btf_id into insn[insn_idx].imm */
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); /* obtain fd in BPF_REG_9 */
emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7));
emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); /* load fd_array slot pointer */
emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); /* store BTF fd in slot, 0 for vmlinux */
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); /* jump to insn[insn_idx].off store if fd denotes module BTF */
emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); /* set the default value for off */
emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); /* skip BTF fd store for vmlinux BTF */
emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 1)); /* store index into insn[insn_idx].off */
emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx));
log: if (!gen->log_level) return;
emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_8,
offsetof(struct bpf_insn, imm)));
emit(gen, BPF_LDX_MEM(BPF_H, BPF_REG_9, BPF_REG_8,
offsetof(struct bpf_insn, off)));
debug_regs(gen, BPF_REG_7, BPF_REG_9, " func (%s:count=%d): imm: %%d, off: %%d",
relo->name, kdesc->ref);
emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
0, 0, 0, blob_fd_array_off(gen, kdesc->off)));
emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_0, 0));
debug_regs(gen, BPF_REG_9, -1, " func (%s:count=%d): btf_fd",
relo->name, kdesc->ref);
}
/* populate union bpf_attr with a pointer to license */
emit_rel_store(gen, attr_field(prog_load_attr, license), license_off);
/* populate union bpf_attr with a pointer to instructions */
emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off);
/* populate union bpf_attr with a pointer to func_info */
emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info);
/* populate union bpf_attr with a pointer to line_info */
emit_rel_store(gen, attr_field(prog_load_attr, line_info), line_info);
/* populate union bpf_attr with a pointer to core_relos */
emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos);
/* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */
emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array);
/* populate union bpf_attr with user provided log details */
move_ctx2blob(gen, attr_field(prog_load_attr, log_level), 4,
offsetof(struct bpf_loader_ctx, log_level), false);
move_ctx2blob(gen, attr_field(prog_load_attr, log_size), 4,
offsetof(struct bpf_loader_ctx, log_size), false);
move_ctx2blob(gen, attr_field(prog_load_attr, log_buf), 8,
offsetof(struct bpf_loader_ctx, log_buf), false); /* populate union bpf_attr with btf_fd saved in the stack earlier */
move_stack2blob(gen, attr_field(prog_load_attr, prog_btf_fd), 4,
stack_off(btf_fd)); if (gen->attach_kind) {
emit_find_attach_target(gen); /* populate union bpf_attr with btf_id and btf_obj_fd found by helper */
emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,
0, 0, 0, prog_load_attr));
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
offsetof(union bpf_attr, attach_btf_id)));
emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,
offsetof(union bpf_attr, attach_btf_obj_fd)));
}
emit_relos(gen, insns_off); /* emit PROG_LOAD command */
emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size);
debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt); /* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */
cleanup_relos(gen, insns_off); if (gen->attach_kind) {
emit_sys_close_blob(gen,
attr_field(prog_load_attr, attach_btf_obj_fd));
gen->attach_kind = 0;
}
emit_check_err(gen); /* remember prog_fd in the stack, if successful */
emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7,
stack_off(prog_fd[gen->nr_progs])));
gen->nr_progs++;
}
void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,
__u32 value_size)
{ int attr_size = offsetofend(union bpf_attr, flags); int map_update_attr, value, key; union bpf_attr attr; int zero = 0;
memset(&attr, 0, attr_size);
value = add_data(gen, pvalue, value_size);
key = add_data(gen, &zero, sizeof(zero));
void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot, int inner_map_idx)
{ int attr_size = offsetofend(union bpf_attr, flags); int map_update_attr, key; union bpf_attr attr; int tgt_slot;
void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx)
{ int attr_size = offsetofend(union bpf_attr, map_fd); int map_freeze_attr; union bpf_attr attr;
/*
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig
 * zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
 * noch Qualität der bereitgestellten Informationen zugesichert.
 *
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */