/* * Kernel addresses in the vmalloc space use at most 48 bits, and the * remaining bits are guaranteed to be 0x1. So we can compose the address * with a fixed length movn/movk/movk sequence.
*/ staticinlinevoid emit_addr_mov_i64(constint reg, const u64 val, struct jit_ctx *ctx)
{
u64 tmp = val; int shift = 0;
staticinlineint bpf2a64_offset(int bpf_insn, int off, conststruct jit_ctx *ctx)
{ /* BPF JMP offset is relative to the next instruction */
bpf_insn++; /* * Whereas arm64 branch instructions encode the offset * from the branch itself, so we must subtract 1 from the * instruction offset.
*/ return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
}
staticvoid jit_fill_hole(void *area, unsignedint size)
{
__le32 *ptr; /* We are guaranteed to have aligned memory. */ for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
}
/*
 * Overwrite @len bytes at @dst with AARCH64_BREAK_FAULT via
 * aarch64_insn_set().
 *
 * Returns 0 when aarch64_insn_set() yields a non-zero result and -EINVAL
 * when it yields zero/NULL.
 */
int bpf_arch_text_invalidate(void *dst, size_t len)
{
	return aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len) ? 0 : -EINVAL;
}
staticinlineint epilogue_offset(conststruct jit_ctx *ctx)
{ int to = ctx->epilogue_offset; int from = ctx->idx;
return to - from;
}
/*
 * Check whether @imm fits an add/sub immediate.
 *
 * Returns true when every set bit of @imm lies within 0xfff (imm12) or
 * every set bit lies within 0xfff000 (imm12 shifted left by 12).
 * Zero satisfies both tests.
 */
static bool is_addsub_imm(u32 imm)
{
	/* Either imm12 or shifted imm12. */
	return !(imm & ~0xfff) || !(imm & ~0xfff000);
}
/* bpf function may be invoked by 3 instruction types:
 * 1. bl, attached via freplace to bpf prog via short jump
 * 2. br, attached via freplace to bpf prog via long jump
 * 3. blr, working as a function pointer, used by emit_call.
 * So BTI_JC should be used here to support both br and blr.
 */
emit_bti(A64_BTI_JC, ctx);
if (!prog->aux->exception_cb) { /* Sign lr */ if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
emit(A64_PACIASP, ctx);
/* Save FP and LR registers to stay align with ARM64 AAPCS */
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
prepare_bpf_tail_call_cnt(ctx);
if (!ebpf_from_cbpf && is_main_prog) {
cur_offset = ctx->idx - idx0; if (cur_offset != PROLOGUE_OFFSET) {
pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
cur_offset, PROLOGUE_OFFSET); return -1;
} /* BTI landing pad for the tail call, done with a BR */
emit_bti(A64_BTI_J, ctx);
}
push_callee_regs(ctx);
} else { /* * Exception callback receives FP of Main Program as third * parameter
*/
emit(A64_MOV(1, A64_FP, A64_R(2)), ctx); /* * Main Program already pushed the frame record and the * callee-saved registers. The exception callback will not push * anything and re-use the main program's stack. * * 12 registers are on the stack
*/
emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
}
/* Stack must be multiples of 16B */
ctx->stack_size = round_up(prog->aux->stack_depth, 16);
if (ctx->fp_used) { if (ctx->priv_sp_used) { /* Set up private stack pointer */
priv_stack_ptr = prog->aux->priv_stack_ptr + PRIV_STACK_GUARD_SZ;
emit_percpu_ptr(priv_sp, priv_stack_ptr, ctx);
emit(A64_ADD_I(1, fp, priv_sp, ctx->stack_size), ctx);
} else { /* Set up BPF prog stack base register */
emit(A64_MOV(1, fp, A64_SP), ctx);
}
}
/* Set up function call stack */ if (ctx->stack_size && !ctx->priv_sp_used)
emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
if (ctx->arena_vm_start)
emit_a64_mov_i64(arena_vm_base, ctx->arena_vm_start, ctx);
/* build a plt initialized like this:
 *
 * plt:
 *      ldr tmp, target
 *      br tmp
 * target:
 *      .quad dummy_tramp
 *
 * when a long jump trampoline is attached, target is filled with the
 * trampoline address, and when the trampoline is removed, target is
 * restored to dummy_tramp address.
 *
 * @ctx: JIT context. Instructions are emitted at ctx->idx; the target slot
 *       is only written when ctx->image is non-NULL.
 */
static void build_plt(struct jit_ctx *ctx)
{
	const u8 tmp = bpf2a64[TMP_REG_1];
	struct bpf_plt *plt = NULL;

	/* make sure target is 64-bit aligned */
	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
		emit(A64_NOP, ctx);

	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
	/* plt is called via bl, no BTI needed here */
	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
	emit(A64_BR(tmp), ctx);

	/* plt was computed from ctx->image, so only dereference it if set */
	if (ctx->image)
		plt->target = (u64)&dummy_tramp;
}
if (!ctx->prog->aux->extable ||
WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries)) return -EINVAL;
ex = &ctx->prog->aux->extable[ctx->exentry_idx];
pc = (unsignedlong)&ctx->ro_image[ctx->idx - 1];
/* * This is the relative offset of the instruction that may fault from * the exception table itself. This will be written to the exception * table and if this instruction faults, the destination register will * be set to '0' and the execution will jump to the next instruction.
*/
ins_offset = pc - (long)&ex->insn; if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN)) return -ERANGE;
/* * Since the extable follows the program, the fixup offset is always * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value * to keep things simple, and put the destination register in the upper * bits. We don't need to worry about buildtime or runtime sort * modifying the upper bits because the table is already sorted, and * isn't part of the main exception table. * * The fixup_offset is set to the next instruction from the instruction * that may fault. The execution will jump to this after handling the * fault.
*/
fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE); if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset)) return -ERANGE;
/* * The offsets above have been calculated using the RO buffer but we * need to use the R/W buffer for writes. * switch ex to rw buffer for writing.
*/
ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);
ex->insn = ins_offset;
if (BPF_CLASS(insn->code) != BPF_LDX)
dst_reg = DONT_CLEAR;
/* * - offset[0] offset of the end of prologue, * start of the 1st instruction. * - offset[1] - offset of the end of 1st instruction, * start of the 2nd instruction * [....] * - offset[3] - offset of the end of 3rd instruction, * start of 4th instruction
*/ for (i = 0; i < prog->len; i++) { conststruct bpf_insn *insn = &prog->insnsi[i]; int ret;
ctx->offset[i] = ctx->idx;
ret = build_insn(insn, ctx, extra_pass); if (ret > 0) {
i++;
ctx->offset[i] = ctx->idx; continue;
} if (ret) return ret;
} /* * offset is allocated with prog->len + 1 so fill in * the last element with the offset after the last * instruction (end of program)
*/
ctx->offset[i] = ctx->idx;
return 0;
}
/*
 * Scan the emitted instructions for leftover fault placeholders.
 *
 * @ctx: JIT context; reads ctx->image[0 .. ctx->idx - 1]
 *
 * Returns -1 if any instruction word equals AARCH64_BREAK_FAULT,
 * 0 otherwise.
 */
static int validate_code(struct jit_ctx *ctx)
{
	int i;

	for (i = 0; i < ctx->idx; i++) {
		u32 a64_insn = le32_to_cpu(ctx->image[i]);

		if (a64_insn == AARCH64_BREAK_FAULT)
			return -1;
	}

	return 0;
}
staticint validate_ctx(struct jit_ctx *ctx)
{ if (validate_code(ctx)) return -1;
if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries)) return -1;
tmp = bpf_jit_blind_constants(prog); /* If blinding was requested and we failed during blinding, * we must fall back to the interpreter.
*/ if (IS_ERR(tmp)) return orig_prog; if (tmp != prog) {
tmp_blinded = true;
prog = tmp;
}
jit_data = prog->aux->jit_data; if (!jit_data) {
jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); if (!jit_data) {
prog = orig_prog; goto out;
}
prog->aux->jit_data = jit_data;
}
priv_stack_ptr = prog->aux->priv_stack_ptr; if (!priv_stack_ptr && prog->aux->jits_use_priv_stack) { /* Allocate actual private stack size with verifier-calculated * stack size plus two memory guards to protect overflow and * underflow.
*/
priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
2 * PRIV_STACK_GUARD_SZ;
priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 16, GFP_KERNEL); if (!priv_stack_ptr) {
prog = orig_prog; goto out_priv_stack;
}
/* Pass 1: Estimate the maximum image size. * * BPF line info needs ctx->offset[i] to be the offset of * instruction[i] in jited image, so build prologue first.
*/ if (build_prologue(&ctx, was_classic)) {
prog = orig_prog; goto out_off;
}
if (build_body(&ctx, extra_pass)) {
prog = orig_prog; goto out_off;
}
/* Now we know the maximum image size. */
prog_size = sizeof(u32) * ctx.idx; /* also allocate space for plt target */
extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
image_size = extable_offset + extable_size;
ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr, sizeof(u32), &header, &image_ptr,
jit_fill_hole); if (!ro_header) {
prog = orig_prog; goto out_off;
}
/* Pass 2: Determine jited position and result for each instruction */
/* * Use the image(RW) for writing the JITed instructions. But also save * the ro_image(RX) for calculating the offsets in the image. The RW * image will be later copied to the RX image from where the program * will run. The bpf_jit_binary_pack_finalize() will do this copy in the * final step.
*/
ctx.image = (__le32 *)image_ptr;
ctx.ro_image = (__le32 *)ro_image_ptr; if (extable_size)
prog->aux->extable = (void *)ro_image_ptr + extable_offset;
skip_init_ctx:
ctx.idx = 0;
ctx.exentry_idx = 0;
ctx.write = true;
build_prologue(&ctx, was_classic);
/* Record exentry_idx and body_idx before first build_body */
exentry_idx = ctx.exentry_idx;
body_idx = ctx.idx; /* Dont write body instructions to memory for now */
ctx.write = false;
if (build_body(&ctx, extra_pass)) {
prog = orig_prog; goto out_free_hdr;
}
/* Extra pass to validate JITed code. */ if (validate_ctx(&ctx)) {
prog = orig_prog; goto out_free_hdr;
}
/* update the real prog size */
prog_size = sizeof(u32) * ctx.idx;
/* And we're done. */ if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
if (!prog->is_func || extra_pass) { /* The jited image may shrink since the jited result for * BPF_CALL to subprog may be changed from indirect call * to direct call.
*/ if (extra_pass && ctx.idx > jit_data->ctx.idx) {
pr_err_once("multi-func JIT bug %d > %d\n",
ctx.idx, jit_data->ctx.idx);
prog->bpf_func = NULL;
prog->jited = 0;
prog->jited_len = 0; goto out_free_hdr;
} if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) { /* ro_header has been freed */
ro_header = NULL;
prog = orig_prog; goto out_off;
} /* * The instructions have now been copied to the ROX region from * where they will execute. Now the data cache has to be cleaned to * the PoU and the I-cache has to be invalidated for the VAs.
*/
bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
} else {
jit_data->ctx = ctx;
jit_data->ro_image = ro_image_ptr;
jit_data->header = header;
jit_data->ro_header = ro_header;
}
if (l->cookie == 0) { /* if cookie is zero, one instruction is enough to store it */
emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
} else {
emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
ctx);
}
/* save p to callee saved register x19 to avoid loading p with mov_i64 * each time.
*/
emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);
/*
 * Emit the calls to every fmod_ret program in @tl.
 *
 * @ctx:         JIT context
 * @tl:          links to invoke; tl->nr_links entries of tl->links[] are used
 * @bargs_off:   passed through to invoke_bpf_prog()
 * @retval_off:  SP-relative offset of the return value slot
 * @run_ctx_off: passed through to invoke_bpf_prog()
 * @branches:    output array with at least tl->nr_links entries; entry i
 *               receives the address of the nop emitted after program i
 */
static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
			       int bargs_off, int retval_off, int run_ctx_off,
			       __le32 **branches)
{
	int i;

	/* The first fmod_ret program will receive a garbage return value.
	 * Set this to 0 to avoid confusing the program.
	 */
	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
	for (i = 0; i < tl->nr_links; i++) {
		invoke_bpf_prog(ctx, tl->links[i], bargs_off, retval_off,
				run_ctx_off, true);
		/* if (*(u64 *)(sp + retval_off) != 0)
		 *	goto do_fexit;
		 */
		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
		/* Save the location of branch, and generate a nop.
		 * This nop will be replaced with a cbnz later.
		 */
		branches[i] = ctx->image + ctx->idx;
		emit(A64_NOP, ctx);
	}
}
/* Bookkeeping for how trampoline arguments are split between registers
 * and the stack.
 */
struct arg_aux {
	/* number of args passed through registers; the remaining args
	 * are passed through the stack
	 */
	int args_in_regs;

	/* number of registers used to pass arguments */
	int regs_for_args;

	/* how much stack is used for additional args passed to the bpf
	 * program that did not fit in the original function registers
	 */
	int bstack_for_args;

	/* how much stack is used for additional args passed to the
	 * original function when called from the trampoline (this one
	 * needs arguments to be properly aligned)
	 */
	int ostack_for_args;
};
/* the rest arguments are passed through stack */ for (; i < m->nr_args; i++) {
stack_slots = (m->arg_size[i] + 7) / 8;
a->bstack_for_args += stack_slots * 8;
a->ostack_for_args = a->ostack_for_args + stack_slots * 8;
}
return 0;
}
/*
 * Emit a shift pair that clears the unused bits of a partially filled
 * 64-bit register.
 *
 * @ctx:             JIT context
 * @reg:             register to clean up in place
 * @effective_bytes: number of meaningful bytes in @reg; 0 emits nothing
 *
 * The order of the two shifts depends on CONFIG_CPU_BIG_ENDIAN.
 */
static void clear_garbage(struct jit_ctx *ctx, int reg, int effective_bytes)
{
	if (effective_bytes) {
		int garbage_bits = 64 - 8 * effective_bytes;
#ifdef CONFIG_CPU_BIG_ENDIAN
		/* garbage bits are at the right end */
		emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
		emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
#else
		/* garbage bits are at the left end */
		emit(A64_LSL(1, reg, reg, garbage_bits), ctx);
		emit(A64_LSR(1, reg, reg, garbage_bits), ctx);
#endif
	}
}
staticvoid save_args(struct jit_ctx *ctx, int bargs_off, int oargs_off, conststruct btf_func_model *m, conststruct arg_aux *a, bool for_call_origin)
{ int i; int reg; int doff; int soff; int slots;
u8 tmp = bpf2a64[TMP_REG_1];
/* store arguments to the stack for the bpf program, or restore * arguments from stack for the original function
*/ for (reg = 0; reg < a->regs_for_args; reg++) {
emit(for_call_origin ?
A64_LDR64I(reg, A64_SP, bargs_off) :
A64_STR64I(reg, A64_SP, bargs_off),
ctx);
bargs_off += 8;
}
soff = 32; /* on stack arguments start from FP + 32 */
doff = (for_call_origin ? oargs_off : bargs_off);
/* save on stack arguments */ for (i = a->args_in_regs; i < m->nr_args; i++) {
slots = (m->arg_size[i] + 7) / 8; /* verifier ensures arg_size <= 16, so slots equals 1 or 2 */ while (slots-- > 0) {
emit(A64_LDR64I(tmp, A64_FP, soff), ctx); /* if there is unused space in the last slot, clear * the garbage contained in the space.
*/ if (slots == 0 && !for_call_origin)
clear_garbage(ctx, tmp, m->arg_size[i] % 8);
emit(A64_STR64I(tmp, A64_SP, doff), ctx);
soff += 8;
doff += 8;
}
}
}
staticvoid restore_args(struct jit_ctx *ctx, int bargs_off, int nregs)
{ int reg;
/* Based on the x86's implementation of arch_prepare_bpf_trampoline(). * * bpf prog and function entry before bpf trampoline hooked: * mov x9, lr * nop * * bpf prog and function entry after bpf trampoline hooked: * mov x9, lr * bl <bpf_trampoline or plt> *
*/ staticint prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im, struct bpf_tramp_links *tlinks, void *func_addr, conststruct btf_func_model *m, conststruct arg_aux *a,
u32 flags)
{ int i; int stack_size; int retaddr_off; int regs_off; int retval_off; int bargs_off; int nfuncargs_off; int ip_off; int run_ctx_off; int oargs_off; int nfuncargs; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; bool save_ret;
__le32 **branches = NULL; bool is_struct_ops = is_struct_ops_tramp(fentry);
if (flags & BPF_TRAMP_F_INDIRECT) { /* * Indirect call for bpf_struct_ops
*/
emit_kcfi(cfi_get_func_hash(func_addr), ctx);
} /* bpf trampoline may be invoked by 3 instruction types: * 1. bl, attached to bpf prog or kernel function via short jump * 2. br, attached to bpf prog or kernel function via long jump * 3. blr, working as a function pointer, used by struct_ops. * So BTI_JC should used here to support both br and blr.
*/
emit_bti(A64_BTI_JC, ctx);
/* x9 is not set for struct_ops */ if (!is_struct_ops) { /* frame for parent function */
emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
}
/* frame for patched function for tracing, or caller for struct_ops */
emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
emit(A64_MOV(1, A64_FP, A64_SP), ctx);
/* allocate stack space */
emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
if (flags & BPF_TRAMP_F_IP_ARG) { /* save ip address of the traced function */
emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
}
if (flags & BPF_TRAMP_F_CALL_ORIG) { /* for the first pass, assume the worst case */ if (!ctx->image)
ctx->idx += 4; else
emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_enter, ctx);
}
for (i = 0; i < fentry->nr_links; i++)
invoke_bpf_prog(ctx, fentry->links[i], bargs_off,
retval_off, run_ctx_off,
flags & BPF_TRAMP_F_RET_FENTRY_RET);
if (fmod_ret->nr_links) {
branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
GFP_KERNEL); if (!branches) return -ENOMEM;
if (flags & BPF_TRAMP_F_CALL_ORIG) { /* save args for original func */
save_args(ctx, bargs_off, oargs_off, m, a, true); /* call original func */
emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
emit(A64_RET(A64_R(10)), ctx); /* store return value */
emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx); /* reserve a nop for bpf_tramp_image_put */
im->ip_after_call = ctx->ro_image + ctx->idx;
emit(A64_NOP, ctx);
}
/* update the branches saved in invoke_bpf_mod_ret with cbnz */ for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) { int offset = &ctx->image[ctx->idx] - branches[i];
*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
}
for (i = 0; i < fexit->nr_links; i++)
invoke_bpf_prog(ctx, fexit->links[i], bargs_off, retval_off,
run_ctx_off, false);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
im->ip_epilogue = ctx->ro_image + ctx->idx; /* for the first pass, assume the worst case */ if (!ctx->image)
ctx->idx += 4; else
emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
emit_call((const u64)__bpf_tramp_exit, ctx);
}
if (flags & BPF_TRAMP_F_RESTORE_REGS)
restore_args(ctx, bargs_off, a->regs_for_args);
/* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf * trampoline with the branch instruction from @ip to @new_addr. If @old_addr * or @new_addr is NULL, the old or new instruction is NOP. * * When @ip is the bpf prog entry, a bpf trampoline is being attached or * detached. Since bpf trampoline and bpf prog are allocated separately with * vmalloc, the address distance may exceed 128MB, the maximum branch range. * So long jump should be handled. * * When a bpf prog is constructed, a plt pointing to empty trampoline * dummy_tramp is placed at the end: * * bpf_prog: * mov x9, lr * nop // patchsite * ... * ret * * plt: * ldr x10, target * br x10 * target: * .quad dummy_tramp // plt target * * This is also the state when no trampoline is attached. * * When a short-jump bpf trampoline is attached, the patchsite is patched * to a bl instruction to the trampoline directly: * * bpf_prog: * mov x9, lr * bl <short-jump bpf trampoline address> // patchsite * ... * ret * * plt: * ldr x10, target * br x10 * target: * .quad dummy_tramp // plt target * * When a long-jump bpf trampoline is attached, the plt target is filled with * the trampoline address and the patchsite is patched to a bl instruction to * the plt: * * bpf_prog: * mov x9, lr * bl plt // patchsite * ... * ret * * plt: * ldr x10, target * br x10 * target: * .quad <long-jump bpf trampoline address> // plt target * * The dummy_tramp is used to prevent another CPU from jumping to unknown * locations during the patching process, making the patching process easier.
*/ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, void *old_addr, void *new_addr)
{ int ret;
u32 old_insn;
u32 new_insn;
u32 replaced; struct bpf_plt *plt = NULL; unsignedlong size = 0UL; unsignedlong offset = ~0UL; enum aarch64_insn_branch_type branch_type; char namebuf[KSYM_NAME_LEN]; void *image = NULL;
u64 plt_target = 0ULL; bool poking_bpf_entry;
if (!__bpf_address_lookup((unsignedlong)ip, &size, &offset, namebuf)) /* Only poking bpf text is supported. Since kernel function * entry is set up by ftrace, we reply on ftrace to poke kernel * functions.
*/ return -ENOTSUPP;
image = ip - offset; /* zero offset means we're poking bpf prog entry */
poking_bpf_entry = (offset == 0UL);
/* bpf prog entry, find plt and the real patchsite */ if (poking_bpf_entry) { /* plt locates at the end of bpf prog */
plt = image + size - PLT_TARGET_OFFSET;
/* skip to the nop instruction in bpf prog entry: * bti c // if BTI enabled * mov x9, x30 * nop
*/
ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
}
/* long jump is only possible at bpf prog entry */ if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
!poking_bpf_entry)) return -EINVAL;
if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0) return -EFAULT;
if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0) return -EFAULT;
if (is_long_jump(ip, new_addr))
plt_target = (u64)new_addr; elseif (is_long_jump(ip, old_addr)) /* if the old target is a long jump and the new target is not, * restore the plt target to dummy_tramp, so there is always a * legal and harmless address stored in plt target, and we'll * never jump from plt to an unknown place.
*/
plt_target = (u64)&dummy_tramp;
if (plt_target) { /* non-zero plt_target indicates we're patching a bpf prog, * which is read only.
*/ if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1)) return -EFAULT;
WRITE_ONCE(plt->target, plt_target);
set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1); /* since plt target points to either the new trampoline * or dummy_tramp, even if another CPU reads the old plt * target value before fetching the bl instruction to plt, * it will be brought back by dummy_tramp, so no barrier is * required here.
*/
}
/* if the old target and the new target are both long jumps, no * patching is required
*/ if (old_insn == new_insn) return 0;
mutex_lock(&text_mutex); if (aarch64_insn_read(ip, &replaced)) {
ret = -EFAULT; goto out;
}
if (replaced != old_insn) {
ret = -EFAULT; goto out;
}
/* We call aarch64_insn_patch_text_nosync() to replace instruction * atomically, so no other CPUs will fetch a half-new and half-old * instruction. But there is chance that another CPU executes the * old instruction after the patching operation finishes (e.g., * pipeline not flushed, or icache not synchronized yet). * * 1. when a new trampoline is attached, it is not a problem for * different CPUs to jump to different trampolines temporarily. * * 2. when an old trampoline is freed, we should wait for all other * CPUs to exit the trampoline and make sure the trampoline is no * longer reachable, since bpf_tramp_image_put() function already * uses percpu_ref and task-based rcu to do the sync, no need to call * the sync version here, see bpf_tramp_image_put() for details.
*/
ret = aarch64_insn_patch_text_nosync(ip, new_insn);
out:
mutex_unlock(&text_mutex);
/* Report whether this JIT supports BPF exceptions. Always returns true. */
bool bpf_jit_supports_exceptions(void)
{
	/* We unwind through both kernel frames starting from within bpf_throw
	 * call and BPF frames. Therefore we require FP unwinder to be enabled
	 * to walk kernel frames and reach BPF frames in the stack trace.
	 * ARM64 kernel is always compiled with CONFIG_FRAME_POINTER=y
	 */
	return true;
}
/* Report whether this JIT supports BPF arena. Always returns true. */
bool bpf_jit_supports_arena(void)
{
	return true;
}
/*
 * Report whether the JIT can handle @insn.
 *
 * @insn:     instruction to check; only inspected when @in_arena is true
 * @in_arena: whether the instruction is being checked for arena use
 *
 * Returns true unless @in_arena is set and @insn is a 32- or 64-bit
 * BPF_STX | BPF_ATOMIC instruction for which bpf_atomic_is_load_store()
 * is false while the ARM64_HAS_LSE_ATOMICS capability is absent.
 */
bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
{
	if (!in_arena)
		return true;

	switch (insn->code) {
	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		if (!bpf_atomic_is_load_store(insn) &&
		    !cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
			return false;
	}

	/* every other instruction, and every supported atomic, is fine */
	return true;
}
/* Report whether Spectre v4 nospec insns may be skipped. Always returns true. */
bool bpf_jit_bypass_spec_v4(void)
{
	/* In case of arm64, we rely on the firmware mitigation of Speculative
	 * Store Bypass as controlled via the ssbd kernel parameter. Whenever
	 * the mitigation is enabled, it works for all of the kernel code with
	 * no need to provide any additional instructions. Therefore, skip
	 * inserting nospec insns against Spectre v4.
	 */
	return true;
}
/*
 * Report whether the helper identified by @imm is inlined by the JIT.
 *
 * Returns true for BPF_FUNC_get_smp_processor_id,
 * BPF_FUNC_get_current_task and BPF_FUNC_get_current_task_btf,
 * false for every other value.
 */
bool bpf_jit_inlines_helper_call(s32 imm)
{
	switch (imm) {
	case BPF_FUNC_get_smp_processor_id:
	case BPF_FUNC_get_current_task:
	case BPF_FUNC_get_current_task_btf:
		return true;
	default:
		return false;
	}
}
/* * If we fail the final pass of JIT (from jit_subprogs), * the program may not be finalized yet. Call finalize here * before freeing it.
*/ if (jit_data) {
bpf_jit_binary_pack_finalize(jit_data->ro_header, jit_data->header);
kfree(jit_data);
}
prog->bpf_func -= cfi_get_offset();
hdr = bpf_jit_binary_pack_hdr(prog);
bpf_jit_binary_pack_free(hdr, NULL);
priv_stack_ptr = prog->aux->priv_stack_ptr; if (priv_stack_ptr) {
priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 16) +
2 * PRIV_STACK_GUARD_SZ;
priv_stack_check_guard(priv_stack_ptr, priv_stack_alloc_sz, prog);
free_percpu(prog->aux->priv_stack_ptr);
}
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
}
bpf_prog_unlock_free(prog);
}
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.66 Sekunden
(vorverarbeitet am 2026-04-26)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.