/* * eBPF prog stack layout: * * high * original ARM_SP => +-----+ * | | callee saved registers * +-----+ <= (BPF_FP + SCRATCH_SIZE) * | ... | eBPF JIT scratch space * eBPF fp register => +-----+ * (BPF_FP) | ... | eBPF prog stack * +-----+ * |RSVD | JIT scratchpad * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) * | ... | caller-saved registers * +-----+ * | ... | arguments passed on stack * ARM_SP during call => +-----| * | | * | ... | Function call stack * | | * +-----+ * low * * The callee saved registers depends on whether frame pointers are enabled. * With frame pointers (to be compliant with the ABI): * * high * original ARM_SP => +--------------+ \ * | pc | | * current ARM_FP => +--------------+ } callee saved registers * |r4-r9,fp,ip,lr| | * +--------------+ / * low * * Without frame pointers: * * high * original ARM_SP => +--------------+ * | r4-r9,fp,lr | callee saved registers * current ARM_FP => +--------------+ * low * * When popping registers off the stack at the end of a BPF function, we * reference them via the current ARM_FP register. * * Some eBPF operations are implemented via a call to a helper function. * Such calls are "invisible" in the eBPF code, so it is up to the calling * program to preserve any caller-saved ARM registers during the call. The * JIT emits code to push and pop those registers onto the stack, immediately * above the callee stack frame.
*/ #define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \
1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R9 | \
1 << ARM_FP) #define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) #define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC)
enum {
	/* Stack layout - these are offsets from (top of stack - 4).
	 *
	 * Each 64-bit eBPF register that lives in the JIT scratch space
	 * occupies two consecutive 32-bit slots, high word first
	 * (converted to a byte offset via STACK_OFFSET()).
	 */
	BPF_R2_HI,
	BPF_R2_LO,
	BPF_R3_HI,
	BPF_R3_LO,
	BPF_R4_HI,
	BPF_R4_LO,
	BPF_R5_HI,
	BPF_R5_LO,
	BPF_R7_HI,
	BPF_R7_LO,
	BPF_R8_HI,
	BPF_R8_LO,
	BPF_R9_HI,
	BPF_R9_LO,
	BPF_FP_HI,
	BPF_FP_LO,
	BPF_TC_HI,
	BPF_TC_LO,
	BPF_AX_HI,
	BPF_AX_LO,
	/* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4,
	 * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9,
	 * BPF_REG_FP and Tail call counts.
	 *
	 * Being last, this is also the total number of 32-bit
	 * scratch slots (used by SCRATCH_SIZE).
	 */
	BPF_JIT_SCRATCH_REGS,
};
/* * Negative "register" values indicate the register is stored on the stack * and are the offset from the top of the eBPF JIT scratch space.
*/ #define STACK_OFFSET(k) (-4 - (k) * 4) #define SCRATCH_SIZE (BPF_JIT_SCRATCH_REGS * 4)
/* * Map eBPF registers to ARM 32bit registers or stack scratch space. * * 1. First argument is passed using the arm 32bit registers and rest of the * arguments are passed on stack scratch space. * 2. First callee-saved argument is mapped to arm 32 bit registers and rest * arguments are mapped to scratch space on stack. * 3. We need two 64 bit temp registers to do complex operations on eBPF * registers. * * As the eBPF registers are all 64 bit registers and arm has only 32 bit * registers, we have to map each eBPF registers with two arm 32 bit regs or * scratch memory space and we have to build eBPF 64 bit register from those. *
*/ staticconst s8 bpf2a32[][2] = { /* return value from in-kernel function, and exit value from eBPF */
[BPF_REG_0] = {ARM_R1, ARM_R0}, /* arguments from eBPF program to in-kernel function */
[BPF_REG_1] = {ARM_R3, ARM_R2}, /* Stored on stack scratch space */
[BPF_REG_2] = {STACK_OFFSET(BPF_R2_HI), STACK_OFFSET(BPF_R2_LO)},
[BPF_REG_3] = {STACK_OFFSET(BPF_R3_HI), STACK_OFFSET(BPF_R3_LO)},
[BPF_REG_4] = {STACK_OFFSET(BPF_R4_HI), STACK_OFFSET(BPF_R4_LO)},
[BPF_REG_5] = {STACK_OFFSET(BPF_R5_HI), STACK_OFFSET(BPF_R5_LO)}, /* callee saved registers that in-kernel function will preserve */
[BPF_REG_6] = {ARM_R5, ARM_R4}, /* Stored on stack scratch space */
[BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)},
[BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, /* Read only Frame Pointer to access Stack */
[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, /* Temporary Register for BPF JIT, can be used * for constant blindings and others.
*/
[TMP_REG_1] = {ARM_R7, ARM_R6},
[TMP_REG_2] = {ARM_R9, ARM_R8}, /* Tail call count. Stored on stack scratch space. */
[TCALL_CNT] = {STACK_OFFSET(BPF_TC_HI), STACK_OFFSET(BPF_TC_LO)}, /* temporary register for blinding constants. * Stored on stack scratch space.
*/
[BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
};
/* * JIT Context: * * prog : bpf_prog * idx : index of current last JITed instruction. * prologue_bytes : bytes used in prologue. * epilogue_offset : offset of epilogue starting. * offsets : array of eBPF instruction offsets in * JITed code. * target : final JITed code. * epilogue_bytes : no of bytes used in epilogue. * imm_count : no of immediate counts used for global * variables. * imms : array of global variable addresses.
*/
/*
 * Wrappers which handle both OABI and EABI and assures Thumb2 interworking
 * (where the assembly routines like __aeabi_uidiv could cause problems).
 *
 * jit_udiv32: 32-bit unsigned division helper called from JITed code.
 */
static u32 jit_udiv32(u32 dividend, u32 divisor)
{
	/* NOTE(review): no divisor == 0 guard here; presumably the JIT
	 * emits a zero check before calling this helper — confirm. */
	return dividend / divisor;
}
/* * Initializes the JIT space with undefined instructions.
*/ staticvoid jit_fill_hole(void *area, unsignedint size)
{
u32 *ptr; /* We are guaranteed to have aligned memory. */ for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
*ptr++ = __opcode_to_mem_arm(ARM_INST_UDF);
}
#ifdefined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) /* EABI requires the stack to be aligned to 64-bit boundaries */ #define STACK_ALIGNMENT 8 #else /* Stack must be aligned to 32-bit boundaries */ #define STACK_ALIGNMENT 4 #endif
/* total stack size used in JITed code */ #define _STACK_SIZE (ctx->prog->aux->stack_depth + SCRATCH_SIZE) #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
/* on the "fake" run we just count them (duplicates included) */ if (ctx->target == NULL) {
ctx->imm_count++; return 0;
}
while ((i < ctx->imm_count) && ctx->imms[i]) { if (ctx->imms[i] == k) break;
i++;
}
if (ctx->imms[i] == 0)
ctx->imms[i] = k;
/* constants go just after the epilogue */
offset = ctx->offsets[ctx->prog->len - 1] * 4;
offset += ctx->prologue_bytes;
offset += ctx->epilogue_bytes;
offset += i * 4;
ctx->target[offset / 4] = k;
/* PC in ARM mode == address of the instruction + 8 */
imm = offset - (8 + ctx->idx * 4);
if (imm & ~0xfff) { /* * literal pool is too far, signal it into flags. we * can only detect it on the second pass unfortunately.
*/
ctx->flags |= FLAG_IMM_OVERFLOW; return 0;
}
return imm;
}
#endif/* __LINUX_ARM_ARCH__ */
staticinlineint bpf2a32_offset(int bpf_to, int bpf_from, conststruct jit_ctx *ctx) { int to, from;
if (ctx->target == NULL) return 0;
to = ctx->offsets[bpf_to];
from = ctx->offsets[bpf_from];
return to - from - 1;
}
/*
 * Move an immediate that's not an imm8m to a core register.
 *
 * On ARMv7+ this uses a MOVW (and a MOVT when the value has bits above
 * 0xffff); on older architectures the value is placed in the literal pool
 * and loaded PC-relative via imm_offset().
 */
/* Fixed: "staticinlinevoid" was fused into one token and would not
 * compile.
 */
static inline void emit_mov_i_no8m(const u8 rd, u32 val, struct jit_ctx *ctx)
{
#if __LINUX_ARM_ARCH__ < 7
	emit(ARM_LDR_I(rd, ARM_PC, imm_offset(val, ctx)), ctx);
#else
	emit(ARM_MOVW(rd, val & 0xffff), ctx);
	if (val > 0xffff)
		emit(ARM_MOVT(rd, val >> 16), ctx);
#endif
}
staticinlineint epilogue_offset(conststruct jit_ctx *ctx)
{ int to, from; /* No need for 1st dummy run */ if (ctx->target == NULL) return 0;
to = ctx->epilogue_offset;
from = ctx->idx;
/* * For BPF_ALU | BPF_DIV | BPF_K instructions * As ARM_R1 and ARM_R0 contains 1st argument of bpf * function, we need to save it on caller side to save * it from getting destroyed within callee. * After the return from the callee, we restore ARM_R0 * ARM_R1.
*/ if (rn != ARM_R1) {
emit(ARM_MOV_R(tmp[0], ARM_R1), ctx);
emit(ARM_MOV_R(ARM_R1, rn), ctx);
} if (rm != ARM_R0) {
emit(ARM_MOV_R(tmp[1], ARM_R0), ctx);
emit(ARM_MOV_R(ARM_R0, rm), ctx);
}
/* Push caller-saved registers on stack */
emit(ARM_PUSH(CALLER_MASK), ctx);
/* * As we are implementing 64-bit div/mod as function calls, We need to put the dividend in * R0-R1 and the divisor in R2-R3. As we have already pushed these registers on the stack, * we can recover them later after returning from the function call.
*/ if (rm[1] != ARM_R0 || rn[1] != ARM_R2) { /* * Move Rm to {R1, R0} if it is not already there.
*/ if (rm[1] != ARM_R0) { if (rn[1] == ARM_R0)
emit(ARM_PUSH(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
emit(ARM_MOV_R(ARM_R1, rm[0]), ctx);
emit(ARM_MOV_R(ARM_R0, rm[1]), ctx); if (rn[1] == ARM_R0) {
emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx); goto cont;
}
} /* * Move Rn to {R3, R2} if it is not already there.
*/ if (rn[1] != ARM_R2) {
emit(ARM_MOV_R(ARM_R3, rn[0]), ctx);
emit(ARM_MOV_R(ARM_R2, rn[1]), ctx);
}
}
cont:
/* Call appropriate function */ if (sign) { if (op == BPF_DIV)
dst = (u32)jit_sdiv64; else
dst = (u32)jit_smod64;
} else { if (op == BPF_DIV)
dst = (u32)jit_udiv64; else
dst = (u32)jit_mod64;
}
/* Save return value */ if (rd[1] != ARM_R0) {
emit(ARM_MOV_R(rd[0], ARM_R1), ctx);
emit(ARM_MOV_R(rd[1], ARM_R0), ctx);
}
/* Recover {R3, R2} and {R1, R0} from stack if they are not Rd */ if (rd[1] != ARM_R0 && rd[1] != ARM_R2) {
emit(ARM_POP(CALLER_MASK), ctx);
} elseif (rd[1] != ARM_R0) {
emit(ARM_POP(BIT(ARM_R0) | BIT(ARM_R1)), ctx);
emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
} else {
emit(ARM_ADD_I(ARM_SP, ARM_SP, 8), ctx);
emit(ARM_POP(BIT(ARM_R2) | BIT(ARM_R3)), ctx);
}
}
/* Is the translated BPF register on stack? */ staticbool is_stacked(s8 reg)
{ return reg < 0;
}
/* Materialize a (32-bit half of a) BPF register for use in JITed code.
 *
 * A register held in a core register is returned as-is.  A stacked
 * register is loaded from the JIT scratch space into @tmp, and @tmp is
 * returned for subsequent operations.
 */
static s8 arm_bpf_get_reg32(s8 reg, s8 tmp, struct jit_ctx *ctx)
{
	if (!is_stacked(reg))
		return reg;

	/* Spilled: load the value from the scratch slot into @tmp. */
	emit(ARM_LDR_I(tmp, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx);
	return tmp;
}
/* If a BPF register is on the stack (stk is true), save the register
 * back to the stack.  If the source register is not the same, then
 * move it into the correct register.
 *
 * No-op when @reg and @src are the same core register.
 */
/* Fixed: "staticvoid" and "elseif" were fused and would not compile. */
static void arm_bpf_put_reg32(s8 reg, s8 src, struct jit_ctx *ctx)
{
	if (is_stacked(reg))
		emit(ARM_STR_I(src, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(reg)), ctx);
	else if (reg != src)
		emit(ARM_MOV_R(reg, src), ctx);
}
if (is_stacked(dst_lo))
rd = tmp[1]; else
rd = dst_lo;
rs = arm_bpf_get_reg32(src_lo, rd, ctx); /* rs may be one of src[1], dst[1], or tmp[1] */
/* Sign extend rs if needed. If off == 32, lower 32-bits of src are moved to dst and sign * extension only happens in the upper 64 bits.
*/ if (off != 32) { /* Sign extend rs into rd */
emit(ARM_LSL_I(rd, rs, 32 - off), ctx);
emit(ARM_ASR_I(rd, rd, 32 - off), ctx);
} else {
rd = rs;
}
/* Write rd to dst_lo * * Optimization: * Assume: * 1. dst == src and stacked. * 2. off == 32 * * In this case src_lo was loaded into rd(tmp[1]) but rd was not sign extended as off==32. * So, we don't need to write rd back to dst_lo as they have the same value. * This saves us one str instruction.
*/ if (dst_lo != src_lo || off != 32)
arm_bpf_put_reg32(dst_lo, rd, ctx);
if (!is64) { if (!ctx->prog->aux->verifier_zext) /* Zero out high 4 bytes */
emit_a32_mov_i(dst_hi, 0, ctx);
} else { if (is_stacked(dst_hi)) {
emit(ARM_ASR_I(tmp[0], rd, 31), ctx);
arm_bpf_put_reg32(dst_hi, tmp[0], ctx);
} else {
emit(ARM_ASR_I(dst_hi, rd, 31), ctx);
}
}
}
switch (size) { case BPF_B: case BPF_W:
off_max = 0xfff; break; case BPF_H:
off_max = 0xff; break; case BPF_DW: /* Need to make sure off+4 does not overflow. */
off_max = 0xfff - 4; break;
} return -off_max <= off && off <= off_max;
}
if (!is_ldst_imm(off, sz)) {
emit_a32_mov_i(tmp[0], off, ctx);
emit(ARM_ADD_R(tmp[0], tmp[0], rd), ctx);
rd = tmp[0];
off = 0;
} switch (sz) { case BPF_B: /* Store a Byte */
emit(ARM_STRB_I(src_lo, rd, off), ctx); break; case BPF_H: /* Store a HalfWord */
emit(ARM_STRH_I(src_lo, rd, off), ctx); break; case BPF_W: /* Store a Word */
emit(ARM_STR_I(src_lo, rd, off), ctx); break; case BPF_DW: /* Store a Double Word */
emit(ARM_STR_I(src_lo, rd, off), ctx);
emit(ARM_STR_I(src_hi, rd, off + 4), ctx); break;
}
}
emit(ARM_ADD_I(ARM_SP, ARM_SP, imm8m(24)), ctx); // callee clean break;
} /* function return */ case BPF_JMP | BPF_EXIT: /* Optimization: when last instruction is EXIT * simply fallthrough to epilogue.
*/ if (i == ctx->prog->len - 1) break;
jmp_offset = epilogue_offset(ctx);
check_imm24(jmp_offset);
emit(ARM_B(jmp_offset), ctx); break;
notyet:
pr_info_once("*** NOT YET: opcode %02x ***\n", code); return -EFAULT; default:
pr_err_once("unknown opcode %02x\n", code); return -EINVAL;
}
if (ctx->flags & FLAG_IMM_OVERFLOW) /* * this instruction generated an overflow when * trying to access the literal pool, so * delegate this filter to the kernel interpreter.
*/ return -1; return 0;
}
/* If BPF JIT was not enabled then we must fall back to * the interpreter.
*/ if (!prog->jit_requested) return orig_prog;
/* If constant blinding was enabled and we failed during blinding * then we must fall back to the interpreter. Otherwise, we save * the new JITed code.
*/
tmp = bpf_jit_blind_constants(prog);
if (IS_ERR(tmp)) return orig_prog; if (tmp != prog) {
tmp_blinded = true;
prog = tmp;
}
/* Not able to allocate memory for offsets[] , then * we must fall back to the interpreter
*/
ctx.offsets = kcalloc(prog->len, sizeof(int), GFP_KERNEL); if (ctx.offsets == NULL) {
prog = orig_prog; goto out;
}
/* 1) fake pass to find in the length of the JITed code, * to compute ctx->offsets and other context variables * needed to compute final JITed code. * Also, calculate random starting pointer/start of JITed code * which is prefixed by random number of fault instructions. * * If the first pass fails then there is no chance of it * being successful in the second pass, so just fall back * to the interpreter.
*/ if (build_body(&ctx)) {
prog = orig_prog; goto out_off;
}
ctx.idx += ctx.imm_count; if (ctx.imm_count) {
ctx.imms = kcalloc(ctx.imm_count, sizeof(u32), GFP_KERNEL); if (ctx.imms == NULL) {
prog = orig_prog; goto out_off;
}
} #else /* there's nothing about the epilogue on ARMv7 */
build_epilogue(&ctx); #endif /* Now we can get the actual image size of the JITed arm code. * Currently, we are not considering the THUMB-2 instructions * for jit, although it can decrease the size of the image. * * As each arm instruction is of length 32bit, we are translating * number of JITed instructions into the size required to store these * JITed code.
*/
image_size = sizeof(u32) * ctx.idx;
/* Now we know the size of the structure to make */
header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); /* Not able to allocate memory for the structure then * we must fall back to the interpretation
*/ if (header == NULL) {
prog = orig_prog; goto out_imms;
}
/* 2.) Actual pass to generate final JIT code */
ctx.target = (u32 *) image_ptr;
ctx.idx = 0;
build_prologue(&ctx);
/* If building the body of the JITed code fails somehow, * we fall back to the interpretation.
*/ if (build_body(&ctx) < 0) goto out_free;
build_epilogue(&ctx);
/* 3.) Extra pass to validate JITed Code */ if (validate_code(&ctx)) goto out_free;
flush_icache_range((u32)header, (u32)(ctx.target + ctx.idx));
if (bpf_jit_enable > 1) /* there are 2 passes here */
bpf_jit_dump(prog->len, image_size, 2, ctx.target);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.