// SPDX-License-Identifier: GPL-2.0-only /* * Just-In-Time compiler for eBPF bytecode on MIPS. * Implementation of JIT functions common to 32-bit and 64-bit CPUs. * * Copyright (c) 2021 Anyfi Networks AB. * Author: Johan Almbladh <johan.almbladh@gmail.com> * * Based on code and ideas from * Copyright (c) 2017 Cavium, Inc. * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com> * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
*/
/* * Code overview * ============= * * - bpf_jit_comp.h * Common definitions and utilities. * * - bpf_jit_comp.c * Implementation of JIT top-level logic and exported JIT API functions. * Implementation of internal operations shared by 32-bit and 64-bit code. * JMP and ALU JIT control code, register control code, shared ALU and * JMP/JMP32 JIT operations. * * - bpf_jit_comp32.c * Implementation of functions to JIT prologue, epilogue and a single eBPF * instruction for 32-bit MIPS CPUs. The functions use shared operations * where possible, and implement the rest for 32-bit MIPS such as ALU64 * operations. * * - bpf_jit_comp64.c * Ditto, for 64-bit MIPS CPUs. * * Zero and sign extension * ======================== * 32-bit MIPS instructions on 64-bit MIPS registers use sign extension, * but the eBPF instruction set mandates zero extension. We let the verifier * insert explicit zero-extensions after 32-bit ALU operations, both for * 32-bit and 64-bit MIPS JITs. Conditional JMP32 operations on 64-bit MIPs * are JITed with sign extensions inserted when so expected. * * ALU operations * ============== * ALU operations on 32/64-bit MIPS and ALU64 operations on 64-bit MIPS are * JITed in the following steps. ALU64 operations on 32-bit MIPS are more * complicated and therefore only processed by special implementations in * step (3). * * 1) valid_alu_i: * Determine if an immediate operation can be emitted as such, or if * we must fall back to the register version. * * 2) rewrite_alu_i: * Convert BPF operation and immediate value to a canonical form for * JITing. In some degenerate cases this form may be a no-op. * * 3) emit_alu_{i,i64,r,64}: * Emit instructions for an ALU or ALU64 immediate or register operation. * * JMP operations * ============== * JMP and JMP32 operations require an JIT instruction offset table for * translating the jump offset. This table is computed by dry-running the * JIT without actually emitting anything. 
However, the computed PC-relative * offset may overflow the 18-bit offset field width of the native MIPS * branch instruction. In such cases, the long jump is converted into the * following sequence. * * <branch> !<cond> +2 Inverted PC-relative branch * nop Delay slot * j <offset> Unconditional absolute long jump * nop Delay slot * * Since this converted sequence alters the offset table, all offsets must * be re-calculated. This may in turn trigger new branch conversions, so * the process is repeated until no further changes are made. Normally it * completes in 1-2 iterations. If JIT_MAX_ITERATIONS should be reached, we * fall back to converting every remaining jump operation. The branch * conversion is independent of how the JMP or JMP32 condition is JITed. * * JMP32 and JMP operations are JITed as follows. * * 1) setup_jmp_{i,r}: * Convert jump conditional and offset into a form that can be JITed. * This form may be a no-op, a canonical form, or an inverted PC-relative * jump if branch conversion is necessary. * * 2) valid_jmp_i: * Determine if an immediate operation can be emitted as such, or if * we must fall back to the register version. Applies to JMP32 for 32-bit * MIPS, and both JMP and JMP32 for 64-bit MIPS. * * 3) emit_jmp_{i,i64,r,r64}: * Emit instructions for a JMP or JMP32 immediate or register operation. * * 4) finish_jmp_{i,r}: * Emit any instructions needed to finish the jump. This includes a nop * for the delay slot if a branch was emitted, and a long absolute jump * if the branch was converted.
*/
/* * Push registers on the stack, starting at a given depth from the stack * pointer and increasing. The next depth to be written is returned.
*/ int push_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
{ int reg;
/*
 * NOTE(review): the body of push_regs() was lost during text
 * extraction — only the 'reg' declaration remains here. Per the
 * comment above, it should store each register in 'mask' (except
 * those in 'excl') at increasing stack depths and return the next
 * depth to be written. Restore from the original source before
 * building.
 */
/* * Pop registers from the stack, starting at a given depth from the stack * pointer and increasing. The next depth to be read is returned.
*/ int pop_regs(struct jit_context *ctx, u32 mask, u32 excl, int depth)
{ int reg;
/*
 * NOTE(review): the body of pop_regs() was lost during text
 * extraction — only the 'reg' declaration remains here. Per the
 * comment above, it should reload each register in 'mask' (except
 * those in 'excl') from increasing stack depths and return the next
 * depth to be read. Restore from the original source before building.
 */
/*
 * Compute the 28-bit jump target address from a BPF program location.
 *
 * Returns 0 on the dry-run passes (no code emitted yet), -1 if the
 * target lies outside the 256 MB region reachable by the MIPS j
 * instruction, and the in-region absolute address otherwise.
 */
int get_target(struct jit_context *ctx, u32 loc)
{
	u32 index = INDEX(ctx->descriptors[loc]);
	unsigned long pc;
	unsigned long addr;

	/* No code is emitted on the offset-computation passes */
	if (!ctx->target)
		return 0;

	/* pc is the address of the delay slot of the jump itself */
	pc = (unsigned long)&ctx->target[ctx->jit_index];
	addr = (unsigned long)&ctx->target[index];

	/* j can only reach targets within the same 256 MB region as pc */
	if ((addr ^ pc) & ~MIPS_JMP_MASK)
		return -1;

	return addr & MIPS_JMP_MASK;
}
/* Compute the PC-relative offset to relative BPF program offset */ int get_offset(conststruct jit_context *ctx, int off)
{ return (INDEX(ctx->descriptors[ctx->bpf_index + off]) -
ctx->jit_index - 1) * sizeof(u32);
}
/*
 * Validate ALU immediate range.
 *
 * Returns true if the BPF ALU operation 'op' with immediate 'imm' can
 * be emitted using a native MIPS immediate instruction; false means
 * the caller must fall back to the register form.
 */
bool valid_alu_i(u8 op, s32 imm)
{
	switch (BPF_OP(op)) {
	case BPF_NEG:
	case BPF_LSH:
	case BPF_RSH:
	case BPF_ARSH:
		/* All legal eBPF values are valid */
		return true;
	case BPF_ADD:
		if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS))
			return false;
		/* imm must be 16 bits */
		return imm >= -0x8000 && imm <= 0x7fff;
	case BPF_SUB:
		if (IS_ENABLED(CONFIG_CPU_DADDI_WORKAROUNDS))
			return false;
		/* -imm must be 16 bits */
		return imm >= -0x7fff && imm <= 0x8000;
	case BPF_AND:
	case BPF_OR:
	case BPF_XOR:
		/* imm must be 16 bits unsigned */
		return imm >= 0 && imm <= 0xffff;
	case BPF_MUL:
		/* imm must be zero or a positive power of two */
		return imm == 0 || (imm > 0 && is_power_of_2(imm));
	case BPF_DIV:
	case BPF_MOD:
		/* imm must be a 17-bit power of two */
		return (u32)imm <= 0x10000 && is_power_of_2((u32)imm);
	}
	return false;
}
/*
 * NOTE(review): the lines below are the tail of a byte-swap helper —
 * the function header and the start of its 32-bit case were lost in
 * extraction. Presumably this is the register byte-swap routine of the
 * 32-bit JIT (BPF_END handling); confirm against the original file.
 * The visible code finishes a masked 32-bit swap, then handles the
 * 16-bit case either with the wsbh instruction (MIPS32r2/r6) or with a
 * mask/shift/or sequence, and finally records 'dst' as clobbered.
 */
emit(ctx, and, tmp, dst, msk); /* tmp = dst & msk */
emit(ctx, sll, tmp, tmp, 8); /* tmp = tmp << 8 */
emit(ctx, srl, dst, dst, 8); /* dst = dst >> 8 */
emit(ctx, and, dst, dst, msk); /* dst = dst & msk */
emit(ctx, or, dst, dst, tmp); /* reg = dst | tmp */
} break; /* Swap bytes in a half word */ case 16: if (cpu_has_mips32r2 || cpu_has_mips32r6) {
emit(ctx, wsbh, dst, dst);
emit(ctx, andi, dst, dst, 0xffff);
} else {
emit(ctx, andi, tmp, dst, 0xff00); /* t = d & 0xff00 */
emit(ctx, srl, tmp, tmp, 8); /* t = t >> 8 */
emit(ctx, andi, dst, dst, 0x00ff); /* d = d & 0x00ff */
emit(ctx, sll, dst, dst, 8); /* d = d << 8 */
emit(ctx, or, dst, dst, tmp); /* d = d | t */
} break;
}
clobber_reg(ctx, dst);
}
/*
 * Validate jump immediate range.
 *
 * Returns true if the conditional jump 'op' with immediate 'imm' can
 * be emitted using native MIPS immediate compare instructions; false
 * means the caller must fall back to the register form.
 */
bool valid_jmp_i(u8 op, s32 imm)
{
	switch (op) {
	case JIT_JNOP:
		/* Immediate value not used */
		return true;
	case BPF_JEQ:
	case BPF_JNE:
		/* No immediate operation */
		return false;
	case BPF_JSET:
	case JIT_JNSET:
		/* imm must be 16 bits unsigned */
		return imm >= 0 && imm <= 0xffff;
	case BPF_JGE:
	case BPF_JLT:
	case BPF_JSGE:
	case BPF_JSLT:
		/* imm must be 16 bits */
		return imm >= -0x8000 && imm <= 0x7fff;
	case BPF_JGT:
	case BPF_JLE:
	case BPF_JSGT:
	case BPF_JSLE:
		/* imm + 1 must be 16 bits */
		return imm >= -0x8001 && imm <= 0x7ffe;
	}
	return false;
}
/*
 * Invert a conditional jump operation, e.g. BPF_JGT becomes BPF_JLE.
 * An always-taken BPF_JA inverts to the never-taken JIT_JNOP. Returns
 * zero for any operation that has no defined inverse.
 */
static u8 invert_jmp(u8 op)
{
	switch (op) {
	case BPF_JA:
		return JIT_JNOP;
	case BPF_JEQ:
		return BPF_JNE;
	case BPF_JNE:
		return BPF_JEQ;
	case BPF_JSET:
		return JIT_JNSET;
	case BPF_JGT:
		return BPF_JLE;
	case BPF_JGE:
		return BPF_JLT;
	case BPF_JLT:
		return BPF_JGE;
	case BPF_JLE:
		return BPF_JGT;
	case BPF_JSGT:
		return BPF_JSLE;
	case BPF_JSGE:
		return BPF_JSLT;
	case BPF_JSLT:
		return BPF_JSGE;
	case BPF_JSLE:
		return BPF_JSGT;
	}
	return 0;
}
/*
 * Prepare a PC-relative jump operation.
 * @ctx: JIT context
 * @bpf_op: Jump conditional (BPF_J*, BPF_JA or JIT_JNOP)
 * @bpf_off: Jump offset in BPF instructions
 * @jit_op: Output: conditional to emit, inverted if converted
 * @jit_off: Output: PC-relative JIT offset in bytes
 *
 * NOTE(review): the offset computation inside the !CONVERTED branch
 * and the done:/output-store epilogue were missing from this copy of
 * the file and have been restored from the upstream kernel source —
 * verify against the original arch/mips/net/bpf_jit_comp.c.
 */
static void setup_jmp(struct jit_context *ctx, u8 bpf_op,
		      s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	u32 *descp = &ctx->descriptors[ctx->bpf_index];
	int op = bpf_op;
	int offset = 0;

	/* Do not compute offsets on the first pass */
	if (INDEX(*descp) == 0)
		goto done;

	/* Skip jumps never taken */
	if (bpf_op == JIT_JNOP)
		goto done;

	/* Convert jumps always taken */
	if (bpf_op == BPF_JA)
		*descp |= JIT_DESC_CONVERT;

	/*
	 * Current ctx->jit_index points to the start of the branch preamble.
	 * Since the preamble differs among different branch conditionals,
	 * the current index cannot be used to compute the branch offset.
	 * Instead, we use the offset table value for the next instruction,
	 * which gives the index immediately after the branch delay slot.
	 */
	if (!CONVERTED(*descp)) {
		int target = ctx->bpf_index + bpf_off + 1;
		int origin = ctx->bpf_index + 1;

		offset = (INDEX(ctx->descriptors[target]) -
			  INDEX(ctx->descriptors[origin]) + 1) * sizeof(u32);
	}

	/*
	 * The PC-relative branch offset field on MIPS is 18 bits signed,
	 * so if the computed offset is larger than this we generate an
	 * absolute jump that we skip with an inverted conditional branch.
	 */
	if (CONVERTED(*descp) || offset < -0x20000 || offset > 0x1ffff) {
		offset = 3 * sizeof(u32);
		op = invert_jmp(bpf_op);
		ctx->changes += !CONVERTED(*descp);
		*descp |= JIT_DESC_CONVERT;
	}

done:
	*jit_off = offset;
	*jit_op = op;
}
switch (bpf_op) { case BPF_JEQ: case BPF_JNE: break; case BPF_JSET: case BPF_JLT:
never = imm == 0; break; case BPF_JGE:
always = imm == 0; break; case BPF_JGT:
never = (u32)imm == U32_MAX; break; case BPF_JLE:
always = (u32)imm == U32_MAX; break; case BPF_JSGT:
never = imm == S32_MAX && width == 32; break; case BPF_JSGE:
always = imm == S32_MIN && width == 32; break; case BPF_JSLT:
never = imm == S32_MIN && width == 32; break; case BPF_JSLE:
always = imm == S32_MAX && width == 32; break;
}
if (never)
bpf_op = JIT_JNOP; if (always)
bpf_op = BPF_JA;
setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
}
/*
 * Prepare a PC-relative jump operation with register conditional.
 * @ctx: JIT context
 * @same_reg: Both comparison operands are the same register
 * @bpf_op: Jump conditional
 * @bpf_off: Jump offset in BPF instructions
 * @jit_op: Output: conditional to emit
 * @jit_off: Output: PC-relative JIT offset in bytes
 *
 * When a register is compared against itself, reflexive conditionals
 * (==, >=, <=) always hold and irreflexive ones (!=, <, >) never do,
 * so they fold into BPF_JA or JIT_JNOP before setup_jmp().
 */
void setup_jmp_r(struct jit_context *ctx, bool same_reg,
		 u8 bpf_op, s16 bpf_off, u8 *jit_op, s32 *jit_off)
{
	switch (bpf_op) {
	case BPF_JSET:
		break;
	case BPF_JEQ:
	case BPF_JGE:
	case BPF_JLE:
	case BPF_JSGE:
	case BPF_JSLE:
		if (same_reg)
			bpf_op = BPF_JA;
		break;
	case BPF_JNE:
	case BPF_JLT:
	case BPF_JGT:
	case BPF_JSGT:
	case BPF_JSLT:
		if (same_reg)
			bpf_op = JIT_JNOP;
		break;
	}

	setup_jmp(ctx, bpf_op, bpf_off, jit_op, jit_off);
}
/* Finish a PC-relative jump operation */ int finish_jmp(struct jit_context *ctx, u8 jit_op, s16 bpf_off)
{ /* Emit conditional branch delay slot */ if (jit_op != JIT_JNOP)
emit(ctx, nop); /* * Emit an absolute long jump with delay slot, * if the PC-relative branch was converted.
*/ if (CONVERTED(ctx->descriptors[ctx->bpf_index])) { int target = get_target(ctx, ctx->bpf_index + bpf_off + 1);
/* We are guaranteed to have aligned memory. */ for (p = area; size >= sizeof(u32); size -= sizeof(u32))
uasm_i_break(&p, BRK_BUG); /* Increments p */
}
/*
 * NOTE(review): the lines below are the interior of the top-level JIT
 * entry point (presumably bpf_int_jit_compile()). Its signature, local
 * declarations, and the tail (success/out_err paths) were lost in
 * extraction — restore from the original source before building. Flow:
 * blind constants, allocate the offset-descriptor table, iterate the
 * offset-computation passes to convergence, allocate the image, emit
 * the final code, then lock and flush it.
 */
/* * If BPF JIT was not enabled then we must fall back to * the interpreter.
*/ if (!prog->jit_requested) return orig_prog; /* * If constant blinding was enabled and we failed during blinding * then we must fall back to the interpreter. Otherwise, we save * the new JITed code.
*/
tmp = bpf_jit_blind_constants(prog); if (IS_ERR(tmp)) return orig_prog; if (tmp != prog) {
tmp_blinded = true;
prog = tmp;
}
memset(&ctx, 0, sizeof(ctx));
ctx.program = prog;
/* * Not able to allocate memory for descriptors[], then * we must fall back to the interpreter
*/
/* One extra slot so descriptors[prog->len] indexes one-past-the-end */
ctx.descriptors = kcalloc(prog->len + 1, sizeof(*ctx.descriptors),
GFP_KERNEL); if (ctx.descriptors == NULL) goto out_err;
/* First pass discovers used resources */ if (build_body(&ctx) < 0) goto out_err; /* * Second pass computes instruction offsets. * If any PC-relative branches are out of range, a sequence of * a PC-relative branch + a jump is generated, and we have to * try again from the beginning to generate the new offsets. * This is done until no additional conversions are necessary. * The last two iterations are done with all branches being * converted, to guarantee offset table convergence within a * fixed number of iterations.
*/
ctx.jit_index = 0;
build_prologue(&ctx);
tmp_idx = ctx.jit_index;
tries = JIT_MAX_ITERATIONS; do {
ctx.jit_index = tmp_idx;
/* When only two tries remain, force-convert every branch */
ctx.changes = 0; if (tries == 2)
set_convert_flag(&ctx, true); if (build_body(&ctx) < 0) goto out_err;
} while (ctx.changes > 0 && --tries > 0);
if (WARN_ONCE(ctx.changes > 0, "JIT offsets failed to converge")) goto out_err;
build_epilogue(&ctx, MIPS_R_RA);
/* Now we know the size of the structure to make */
image_size = sizeof(u32) * ctx.jit_index;
header = bpf_jit_binary_alloc(image_size, &image_ptr, sizeof(u32), jit_fill_hole); /* * Not able to allocate memory for the structure then * we must fall back to the interpretation
*/ if (header == NULL) goto out_err;
/* Actual pass to generate final JIT code */
ctx.target = (u32 *)image_ptr;
ctx.jit_index = 0;
/* * If building the JITed code fails somehow, * we fall back to the interpretation.
*/
build_prologue(&ctx); if (build_body(&ctx) < 0) goto out_err;
build_epilogue(&ctx, MIPS_R_RA);
/* Populate line info meta data */
set_convert_flag(&ctx, false);
bpf_prog_fill_jited_linfo(prog, &ctx.descriptors[1]);
/* Set as read-only exec and flush instruction cache */ if (bpf_jit_binary_lock_ro(header)) goto out_err;
/*
 * NOTE(review): "unsignedlong" on the next two lines is an extraction
 * artifact — should read "unsigned long"; fix before compiling.
 */
flush_icache_range((unsignedlong)header,
(unsignedlong)&ctx.target[ctx.jit_index]);
if (bpf_jit_enable > 1)
bpf_jit_dump(prog->len, image_size, 2, ctx.target);
/*
 * (Removed: trailing German website-disclaimer text — non-source
 * residue from web extraction, not part of the original file.)
 */