/*
 * Functions that must never get a kretprobe.  The table is scanned by the
 * generic kprobes code; it is terminated by a NULL entry.
 */
struct kretprobe_blackpoint kretprobe_blacklist[] = {
	{"__switch_to", }, /* This function switches only current task, but
			      doesn't switch kernel stack.*/
	{NULL, NULL}	/* Terminator */
};
/*
 * Insert a jump instruction at address 'from', which jumps to address 'to'.
 * The encoded bytes are written at @dest (the buffer that will eventually
 * be copied over 'from') — see __synthesize_relative_insn().
 */
void synthesize_reljump(void *dest, void *from, void *to)
{
	__synthesize_relative_insn(dest, from, to, JMP32_INSN_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_reljump);
/*
 * Insert a call instruction at address 'from', which calls address 'to'.
 * Same encoding helper as synthesize_reljump(), but with the near-call
 * opcode.
 */
void synthesize_relcall(void *dest, void *from, void *to)
{
	__synthesize_relative_insn(dest, from, to, CALL_INSN_OPCODE);
}
NOKPROBE_SYMBOL(synthesize_relcall);
/*
 * Returns non-zero if INSN is boostable.
 * RIP relative instructions are adjusted at copying time in 64 bits mode
 *
 * NOTE(review): this region of the file is garbled.  It opens as
 * can_boost(), but the statements below reference 'kp', 'faddr' and
 * 'buf' — none of which are declared here — and look like the body of
 * an instruction-recovery helper (__recover_probed_insn-style code);
 * the function is also never closed before the next definition begins.
 * Several keyword pairs are fused ("unsignedlong", "returnfalse"), so
 * this block cannot compile as-is.  Code left byte-identical; restore
 * this region from the original source before building.
 */
bool can_boost(struct insn *insn, void *addr)
{
kprobe_opcode_t opcode;
insn_byte_t prefix; int i;
/* Page fault may occur on this address. */
if (search_exception_tables((unsignedlong)addr)) returnfalse;
kp = get_kprobe((void *)addr);
faddr = ftrace_location(addr) == addr;
/*
 * Use the current code if it is not modified by Kprobe
 * and it cannot be modified by ftrace.
 */
if (!kp && !faddr) return addr;
/*
 * Basically, kp->ainsn.insn has an original instruction.
 * However, RIP-relative instruction can not do single-stepping
 * at different place, __copy_instruction() tweaks the displacement of
 * that instruction. In that case, we can't recover the instruction
 * from the kp->ainsn.insn.
 *
 * On the other hand, in case on normal Kprobe, kp->opcode has a copy
 * of the first byte of the probed instruction, which is overwritten
 * by int3. And the instruction at kp->addr is not modified by kprobes
 * except for the first byte, we can recover the original instruction
 * from it and kp->opcode.
 *
 * In case of Kprobes using ftrace, we do not have a copy of
 * the original instruction. In fact, the ftrace location might
 * be modified at anytime and even could be in an inconsistent state.
 * Fortunately, we know that the original code is the ideal 5-byte
 * long NOP.
 */
if (copy_from_kernel_nofault(buf, (void *)addr,
MAX_INSN_SIZE * sizeof(kprobe_opcode_t))) return 0UL;
/*
 * Recover the probed instruction at addr for further analysis.
 * Caller must lock kprobes by kprobe_mutex, or disable preemption
 * for preventing to release referencing kprobes.
 * Returns zero if the instruction can not get recovered (or access failed).
 */
unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr)
{
	unsigned long __addr;

	/*
	 * If an optimized probe covers this address, the original bytes
	 * live in the optprobe's saved copy; in that case the helper hands
	 * back a different address pointing into that copy.
	 */
	__addr = __recover_optprobed_insn(buf, addr);
	if (__addr != addr)
		return __addr;

	return __recover_probed_insn(buf, addr);
}
/* Check if insn is INT or UD */ staticinlinebool is_exception_insn(struct insn *insn)
{ /* UD uses 0f escape */ if (insn->opcode.bytes[0] == 0x0f) { /* UD0 / UD1 / UD2 */ return insn->opcode.bytes[1] == 0xff ||
insn->opcode.bytes[1] == 0xb9 ||
insn->opcode.bytes[1] == 0x0b;
}
/* INT3 / INT n / INTO / INT1 */ return insn->opcode.bytes[0] == 0xcc ||
insn->opcode.bytes[0] == 0xcd ||
insn->opcode.bytes[0] == 0xce ||
insn->opcode.bytes[0] == 0xf1;
}
/* * Check if paddr is at an instruction boundary and that instruction can * be probed
*/ staticbool can_probe(unsignedlong paddr)
{ unsignedlong addr, __addr, offset = 0; struct insn insn;
kprobe_opcode_t buf[MAX_INSN_SIZE];
if (!kallsyms_lookup_size_offset(paddr, NULL, &offset)) returnfalse;
/* Decode instructions */
addr = paddr - offset; while (addr < paddr) { /* * Check if the instruction has been modified by another * kprobe, in which case we replace the breakpoint by the * original instruction in our buffer. * Also, jump optimization will change the breakpoint to * relative-jump. Since the relative-jump itself is * normally used, we just go through if there is no kprobe.
*/
__addr = recover_probed_instruction(buf, addr); if (!__addr) returnfalse;
if (insn_decode_kernel(&insn, (void *)__addr) < 0) returnfalse;
#ifdef CONFIG_KGDB /* * If there is a dynamically installed kgdb sw breakpoint, * this function should not be probed.
*/ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
kgdb_has_hit_break(addr)) returnfalse; #endif
addr += insn.length;
}
/* Check if paddr is at an instruction boundary */ if (addr != paddr) returnfalse;
__addr = recover_probed_instruction(buf, addr); if (!__addr) returnfalse;
if (insn_decode_kernel(&insn, (void *)__addr) < 0) returnfalse;
/* INT and UD are special and should not be kprobed */ if (is_exception_insn(&insn)) returnfalse;
if (IS_ENABLED(CONFIG_CFI_CLANG)) { /* * The compiler generates the following instruction sequence * for indirect call checks and cfi.c decodes this; * * movl -<id>, %r10d ; 6 bytes * addl -4(%reg), %r10d ; 4 bytes * je .Ltmp1 ; 2 bytes * ud2 ; <- regs->ip * .Ltmp1: * * Also, these movl and addl are used for showing expected * type. So those must not be touched.
*/ if (insn.opcode.value == 0xBA)
offset = 12; elseif (insn.opcode.value == 0x3)
offset = 6; else goto out;
/* This movl/addl is used for decoding CFI. */ if (is_cfi_trap(addr + offset)) returnfalse;
}
out: returntrue;
}
/* If x86 supports IBT (ENDBR) it must be skipped. */
kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset,
					 bool *on_func_entry)
{
	if (is_endbr((u32 *)addr)) {
		/*
		 * A probe at +0 or just past the 4-byte ENDBR still counts
		 * as "on function entry", but the actual probe point must
		 * land after the ENDBR.
		 */
		*on_func_entry = !offset || offset == 4;
		if (*on_func_entry)
			offset = 4;
	} else {
		*on_func_entry = !offset;
	}

	return (kprobe_opcode_t *)(addr + offset);
}
/*
 * Copy an instruction with recovering modified instruction by kprobes
 * and adjust the displacement if the instruction uses the %rip-relative
 * addressing mode. Note that since @real will be the final place of copied
 * instruction, displacement must be adjust by @real, not @dest.
 * This returns the length of copied instruction, or 0 if it has an error.
 */
int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
{
	kprobe_opcode_t buf[MAX_INSN_SIZE];
	unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src);
	int ret;

	if (!recovered_insn || !insn)
		return 0;

	/* This can access kernel text if given address is not recovered */
	if (copy_from_kernel_nofault(dest, (void *)recovered_insn,
				     MAX_INSN_SIZE))
		return 0;

	ret = insn_decode_kernel(insn, dest);
	if (ret < 0)
		return 0;

	/* We can not probe force emulate prefixed instruction */
	if (insn_has_emulate_prefix(insn))
		return 0;

	/* Another subsystem puts a breakpoint, failed to recover */
	if (insn->opcode.bytes[0] == INT3_INSN_OPCODE)
		return 0;

	/* We should not singlestep on the exception masking instructions */
	if (insn_masking_exception(insn))
		return 0;

#ifdef CONFIG_X86_64
	/* Only x86_64 has RIP relative instructions */
	if (insn_rip_relative(insn)) {
		s64 newdisp;
		u8 *disp;
		/*
		 * The copied instruction uses the %rip-relative addressing
		 * mode. Adjust the displacement for the difference between
		 * the original location of this instruction and the location
		 * of the copy that will actually be run. The tricky bit here
		 * is making sure that the sign extension happens correctly in
		 * this calculation, since we need a signed 32-bit result to
		 * be sign-extended to 64 bits when it's added to the %rip
		 * value and yield the same 64-bit result that the sign-
		 * extension of the original signed 32-bit displacement would
		 * have given.
		 */
		newdisp = (u8 *) src + (s64) insn->displacement.value
			  - (u8 *) real;
		if ((s64) (s32) newdisp != newdisp) {
			pr_err("Kprobes error: new displacement does not fit into s32 (%llx)\n", newdisp);
			return 0;
		}
		disp = (u8 *) dest + insn_offset_displacement(insn);
		*(s32 *) disp = (s32) newdisp;
	}
#endif
	return insn->length;
}
/* Prepare reljump or int3 right after instruction */
static int prepare_singlestep(kprobe_opcode_t *buf, struct kprobe *p,
			      struct insn *insn)
{
	int len = insn->length;

	if (!IS_ENABLED(CONFIG_PREEMPTION) &&
	    !p->post_handler && can_boost(insn, p->addr) &&
	    MAX_INSN_SIZE - len >= JMP32_INSN_SIZE) {
		/*
		 * These instructions can be executed directly if it
		 * jumps back to correct address.
		 */
		synthesize_reljump(buf + len, p->ainsn.insn + len,
				   p->addr + insn->length);
		len += JMP32_INSN_SIZE;
		p->ainsn.boostable = 1;
	} else {
		/* Otherwise, put an int3 for trapping singlestep */
		if (MAX_INSN_SIZE - len < INT3_INSN_SIZE)
			return -ENOSPC;
		buf[len] = INT3_INSN_OPCODE;
		len += INT3_INSN_SIZE;
	}

	return len;
}
/* Kprobe x86 instruction emulation - only regs->ip or IF flag modifiers */
/*
 * NOTE(review): this region is garbled.  The switch below is the interior
 * of an opcode-dispatch function ('opcode', 'p' and 'insn' are never
 * declared here and the enclosing function header is missing), and the
 * statements after the 0xff case belong to yet another function (they use
 * 'len', 'ret' and 'buf', also undeclared, and call prepare_emulation()
 * itself).  Fused tokens ("elseif", "unsignedlong") are left byte-identical;
 * restore this region from the original source before building.
 */
switch (opcode) {
case 0xfa:	/* cli */
case 0xfb:	/* sti */
case 0x9c:	/* pushfl */
case 0x9d:	/* popf/popfd */
	/*
	 * IF modifiers must be emulated since it will enable interrupt while
	 * int3 single stepping.
	 */
	p->ainsn.emulate_op = kprobe_emulate_ifmodifiers;
	p->ainsn.opcode = opcode;
	break;
case 0xc2:	/* ret/lret */
case 0xc3:
case 0xca:
case 0xcb:
	p->ainsn.emulate_op = kprobe_emulate_ret;
	break;
case 0x9a:	/* far call absolute -- segment is not supported */
case 0xea:	/* far jmp absolute -- segment is not supported */
case 0xcc:	/* int3 */
case 0xcf:	/* iret -- in-kernel IRET is not supported */
	return -EOPNOTSUPP;
	break;
case 0xe8:	/* near call relative */
	p->ainsn.emulate_op = kprobe_emulate_call;
	if (insn->immediate.nbytes == 2)
		p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
	else
		p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
	break;
case 0xeb:	/* short jump relative */
case 0xe9:	/* near jump relative */
	p->ainsn.emulate_op = kprobe_emulate_jmp;
	if (insn->immediate.nbytes == 1)
		p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
	elseif (insn->immediate.nbytes == 2)	/* NOTE(review): fused "else if" */
		p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
	else
		p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
	break;
case 0x70 ... 0x7f:	/* 1 byte conditional jump */
	p->ainsn.emulate_op = kprobe_emulate_jcc;
	p->ainsn.jcc.type = opcode & 0xf;
	p->ainsn.rel32 = insn->immediate.value;
	break;
case 0x0f:
	opcode = insn->opcode.bytes[1];
	if ((opcode & 0xf0) == 0x80) {
		/* 2 bytes Conditional Jump */
		p->ainsn.emulate_op = kprobe_emulate_jcc;
		p->ainsn.jcc.type = opcode & 0xf;
		if (insn->immediate.nbytes == 2)
			p->ainsn.rel32 = *(s16 *)&insn->immediate.value;
		else
			p->ainsn.rel32 = *(s32 *)&insn->immediate.value;
	} elseif (opcode == 0x01 &&	/* NOTE(review): fused "else if" */
		  X86_MODRM_REG(insn->modrm.bytes[0]) == 0 &&
		  X86_MODRM_MOD(insn->modrm.bytes[0]) == 3) {
		/* VM extensions - not supported */
		return -EOPNOTSUPP;
	}
	break;
case 0xe0:	/* Loop NZ */
case 0xe1:	/* Loop */
case 0xe2:	/* Loop */
case 0xe3:	/* J*CXZ */
	p->ainsn.emulate_op = kprobe_emulate_loop;
	p->ainsn.loop.type = opcode & 0x3;
	p->ainsn.loop.asize = insn->addr_bytes * 8;
	p->ainsn.rel32 = *(s8 *)&insn->immediate.value;
	break;
case 0xff:
	/*
	 * Since the 0xff is an extended group opcode, the instruction
	 * is determined by the MOD/RM byte.
	 */
	opcode = insn->modrm.bytes[0];
	switch (X86_MODRM_REG(opcode)) {
	case 0b010:	/* FF /2, call near, absolute indirect */
		p->ainsn.emulate_op = kprobe_emulate_call_indirect;
		break;
	case 0b100:	/* FF /4, jmp near, absolute indirect */
		p->ainsn.emulate_op = kprobe_emulate_jmp_indirect;
		break;
	case 0b011:	/* FF /3, call far, absolute indirect */
	case 0b101:	/* FF /5, jmp far, absolute indirect */
		return -EOPNOTSUPP;
	}
	if (!p->ainsn.emulate_op)
		break;
	if (insn->addr_bytes != sizeof(unsignedlong))	/* NOTE(review): fused "unsigned long" */
		return -EOPNOTSUPP;	/* Don't support different size */
	if (X86_MODRM_MOD(opcode) != 3)
		return -EOPNOTSUPP;	/* TODO: support memory addressing */
/*
 * NOTE(review): the statements below are from a different function
 * (the arch_copy_kprobe()-style setup path); their enclosing definition
 * is missing from this text.
 */
/* Copy an instruction with recovering if other optprobe modifies it.*/
len = __copy_instruction(buf, p->addr, p->ainsn.insn, &insn);
if (!len)
	return -EINVAL;
/* Analyze the opcode and setup emulate functions */
ret = prepare_emulation(p, &insn);
if (ret < 0)
	return ret;
/* Add int3 for single-step or booster jmp */
len = prepare_singlestep(buf, p, &insn);
if (len < 0)
	return len;
/* Also, displacement change doesn't affect the first byte */
p->opcode = buf[0];
/* Finish a kprobe hit: run the post handler and drop the kprobe state. */
static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
				struct kprobe_ctlblk *kcb)
{
	/* Restore back the original saved kprobes variables and continue. */
	if (kcb->kprobe_status == KPROBE_REENTER) {
		/* This will restore both kcb and current_kprobe */
		restore_previous_kprobe(kcb);
	} else {
		/*
		 * Always update the kcb status because
		 * reset_current_kprobe() doesn't update kcb.
		 */
		kcb->kprobe_status = KPROBE_HIT_SSDONE;
		if (cur->post_handler)
			cur->post_handler(cur, regs, 0);
		reset_current_kprobe();
	}
}
NOKPROBE_SYMBOL(kprobe_post_process);
staticvoid setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb, int reenter)
{ if (setup_detour_execution(p, regs, reenter)) return;
#if !defined(CONFIG_PREEMPTION) if (p->ainsn.boostable) { /* Boost up -- we can execute copied instructions directly */ if (!reenter)
reset_current_kprobe(); /* * Reentering boosted probe doesn't reset current_kprobe, * nor set current_kprobe, because it doesn't use single * stepping.
*/
regs->ip = (unsignedlong)p->ainsn.insn; return;
} #endif if (reenter) {
save_previous_kprobe(kcb);
set_current_kprobe(p, regs, kcb);
kcb->kprobe_status = KPROBE_REENTER;
} else
kcb->kprobe_status = KPROBE_HIT_SS;
if (p->ainsn.emulate_op) {
p->ainsn.emulate_op(p, regs);
kprobe_post_process(p, regs, kcb); return;
}
/* Disable interrupt, and set ip register on trampoline */
regs->flags &= ~X86_EFLAGS_IF;
regs->ip = (unsignedlong)p->ainsn.insn;
}
NOKPROBE_SYMBOL(setup_singlestep);
/* * Called after single-stepping. p->addr is the address of the * instruction whose first byte has been replaced by the "int3" * instruction. To avoid the SMP problems that can occur when we * temporarily put back the original opcode to single-step, we * single-stepped a copy of the instruction. The address of this * copy is p->ainsn.insn. We also doesn't use trap, but "int3" again * right after the copied instruction. * Different from the trap single-step, "int3" single-step can not * handle the instruction which changes the ip register, e.g. jmp, * call, conditional jmp, and the instructions which changes the IF * flags because interrupt must be disabled around the single-stepping. * Such instructions are software emulated, but others are single-stepped * using "int3". * * When the 2nd "int3" handled, the regs->ip and regs->flags needs to * be adjusted, so that we can resume execution on correct code.
*/ staticvoid resume_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb)
{ unsignedlong copy_ip = (unsignedlong)p->ainsn.insn; unsignedlong orig_ip = (unsignedlong)p->addr;
/* Restore saved interrupt flag and ip register */
regs->flags |= kcb->kprobe_saved_flags; /* Note that regs->ip is executed int3 so must be a step back */
regs->ip += (orig_ip - copy_ip) - INT3_INSN_SIZE;
}
NOKPROBE_SYMBOL(resume_singlestep);
/*
 * We have reentered the kprobe_handler(), since another probe was hit while
 * within the handler. We save the original kprobes variables and just single
 * step on the instruction of the new probe without calling any user handlers.
 */
static int reenter_kprobe(struct kprobe *p, struct pt_regs *regs,
			  struct kprobe_ctlblk *kcb)
{
	switch (kcb->kprobe_status) {
	case KPROBE_HIT_SSDONE:
	case KPROBE_HIT_ACTIVE:
	case KPROBE_HIT_SS:
		kprobes_inc_nmissed_count(p);
		setup_singlestep(p, regs, kcb, 1);
		break;
	case KPROBE_REENTER:
		/* A probe has been hit in the codepath leading up to, or just
		 * after, single-stepping of a probed instruction. This entire
		 * codepath should strictly reside in .kprobes.text section.
		 * Raise a BUG or we'll continue in an endless reentering loop
		 * and eventually a stack overflow.
		 */
		pr_err("Unrecoverable kprobe detected.\n");
		dump_kprobe(p);
		BUG();
	default:
		/* impossible cases */
		WARN_ON(1);
		return 0;
	}

	/*
	 * NOTE(review): the original text was cut off after the switch;
	 * the trailing "handled" return and closing brace are restored
	 * here so the function is well-formed — confirm against upstream.
	 */
	return 1;
}
/* * Interrupts are disabled on entry as trap3 is an interrupt gate and they * remain disabled throughout this function.
*/ int kprobe_int3_handler(struct pt_regs *regs)
{
kprobe_opcode_t *addr; struct kprobe *p; struct kprobe_ctlblk *kcb;
if (user_mode(regs)) return 0;
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t)); /* * We don't want to be preempted for the entire duration of kprobe * processing. Since int3 and debug trap disables irqs and we clear * IF while singlestepping, it must be no preemptible.
*/
kcb = get_kprobe_ctlblk();
p = get_kprobe(addr);
if (p) { if (kprobe_running()) { if (reenter_kprobe(p, regs, kcb)) return 1;
} else {
set_current_kprobe(p, regs, kcb);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
/* * If we have no pre-handler or it returned 0, we * continue with normal processing. If we have a * pre-handler and it returned non-zero, that means * user handler setup registers to exit to another * instruction, we must skip the single stepping.
*/ if (!p->pre_handler || !p->pre_handler(p, regs))
setup_singlestep(p, regs, kcb, 0); else
reset_current_kprobe(); return 1;
}
} elseif (kprobe_is_ss(kcb)) {
p = kprobe_running(); if ((unsignedlong)p->ainsn.insn < regs->ip &&
(unsignedlong)p->ainsn.insn + MAX_INSN_SIZE > regs->ip) { /* Most provably this is the second int3 for singlestep */
resume_singlestep(p, regs, kcb);
kprobe_post_process(p, regs, kcb); return 1;
}
} /* else: not a kprobe fault; let the kernel handle it */
return 0;
}
NOKPROBE_SYMBOL(kprobe_int3_handler);
int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
{ struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
if (unlikely(regs->ip == (unsignedlong)cur->ainsn.insn)) { /* This must happen on single-stepping */
WARN_ON(kcb->kprobe_status != KPROBE_HIT_SS &&
kcb->kprobe_status != KPROBE_REENTER); /* * We are here because the instruction being single * stepped caused a page fault. We reset the current * kprobe and the ip points back to the probe address * and allow the page fault handler to continue as a * normal page fault.
*/
regs->ip = (unsignedlong)cur->addr;
/* * If the IF flag was set before the kprobe hit, * don't touch it:
*/
regs->flags |= kcb->kprobe_old_flags;
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb); else
reset_current_kprobe();
}
/*
 * NOTE(review): trailing boilerplate from the web page this text was
 * extracted from, translated from German; it is not part of the source
 * and should be removed:
 *
 *   "The information on this website has been carefully compiled to the
 *    best of our knowledge.  However, neither completeness, correctness,
 *    nor quality of the information provided is guaranteed.
 *    Note: the colored syntax highlighting and the measurement are still
 *    experimental."
 */