/* * opcodes we may need to refine support for: * * 0f - 2-byte instructions: For many of these instructions, the validity * depends on the prefix and/or the reg field. On such instructions, we * just consider the opcode combination valid if it corresponds to any * valid instruction. * * 8f - Group 1 - only reg = 0 is OK * c6-c7 - Group 11 - only reg = 0 is OK * d9-df - fpu insns with some illegal encodings * f2, f3 - repnz, repz prefixes. These are also the first byte for * certain floating-point instructions, such as addsd. * * fe - Group 4 - only reg = 0 or 1 is OK * ff - Group 5 - only reg = 0-6 is OK * * others -- Do we need to support these? * * 0f - (floating-point?) prefetch instructions * 07, 17, 1f - pop es, pop ss, pop ds * 26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes -- * but 64 and 65 (fs: and gs:) seem to be used, so we support them * 67 - addr16 prefix * ce - into * f0 - lock prefix
*/
/* * TODO: * - Where necessary, examine the modrm byte and allow only valid instructions * in the different Groups and fpu instructions.
*/
staticbool is_prefix_bad(struct insn *insn)
{
insn_byte_t p; int i;
for_each_insn_prefix(insn, i, p) {
insn_attr_t attr;
attr = inat_get_opcode_attribute(p); switch (attr) { case INAT_MAKE_PREFIX(INAT_PFX_ES): case INAT_MAKE_PREFIX(INAT_PFX_CS): case INAT_MAKE_PREFIX(INAT_PFX_DS): case INAT_MAKE_PREFIX(INAT_PFX_SS): case INAT_MAKE_PREFIX(INAT_PFX_LOCK): returntrue;
}
} returnfalse;
}
/* The uretprobe syscall replaces stored %rax value with final * return address, so we don't restore %rax in here and just * call ret.
*/ "retq\n" ".global uretprobe_trampoline_end\n" "uretprobe_trampoline_end:\n" ".popsection\n"
);
/* * At the moment the uretprobe syscall trampoline is supported * only for native 64-bit process, the compat process still uses * standard breakpoint.
*/ if (user_64bit_mode(regs)) {
*psize = uretprobe_trampoline_end - uretprobe_trampoline_entry; return uretprobe_trampoline_entry;
}
/* If there's no trampoline, we are called from wrong place. */
tramp = uprobe_get_trampoline_vaddr(); if (unlikely(tramp == UPROBE_NO_TRAMPOLINE_VADDR)) goto sigill;
/* Make sure the ip matches the only allowed sys_uretprobe caller. */ if (unlikely(regs->ip != trampoline_check_ip(tramp))) goto sigill;
/* expose the "right" values of r11/cx/ax/sp to uprobe_consumer/s */
regs->r11 = r11_cx_ax[0];
regs->cx = r11_cx_ax[1];
regs->ax = r11_cx_ax[2];
regs->sp += sizeof(r11_cx_ax);
regs->orig_ax = -1;
ip = regs->ip;
sp = regs->sp;
uprobe_handle_trampoline(regs);
/* * Some of the uprobe consumers has changed sp, we can do nothing, * just return via iret. * .. or shadow stack is enabled, in which case we need to skip * return through the user space stack address.
*/ if (regs->sp != sp || shstk_is_enabled()) return regs->ax;
regs->sp -= sizeof(r11_cx_ax);
/* for the case uprobe_consumer has changed r11/cx */
r11_cx_ax[0] = regs->r11;
r11_cx_ax[1] = regs->cx;
/* * ax register is passed through as return value, so we can use * its space on stack for ip value and jump to it through the * trampoline's ret instruction
*/
r11_cx_ax[2] = regs->ip;
regs->ip = ip;
/*
 * If arch_uprobe->insn doesn't use rip-relative addressing, return
 * immediately. Otherwise, rewrite the instruction so that it accesses
 * its memory operand indirectly through a scratch register. Set
 * defparam->fixups accordingly. (The contents of the scratch register
 * will be saved before we single-step the modified instruction,
 * and restored afterward).
 *
 * We do this because a rip-relative instruction can access only a
 * relatively small area (+/- 2 GB from the instruction), and the XOL
 * area typically lies beyond that area. At least for instructions
 * that store to memory, we can't execute the original instruction
 * and "fix things up" later, because the misdirected store could be
 * disastrous.
 *
 * Some useful facts about rip-relative instructions:
 *
 * - There's always a modrm byte with bit layout "00 reg 101".
 * - There's never a SIB byte.
 * - The displacement is always 4 bytes.
 * - REX.B=1 bit in REX prefix, which normally extends r/m field,
 *   has no effect on rip-relative mode. It doesn't make modrm byte
 *   with r/m=101 refer to register 1101 = R13.
 */
static void riprel_analyze(struct arch_uprobe *auprobe, struct insn *insn)
{
	u8 *cursor;
	u8 reg;
	u8 reg2;

	if (!insn_rip_relative(insn))
		return;

	/*
	 * insn_rip_relative() would have decoded rex_prefix, vex_prefix, modrm.
	 * Clear REX.b bit (extension of MODRM.rm field):
	 * we want to encode low numbered reg, not r8+.
	 */
	if (insn->rex_prefix.nbytes) {
		cursor = auprobe->insn + insn_offset_rex_prefix(insn);
		/* REX byte has 0100wrxb layout, clearing REX.b bit */
		*cursor &= 0xfe;
	}
	/*
	 * Similar treatment for VEX3/EVEX prefix.
	 * TODO: add XOP treatment when insn decoder supports them
	 */
	if (insn->vex_prefix.nbytes >= 3) {
		/*
		 * vex2:     c5    rvvvvLpp   (has no b bit)
		 * vex3/xop: c4/8f rxbmmmmm wvvvvLpp
		 * evex:     62    rxbR00mm wvvvv1pp zllBVaaa
		 * Setting VEX3.b (setting because it has inverted meaning).
		 * Setting EVEX.x since (in non-SIB encoding) EVEX.x
		 * is the 4th bit of MODRM.rm, and needs the same treatment.
		 * For VEX3-encoded insns, VEX3.x value has no effect in
		 * non-SIB encoding, the change is superfluous but harmless.
		 */
		cursor = auprobe->insn + insn_offset_vex_prefix(insn) + 1;
		*cursor |= 0x60;
	}
	/*
	 * Convert from rip-relative addressing to register-relative addressing
	 * via a scratch register.
	 *
	 * This is tricky since there are insns with modrm byte
	 * which also use registers not encoded in modrm byte:
	 * [i]div/[i]mul: implicitly use dx:ax
	 * shift ops: implicitly use cx
	 * cmpxchg: implicitly uses ax
	 * cmpxchg8/16b: implicitly uses dx:ax and bx:cx
	 *   Encoding: 0f c7/1 modrm
	 *   The code below thinks that reg=1 (cx), chooses si as scratch.
	 * mulx: implicitly uses dx: mulx r/m,r1,r2 does r1:r2 = dx * r/m.
	 *   First appeared in Haswell (BMI2 insn). It is vex-encoded.
	 *   Example where none of bx,cx,dx can be used as scratch reg:
	 *   c4 e2 63 f6 0d disp32   mulx disp32(%rip),%ebx,%ecx
	 * [v]pcmpistri: implicitly uses cx, xmm0
	 * [v]pcmpistrm: implicitly uses xmm0
	 * [v]pcmpestri: implicitly uses ax, dx, cx, xmm0
	 * [v]pcmpestrm: implicitly uses ax, dx, xmm0
	 *   Evil SSE4.2 string comparison ops from hell.
	 * maskmovq/[v]maskmovdqu: implicitly uses (ds:rdi) as destination.
	 *   Encoding: 0f f7 modrm, 66 0f f7 modrm, vex-encoded: c5 f9 f7 modrm.
	 *   Store op1, byte-masked by op2 msb's in each byte, to (ds:rdi).
	 *   AMD says it has no 3-operand form (vex.vvvv must be 1111)
	 *   and that it can have only register operands, not mem
	 *   (its modrm byte must have mode=11).
	 *   If these restrictions will ever be lifted,
	 *   we'll need code to prevent selection of di as scratch reg!
	 *
	 * Summary: I don't know any insns with modrm byte which
	 * use SI register implicitly. DI register is used only
	 * by one insn (maskmovq) and BX register is used
	 * only by one too (cmpxchg8b).
	 * BP is stack-segment based (may be a problem?).
	 * AX, DX, CX are off-limits (many implicit users).
	 * SP is unusable (it's stack pointer - think about "pop mem";
	 * also, rsp+disp32 needs sib encoding -> insn length change).
	 */
	reg = MODRM_REG(insn);	/* Fetch modrm.reg */
	reg2 = 0xff;		/* Fetch vex.vvvv */
	if (insn->vex_prefix.nbytes)
		reg2 = insn->vex_prefix.bytes[2];
	/*
	 * TODO: add XOP vvvv reading.
	 *
	 * vex.vvvv field is in bits 6-3, bits are inverted.
	 * But in 32-bit mode, high-order bit may be ignored.
	 * Therefore, let's consider only 3 low-order bits.
	 */
	reg2 = ((reg2 >> 3) & 0x7) ^ 0x7;
	/*
	 * Register numbering is ax,cx,dx,bx, sp,bp,si,di, r8..r15.
	 *
	 * Choose scratch reg. Order is important: must not select bx
	 * if we can use si (cmpxchg8b case!)
	 */
	if (reg != 6 && reg2 != 6) {
		reg2 = 6;
		auprobe->defparam.fixups |= UPROBE_FIX_RIP_SI;
	} else if (reg != 7 && reg2 != 7) {
		reg2 = 7;
		auprobe->defparam.fixups |= UPROBE_FIX_RIP_DI;
		/* TODO (paranoia): force maskmovq to not use di */
	} else {
		reg2 = 3;
		auprobe->defparam.fixups |= UPROBE_FIX_RIP_BX;
	}
	/*
	 * Point cursor at the modrm byte. The next 4 bytes are the
	 * displacement. Beyond the displacement, for some instructions,
	 * is the immediate operand.
	 */
	cursor = auprobe->insn + insn_offset_modrm(insn);
	/*
	 * Change modrm from "00 reg 101" to "10 reg reg2". Example:
	 * 89 05 disp32  mov %eax,disp32(%rip) becomes
	 * 89 86 disp32  mov %eax,disp32(%rsi)
	 */
	*cursor = 0x80 | (reg << 3) | reg2;
}
/* * If we're emulating a rip-relative instruction, save the contents * of the scratch register and store the target address in that register.
*/ staticvoid riprel_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{ if (auprobe->defparam.fixups & UPROBE_FIX_RIP_MASK) { struct uprobe_task *utask = current->utask; unsignedlong *sr = scratch_reg(auprobe, regs);
if (copy_to_user((void __user *)new_sp, &val, sizeof_long(regs))) return -EFAULT;
regs->sp = new_sp; return 0;
}
/* * We have to fix things up as follows: * * Typically, the new ip is relative to the copied instruction. We need * to make it relative to the original instruction (FIX_IP). Exceptions * are return instructions and absolute or indirect jump or call instructions. * * If the single-stepped instruction was a call, the return address that * is atop the stack is the address following the copied instruction. We * need to make it the address following the original instruction (FIX_CALL). * * If the original instruction was a rip-relative instruction such as * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent * instruction using a scratch register -- e.g., "movl %edx,0xnnnn(%rsi)". * We need to restore the contents of the scratch register * (FIX_RIP_reg).
*/ staticint default_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{ struct uprobe_task *utask = current->utask;
riprel_post_xol(auprobe, regs); if (auprobe->defparam.fixups & UPROBE_FIX_IP) { long correction = utask->vaddr - utask->xol_vaddr;
regs->ip += correction;
} elseif (auprobe->defparam.fixups & UPROBE_FIX_CALL) {
regs->sp += sizeof_long(regs); /* Pop incorrect return address */ if (emulate_push_stack(regs, utask->vaddr + auprobe->defparam.ilen)) return -ERESTART;
} /* popf; tell the caller to not touch TF */ if (auprobe->defparam.fixups & UPROBE_FIX_SETF)
utask->autask.saved_tf = true;
if (branch_is_call(auprobe)) { /* * If it fails we execute this (mangled, see the comment in * branch_clear_offset) insn out-of-line. In the likely case * this should trigger the trap, and the probed application * should die or restart the same insn after it handles the * signal, arch_uprobe_post_xol() won't be even called. * * But there is corner case, see the comment in ->post_xol().
*/ if (emulate_push_stack(regs, new_ip)) returnfalse;
} elseif (!check_jmp_cond(auprobe, regs)) {
offs = 0;
}
if (emulate_push_stack(regs, *src_ptr)) returnfalse;
regs->ip += auprobe->push.ilen; returntrue;
}
/*
 * Undo the side effect of a failed out-of-line "call" and request a
 * restart of the probed instruction.
 */
static int branch_post_xol_op(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	unsigned long slot = sizeof_long(regs);

	BUG_ON(!branch_is_call(auprobe));
	/*
	 * We can only get here if branch_emulate_op() failed to push the ret
	 * address _and_ another thread expanded our stack before the (mangled)
	 * "call" insn was executed out-of-line. Just restore ->sp and restart.
	 * We could also restore ->ip and try to call branch_emulate_op() again.
	 */
	regs->sp += slot;
	return -ERESTART;
}
staticvoid branch_clear_offset(struct arch_uprobe *auprobe, struct insn *insn)
{ /* * Turn this insn into "call 1f; 1:", this is what we will execute * out-of-line if ->emulate() fails. We only need this to generate * a trap, so that the probed task receives the correct signal with * the properly filled siginfo. * * But see the comment in ->post_xol(), in the unlikely case it can * succeed. So we need to ensure that the new ->ip can not fall into * the non-canonical area and trigger #GP. * * We could turn it into (say) "pushf", but then we would need to * divorce ->insn[] and ->ixol[]. We need to preserve the 1st byte * of ->insn[] for set_orig_insn().
*/
memset(auprobe->insn + insn_offset_immediate(insn),
0, insn->immediate.nbytes);
}
/* Returns -ENOSYS if branch_xol_ops doesn't handle this insn */ staticint branch_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
{
u8 opc1 = OPCODE1(insn);
insn_byte_t p; int i;
/* x86_nops[insn->length]; same as jmp with .offs = 0 */ if (insn->length <= ASM_NOP_MAX &&
!memcmp(insn->kaddr, x86_nops[insn->length], insn->length)) goto setup;
switch (opc1) { case 0xeb: /* jmp 8 */ case 0xe9: /* jmp 32 */ break; case 0x90: /* prefix* + nop; same as jmp with .offs = 0 */ goto setup;
case 0xe8: /* call relative */
branch_clear_offset(auprobe, insn); break;
case 0x0f: if (insn->opcode.nbytes != 2) return -ENOSYS; /* * If it is a "near" conditional jmp, OPCODE2() - 0x10 matches * OPCODE1() of the "short" jmp which checks the same condition.
*/
opc1 = OPCODE2(insn) - 0x10;
fallthrough; default: if (!is_cond_jmp_opcode(opc1)) return -ENOSYS;
}
/* * 16-bit overrides such as CALLW (66 e8 nn nn) are not supported. * Intel and AMD behavior differ in 64-bit mode: Intel ignores 66 prefix. * No one uses these insns, reject any branch insns with such prefix.
*/
for_each_insn_prefix(insn, i, p) { if (p == 0x66) return -ENOTSUPP;
}
/** * arch_uprobe_analyze_insn - instruction analysis including validity and fixups. * @auprobe: the probepoint information. * @mm: the probed address space. * @addr: virtual address at which to install the probepoint * Return 0 on success or a -ve number on error.
*/ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsignedlong addr)
{ struct insn insn;
u8 fix_ip_or_call = UPROBE_FIX_IP; int ret;
ret = uprobe_init_insn(auprobe, &insn, is_64bit_mm(mm)); if (ret) return ret;
ret = branch_setup_xol_ops(auprobe, &insn); if (ret != -ENOSYS) return ret;
ret = push_setup_xol_ops(auprobe, &insn); if (ret != -ENOSYS) return ret;
/* * Figure out which fixups default_post_xol_op() will need to perform, * and annotate defparam->fixups accordingly.
*/ switch (OPCODE1(&insn)) { case 0x9d: /* popf */
auprobe->defparam.fixups |= UPROBE_FIX_SETF; break; case 0xc3: /* ret or lret -- ip is correct */ case 0xcb: case 0xc2: case 0xca: case 0xea: /* jmp absolute -- ip is correct */
fix_ip_or_call = 0; break; case 0x9a: /* call absolute - Fix return addr, not ip */
fix_ip_or_call = UPROBE_FIX_CALL; break; case 0xff: switch (MODRM_REG(&insn)) { case 2: case 3: /* call or lcall, indirect */
fix_ip_or_call = UPROBE_FIX_CALL; break; case 4: case 5: /* jmp or ljmp, indirect */
fix_ip_or_call = 0; break;
}
fallthrough; default:
riprel_analyze(auprobe, &insn);
}
/* * arch_uprobe_pre_xol - prepare to execute out of line. * @auprobe: the probepoint information. * @regs: reflects the saved user state of current task.
*/ int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{ struct uprobe_task *utask = current->utask;
if (auprobe->ops->pre_xol) { int err = auprobe->ops->pre_xol(auprobe, regs); if (err) return err;
}
/*
 * If xol insn itself traps and generates a signal (say,
 * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped
 * instruction jumps back to its own address. It is assumed that anything
 * like do_page_fault/do_trap/etc sets thread.trap_nr != -1.
 *
 * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr,
 * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to
 * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol().
 */
bool arch_uprobe_xol_was_trapped(struct task_struct *t)
{
	return t->thread.trap_nr != UPROBE_TRAP_NR;
}
/*
 * Called after single-stepping. To avoid the SMP problems that can
 * occur when we temporarily put back the original opcode to
 * single-step, we single-stepped a copy of the instruction.
 *
 * This function prepares to resume execution after the single-step.
 */
int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	struct uprobe_task *utask = current->utask;
	/* Only deliver SIGTRAP if TF was set by the user before we stepped. */
	bool send_sigtrap = utask->autask.saved_tf;
	int err = 0;

	if (auprobe->ops->post_xol) {
		err = auprobe->ops->post_xol(auprobe, regs);
		if (err) {
			/*
			 * Restore ->ip for restart or post mortem analysis.
			 * ->post_xol() must not return -ERESTART unless this
			 * is really possible.
			 */
			regs->ip = utask->vaddr;
			if (err == -ERESTART)
				err = 0;
			/* On failure, suppress the user-visible trap. */
			send_sigtrap = false;
		}
	}
	/*
	 * arch_uprobe_pre_xol() doesn't save the state of TIF_BLOCKSTEP
	 * so we can get an extra SIGTRAP if we do not clear TF. We need
	 * to examine the opcode to make it right.
	 */
	if (send_sigtrap)
		send_sig(SIGTRAP, current, 0);
	/* Clear TF only if it was us (not the user) who set it. */
	if (!utask->autask.saved_tf)
		regs->flags &= ~X86_EFLAGS_TF;

	return err;
}
/* callback routine for handling exceptions. */ int arch_uprobe_exception_notify(struct notifier_block *self, unsignedlong val, void *data)
{ struct die_args *args = data; struct pt_regs *regs = args->regs; int ret = NOTIFY_DONE;
/* We are only interested in userspace traps */ if (regs && !user_mode(regs)) return NOTIFY_DONE;
switch (val) { case DIE_INT3: if (uprobe_pre_sstep_notifier(regs))
ret = NOTIFY_STOP;
break;
case DIE_DEBUG: if (uprobe_post_sstep_notifier(regs))
ret = NOTIFY_STOP;
break;
default: break;
}
return ret;
}
/*
 * This function gets called when XOL instruction either gets trapped or
 * the thread has a fatal signal. Reset the instruction pointer to its
 * probed address for the potential restart or for post mortem analysis.
 */
void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
{
	struct uprobe_task *utask = current->utask;

	if (auprobe->ops->abort)
		auprobe->ops->abort(auprobe, regs);

	current->thread.trap_nr = utask->autask.saved_trap_nr;
	regs->ip = utask->vaddr;

	/* Clear TF if it was set by us in arch_uprobe_pre_xol(). */
	if (!utask->autask.saved_tf)
		regs->flags &= ~X86_EFLAGS_TF;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.