/* Initialize a thunk with the "jmp *reg; int3" instructions. */ staticvoid *its_init_thunk(void *thunk, int reg)
{
u8 *bytes = thunk; int offset = 0; int i = 0;
#ifdef CONFIG_FINEIBT if (cfi_paranoid) { /* * When ITS uses indirect branch thunk the fineibt_paranoid * caller sequence doesn't fit in the caller site. So put the * remaining part of the sequence (<ea> + JNE) into the ITS * thunk.
*/
bytes[i++] = 0xea; /* invalid instruction */
bytes[i++] = 0x75; /* JNE */
bytes[i++] = 0xfd;
#ifdef CONFIG_FINEIBT /* * The ITS thunk contains an indirect jump and an int3 instruction so * its size is 3 or 4 bytes depending on the register used. If CFI * paranoid is used then 3 extra bytes are added in the ITS thunk to * complete the fineibt_paranoid caller sequence.
*/ if (cfi_paranoid)
size += 3; #endif
/* * If the indirect branch instruction will be in the lower half * of a cacheline, then update the offset to reach the upper half.
*/ if ((its_offset + size - 1) % 64 < 32)
its_offset = ((its_offset - 1) | 0x3F) + 33;
/* * Nomenclature for variable names to simplify and clarify this code and ease * any potential staring at it: * * @instr: source address of the original instructions in the kernel text as * generated by the compiler. * * @buf: temporary buffer on which the patching operates. This buffer is * eventually text-poked into the kernel image. * * @replacement/@repl: pointer to the opcodes which are replacing @instr, located * in the .altinstr_replacement section.
*/
/*
 * Fill the buffer with a single effective instruction of size @len.
 *
 * In order not to issue an ORC stack depth tracking CFI entry (Call Frame Info)
 * for every single-byte NOP, try to generate the maximally available NOP of
 * size <= ASM_NOP_MAX such that only a single CFI entry is generated (vs one for
 * each single-byte NOPs). If @len to fill out is > ASM_NOP_MAX, pad with INT3 and
 * *jump* over instead of executing long and daft NOPs.
 */
static void add_nop(u8 *buf, unsigned int len)
{
	u8 *target = buf + len;

	if (!len)
		return;

	/* A single NOP of the requested size suffices. */
	if (len <= ASM_NOP_MAX) {
		memcpy(buf, x86_nops[len], len);
		return;
	}

	/* Pad the remainder with INT3 up to @target. */
	for (; buf < target; buf++)
		*buf = INT3_INSN_OPCODE;
}
/* * Matches NOP and NOPL, not any of the other possible NOPs.
*/ staticbool insn_is_nop(struct insn *insn)
{ /* Anything NOP, but no REP NOP */ if (insn->opcode.bytes[0] == 0x90 &&
(!insn->prefixes.nbytes || insn->prefixes.bytes[0] != 0xF3)) returntrue;
/*
 * Find the offset of the first non-NOP instruction starting at @offset
 * but no further than @len.
 */
static int skip_nops(u8 *buf, int offset, int len)
{
	struct insn insn;

	for (; offset < len; offset += insn.length) {
		/* Stop on undecodable bytes as well as on real instructions. */
		if (insn_decode_kernel(&insn, &buf[offset]))
			break;

		if (!insn_is_nop(&insn))
			break;
	}

	return offset;
}
/* * "noinline" to cause control flow change and thus invalidate I$ and * cause refetch after modification.
*/ staticvoid noinline optimize_nops(const u8 * const instr, u8 *buf, size_t len)
{ for (int next, i = 0; i < len; i = next) { struct insn insn;
if (insn_decode_kernel(&insn, &buf[i])) return;
next = i + insn.length;
if (insn_is_nop(&insn)) { int nop = i;
/* Has the NOP already been optimized? */ if (i + insn.length == len) return;
/* * In this context, "source" is where the instructions are placed in the * section .altinstr_replacement, for example during kernel build by the * toolchain. * "Destination" is where the instructions are being patched in by this * machinery. * * The source offset is: * * src_imm = target - src_next_ip (1) * * and the target offset is: * * dst_imm = target - dst_next_ip (2) * * so rework (1) as an expression for target like: * * target = src_imm + src_next_ip (1a) * * and substitute in (2) to get: * * dst_imm = (src_imm + src_next_ip) - dst_next_ip (3) * * Now, since the instruction stream is 'identical' at src and dst (it * is being copied after all) it can be stated that: * * src_next_ip = src + ip_offset * dst_next_ip = dst + ip_offset (4) * * Substitute (4) in (3) and observe ip_offset being cancelled out to * obtain: * * dst_imm = src_imm + (src + ip_offset) - (dst + ip_offset) * = src_imm + src - dst + ip_offset - ip_offset * = src_imm + src - dst (5) * * IOW, only the relative displacement of the code block matters.
*/
/*
 * Adjust a @n_-bit relative displacement at @p_ by @d_ bytes, and BUG if
 * the result no longer fits the (sign-extended) immediate field.
 */
#define apply_reloc_n(n_, p_, d_)			\
	do {						\
		s32 v = *(s##n_ *)(p_);			\
		v += (d_);				\
		BUG_ON((v >> 31) != (v >> (n_-1)));	\
		*(s##n_ *)(p_) = (s##n_)v;		\
	} while (0)
/* Dispatch to the right-sized relocation fixup for an @n byte immediate. */
static __always_inline void apply_reloc(int n, void *ptr, uintptr_t diff)
{
	switch (n) {
	case 1: apply_reloc_n(8,  ptr, diff); break;
	case 2: apply_reloc_n(16, ptr, diff); break;
	case 4: apply_reloc_n(32, ptr, diff); break;
	default: BUG();
	}
}
/* Does a displacement of @offset from @src point outside @src..@src+@src_len? */
static __always_inline bool need_reloc(unsigned long offset, u8 *src, size_t src_len)
{
	u8 *target = src + offset;

	/*
	 * If the target is inside the patched block, it's relative to the
	 * block itself and does not need relocation.
	 */
	return (target < src || target > src + src_len);
}
staticvoid __apply_relocation(u8 *buf, const u8 * const instr, size_t instrlen, u8 *repl, size_t repl_len)
{ for (int next, i = 0; i < instrlen; i = next) { struct insn insn;
if (WARN_ON_ONCE(insn_decode_kernel(&insn, &buf[i]))) return;
next = i + insn.length;
switch (insn.opcode.bytes[0]) { case 0x0f: if (insn.opcode.bytes[1] < 0x80 ||
insn.opcode.bytes[1] > 0x8f) break;
fallthrough; /* Jcc.d32 */ case 0x70 ... 0x7f: /* Jcc.d8 */ case JMP8_INSN_OPCODE: case JMP32_INSN_OPCODE: case CALL_INSN_OPCODE: if (need_reloc(next + insn.immediate.value, repl, repl_len)) {
apply_reloc(insn.immediate.nbytes,
buf + i + insn_offset_immediate(&insn),
repl - instr);
}
/* * Rewrite the "call BUG_func" replacement to point to the target of the * indirect pv_ops call "call *disp(%ip)".
*/ staticint alt_replace_call(u8 *instr, u8 *insn_buff, struct alt_instr *a)
{ void *target, *bug = &BUG_func;
s32 disp;
if (a->replacementlen != 5 || insn_buff[0] != CALL_INSN_OPCODE) {
pr_err("ALT_FLAG_DIRECT_CALL set for a non-call replacement instruction\n");
BUG();
}
if (a->instrlen != 6 ||
instr[0] != CALL_RIP_REL_OPCODE ||
instr[1] != CALL_RIP_REL_MODRM) {
pr_err("ALT_FLAG_DIRECT_CALL set for unrecognized indirect call\n");
BUG();
}
/* * Replace instructions with better alternatives for this CPU type. This runs * before SMP is initialized to avoid SMP problems with self modifying code. * This implies that asymmetric systems where APs have less capabilities than * the boot processor are not handled. Tough. Make sure you disable such * features by hand. * * Marked "noinline" to cause control flow change and thus insn cache * to refetch changed I$ lines.
*/ void __init_or_module noinline apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
u8 insn_buff[MAX_PATCH_LEN];
u8 *instr, *replacement; struct alt_instr *a, *b;
/* * KASAN_SHADOW_START is defined using * cpu_feature_enabled(X86_FEATURE_LA57) and is therefore patched here. * During the process, KASAN becomes confused seeing partial LA57 * conversion and triggers a false-positive out-of-bound report. * * Disable KASAN until the patching is complete.
*/
kasan_disable_current();
/* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. * Some kernel functions (e.g. memcpy, memset, etc) use this order to * patch code. * * So be careful if you want to change the scan order to any other * order.
*/ for (a = start; a < end; a++) { int insn_buff_sz = 0;
/* * In case of nested ALTERNATIVE()s the outer alternative might * add more padding. To ensure consistent patching find the max * padding for all alt_instr entries for this site (nested * alternatives result in consecutive entries).
*/ for (b = a+1; b < end && instr_va(b) == instr_va(a); b++) {
u8 len = max(a->instrlen, b->instrlen);
a->instrlen = b->instrlen = len;
}
/* * Patch if either: * - feature is present * - feature not present but ALT_FLAG_NOT is set to mean, * patch if feature is *NOT* present.
*/ if (!boot_cpu_has(a->cpuid) == !(a->flags & ALT_FLAG_NOT)) {
memcpy(insn_buff, instr, a->instrlen);
optimize_nops(instr, insn_buff, a->instrlen);
text_poke_early(instr, insn_buff, a->instrlen); continue;
}
/* Check if an indirect branch is at ITS-unsafe address */ staticbool cpu_wants_indirect_its_thunk_at(unsignedlong addr, int reg)
{ if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) returnfalse;
/* Indirect branch opcode is 2 or 3 bytes depending on reg */
addr += 1 + reg / 8;
/* Lower-half of the cacheline? */ return !(addr & 0x20);
} #else/* CONFIG_MITIGATION_ITS */
/* Continue as if: JMP.d32 __x86_indirect_thunk_\reg */
op = JMP32_INSN_OPCODE;
}
/* * For RETPOLINE_LFENCE: prepend the indirect CALL/JMP with an LFENCE.
*/ if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
bytes[i++] = 0x0f;
bytes[i++] = 0xae;
bytes[i++] = 0xe8; /* LFENCE */
}
#ifdef CONFIG_MITIGATION_ITS /* * Check if the address of last byte of emitted-indirect is in * lower-half of the cacheline. Such branches need ITS mitigation.
*/ if (cpu_wants_indirect_its_thunk_at((unsignedlong)addr + i, reg)) return emit_its_trampoline(addr, insn, reg, bytes); #endif
ret = emit_indirect(op, reg, bytes + i); if (ret < 0) return ret;
i += ret;
/* * The compiler is supposed to EMIT an INT3 after every unconditional * JMP instruction due to AMD BTC. However, if the compiler is too old * or MITIGATION_SLS isn't enabled, we still need an INT3 after * indirect JMPs even on Intel.
*/ if (op == JMP32_INSN_OPCODE && i < insn->length)
bytes[i++] = INT3_INSN_OPCODE;
for (; i < insn->length;)
bytes[i++] = BYTES_NOP1;
bool cpu_wants_rethunk_at(void *addr)
{ if (!cpu_feature_enabled(X86_FEATURE_RETHUNK)) returnfalse; if (x86_return_thunk != its_return_thunk) returntrue;
return !((unsignedlong)addr & 0x20);
}
/*
 * Rewrite the compiler generated return thunk tail-calls.
 *
 * For example, convert:
 *
 *   JMP __x86_return_thunk
 *
 * into:
 *
 *   RET
 */
static int patch_return(void *addr, struct insn *insn, u8 *bytes)
{
	int i = 0;

	/* Patch the custom return thunks... */
	if (cpu_wants_rethunk_at(addr)) {
		i = JMP32_INSN_SIZE;
		__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
	} else {
		/* ... or patch them out if not needed. */
		bytes[i++] = RET_INSN_OPCODE;
	}

	/* Pad the remainder of the original instruction with INT3. */
	for (; i < insn->length;)
		bytes[i++] = INT3_INSN_OPCODE;

	return i;
}
/*
 * Re-hash the CFI hash with a boot-time seed while making sure the result is
 * not a valid ENDBR instruction.
 */
static u32 cfi_rehash(u32 hash)
{
	hash ^= cfi_seed;

	/* Keep shifting/mixing until neither hash nor -hash encodes ENDBR. */
	while (unlikely(__is_endbr(hash) || __is_endbr(-hash))) {
		bool lsb = hash & 1;

		hash >>= 1;
		if (lsb)
			hash ^= 0x80200003;
	}

	return hash;
}
static __init int cfi_parse_cmdline(char *str)
{ if (!str) return -EINVAL;
while (str) { char *next = strchr(str, ','); if (next) {
*next = 0;
next++;
}
/* * Since FineIBT does hash validation on the callee side it is prone to * circumvention attacks where a 'naked' ENDBR instruction exists that * is not part of the fineibt_preamble sequence. * * Notably the x86 entry points must be ENDBR and equally cannot be * fineibt_preamble. * * The fineibt_paranoid caller sequence adds additional caller side * hash validation. This stops such circumvention attacks dead, but at the cost * of adding a load. * * <fineibt_paranoid_start>: * 0: 41 ba 78 56 34 12 mov $0x12345678, %r10d * 6: 45 3b 53 f7 cmp -0x9(%r11), %r10d * a: 4d 8d 5b <f0> lea -0x10(%r11), %r11 * e: 75 fd jne d <fineibt_paranoid_start+0xd> * 10: 41 ff d3 call *%r11 * 13: 90 nop * * Notably LEA does not modify flags and can be reordered with the CMP, * avoiding a dependency. Again, using a non-taken (backwards) branch * for the failure case, abusing LEA's immediate 0xf0 as LOCK prefix for the * Jcc.d8, causing #UD.
*/ asm( ".pushsection .rodata \n" "fineibt_paranoid_start: \n" " movl $0x12345678, %r10d \n" " cmpl -9(%r11), %r10d \n" " lea -0x10(%r11), %r11 \n" " jne fineibt_paranoid_start+0xd \n" "fineibt_paranoid_ind: \n" " call *%r11 \n" " nop \n" "fineibt_paranoid_end: \n" ".popsection \n"
);
/* .retpoline_sites */ staticint cfi_disable_callers(s32 *start, s32 *end)
{ /* * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate * in tact for later usage. Also see decode_caller_hash() and * cfi_rewrite_callers().
*/ const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp };
s32 *s;
for (s = start; s < end; s++) { void *addr = (void *)s + *s;
u32 hash;
for (s = start; s < end; s++) { void *addr = (void *)s + *s; int arity;
u32 hash;
/* * When the function doesn't start with ENDBR the compiler will * have determined there are no indirect calls to it and we * don't need no CFI either.
*/ if (!is_endbr(addr + 16)) continue;
hash = decode_preamble_hash(addr, &arity); if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
addr, addr, 5, addr)) return -EINVAL;
if (cfi_mode == CFI_AUTO) {
cfi_mode = CFI_KCFI; if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) { /* * FRED has much saner context on exception entry and * is less easy to take advantage of.
*/ if (!cpu_feature_enabled(X86_FEATURE_FRED))
cfi_paranoid = true;
cfi_mode = CFI_FINEIBT;
}
}
/* * Rewrite the callers to not use the __cfi_ stubs, such that we might * rewrite them. This disables all CFI. If this succeeds but any of the * later stages fails, we're without CFI.
*/
ret = cfi_disable_callers(start_retpoline, end_retpoline); if (ret) goto err;
if (cfi_rand) { if (builtin) {
cfi_seed = get_random_u32();
cfi_bpf_hash = cfi_rehash(cfi_bpf_hash);
cfi_bpf_subprog_hash = cfi_rehash(cfi_bpf_subprog_hash);
}
ret = cfi_rand_preamble(start_cfi, end_cfi); if (ret) goto err;
ret = cfi_rand_callers(start_retpoline, end_retpoline); if (ret) goto err;
}
switch (cfi_mode) { case CFI_OFF: if (builtin)
pr_info("Disabling CFI\n"); return;
case CFI_KCFI:
ret = cfi_enable_callers(start_retpoline, end_retpoline); if (ret) goto err;
if (builtin)
pr_info("Using kCFI\n"); return;
case CFI_FINEIBT: /* place the FineIBT preamble at func()-16 */
ret = cfi_rewrite_preamble(start_cfi, end_cfi); if (ret) goto err;
/* rewrite the callers to target func()-16 */
ret = cfi_rewrite_callers(start_retpoline, end_retpoline); if (ret) goto err;
/* now that nobody targets func()+0, remove ENDBR there */
cfi_rewrite_endbr(start_cfi, end_cfi);
staticvoid poison_cfi(void *addr)
{ /* * Compilers manage to be inconsistent with ENDBR vs __cfi prefixes, * some (static) functions for which they can determine the address * is never taken do not get a __cfi prefix, but *DO* get an ENDBR. * * As such, these functions will get sealed, but we need to be careful * to not unconditionally scribble the previous function.
*/ switch (cfi_mode) { case CFI_FINEIBT: /* * FineIBT prefix should start with an ENDBR.
*/ if (!is_endbr(addr)) break;
/* * When regs->ip points to a 0xEA byte in the FineIBT preamble, * return true and fill out target and type. * * We check the preamble by checking for the ENDBR instruction relative to the * 0xEA instruction.
*/ staticbool decode_fineibt_preamble(struct pt_regs *regs, unsignedlong *target, u32 *type)
{ unsignedlong addr = regs->ip - fineibt_preamble_ud;
u32 hash;
/* * Since regs->ip points to the middle of an instruction; it cannot * continue with the normal fixup.
*/
regs->ip = *target;
returntrue;
Efault: returnfalse;
}
/* * regs->ip points to one of the UD2 in __bhi_args[].
*/ staticbool decode_fineibt_bhi(struct pt_regs *regs, unsignedlong *target, u32 *type)
{ unsignedlong addr;
u32 hash;
if (!cfi_bhi) returnfalse;
if (regs->ip < (unsignedlong)__bhi_args ||
regs->ip >= (unsignedlong)__bhi_args_end) returnfalse;
/* * Fetch the return address from the stack, this points to the * FineIBT preamble. Since the CALL instruction is in the 5 last * bytes of the preamble, the return address is in fact the target * address.
*/
__get_kernel_nofault(&addr, regs->sp, unsignedlong, Efault);
*target = addr;
addr -= fineibt_preamble_size; if (!exact_endbr((void *)addr)) returnfalse;
/* * regs->ip points to a LOCK Jcc.d8 instruction from the fineibt_paranoid_start[] * sequence, or to an invalid instruction (0xea) + Jcc.d8 for cfi_paranoid + ITS * thunk.
*/ staticbool decode_fineibt_paranoid(struct pt_regs *regs, unsignedlong *target, u32 *type)
{ unsignedlong addr = regs->ip - fineibt_paranoid_ud;
/* * Return 1 if the address range is reserved for SMP-alternatives. * Must hold text_mutex.
*/ int alternatives_text_reserved(void *start, void *end)
{ struct smp_alt_module *mod; const s32 *poff;
u8 *text_start = start;
u8 *text_end = end;
/* * Self-test for the INT3 based CALL emulation code. * * This exercises int3_emulate_call() to make sure INT3 pt_regs are set up * properly and that there is a stack gap between the INT3 frame and the * previous context. Without this gap doing a virtual PUSH on the interrupted * stack would corrupt the INT3 IRET frame. * * See entry_{32,64}.S for more details.
*/
/* * We define the int3_magic() function in assembly to control the calling * convention such that we can 'call' it from assembly.
*/
externvoid int3_magic(unsignedint *ptr); /* defined in asm */
static noinline void __init alt_reloc_selftest(void)
{
	/*
	 * Tests text_poke_apply_relocation().
	 *
	 * This has a relative immediate (CALL) in a place other than the first
	 * instruction and additionally on x86_64 we get a RIP-relative LEA:
	 *
	 *   lea 0x0(%rip),%rdi  # 5d0: R_X86_64_PC32  .init.data+0x5566c
	 *   call +0             # 5d5: R_X86_64_PLT32 __alt_reloc_selftest-0x4
	 *
	 * Getting this wrong will either crash and burn or tickle the WARN
	 * above.
	 */
	asm_inline volatile (
		ALTERNATIVE("", "lea %[mem], %%" _ASM_ARG1 "; call __alt_reloc_selftest;", X86_FEATURE_ALWAYS)
		: ASM_CALL_CONSTRAINT
		: [mem] "m" (__alt_reloc_selftest_addr)
		: _ASM_ARG1
	);
}
/* * The patching is not fully atomic, so try to avoid local * interruptions that might execute the to be patched code. * Other CPUs are not running.
*/
stop_nmi();
/* * Don't stop machine check exceptions while patching. * MCEs only happen when something got corrupted and in this * case we must do something about the corruption. * Ignoring it is worse than an unlikely patching race. * Also machine checks tend to be broadcast and if one CPU * goes into machine check the others follow quickly, so we don't * expect a machine check to cause undue problems during to code * patching.
*/
/* * Make sure to set (artificial) features depending on used paravirt * functions which can later influence alternative patching.
*/
paravirt_set_cap();
/* Keep CET-IBT disabled until caller/callee are patched */
ibt = ibt_save(/*disable*/ true);
/* * Rewrite the retpolines, must be done before alternatives since * those can rewrite the retpoline thunks.
*/
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
apply_returns(__return_sites, __return_sites_end);
its_fini_core();
/* * Adjust all CALL instructions to point to func()-10, including * those in .altinstr_replacement.
*/
callthunks_patch_builtin_calls();
/**
 * text_poke_early - Update instructions on a live kernel at boot time
 * @addr: address to modify
 * @opcode: source of the copy
 * @len: length to copy
 *
 * When you use this code to patch more than one byte of an instruction
 * you need to make sure that other CPUs cannot execute this code in parallel.
 * Also no thread must be currently preempted in the middle of these
 * instructions. And on the local CPU you need to be protected against NMI or
 * MCE handlers seeing an inconsistent instruction while you patch.
 */
void __init_or_module text_poke_early(void *addr, const void *opcode,
				      size_t len)
{
	unsigned long flags;

	if (boot_cpu_has(X86_FEATURE_NX) &&
	    is_module_text_address((unsigned long)addr)) {
		/*
		 * Modules text is marked initially as non-executable, so the
		 * code cannot be running and speculative code-fetches are
		 * prevented. Just change the code.
		 */
		memcpy(addr, opcode, len);
	} else {
		local_irq_save(flags);
		memcpy(addr, opcode, len);
		sync_core();
		local_irq_restore(flags);

		/*
		 * Could also do a CLFLUSH here to speed up CPU recovery; but
		 * that causes hangs on some VIA CPUs.
		 */
	}
}
/* * While boot memory allocator is running we cannot use struct pages as * they are not yet initialized. There is no way to recover.
*/
BUG_ON(!after_bootmem);
if (!core_kernel_text((unsignedlong)addr)) {
pages[0] = vmalloc_to_page(addr); if (cross_page_boundary)
pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
} else {
pages[0] = virt_to_page(addr);
WARN_ON(!PageReserved(pages[0])); if (cross_page_boundary)
pages[1] = virt_to_page(addr + PAGE_SIZE);
} /* * If something went wrong, crash and burn since recovery paths are not * implemented.
*/
BUG_ON(!pages[0] || (cross_page_boundary && !pages[1]));
/* * Map the page without the global bit, as TLB flushing is done with * flush_tlb_mm_range(), which is intended for non-global PTEs.
*/
pgprot = __pgprot(pgprot_val(PAGE_KERNEL) & ~_PAGE_GLOBAL);
/* * The lock is not really needed, but this allows to avoid open-coding.
*/
ptep = get_locked_pte(text_poke_mm, text_poke_mm_addr, &ptl);
/* * This must not fail; preallocated in poking_init().
*/
VM_BUG_ON(!ptep);
/* * Loading the temporary mm behaves as a compiler barrier, which * guarantees that the PTE will be set at the time memcpy() is done.
*/
prev_mm = use_temporary_mm(text_poke_mm);
/* * Loading the previous page-table hierarchy requires a serializing * instruction that already allows the core to see the updated version. * Xen-PV is assumed to serialize execution in a similar manner.
*/
unuse_temporary_mm(prev_mm);
/* * Flushing the TLB might involve IPIs, which would require enabled * IRQs, but not if the mm is not used, as it is in this point.
*/
flush_tlb_mm_range(text_poke_mm, text_poke_mm_addr, text_poke_mm_addr +
(cross_page_boundary ? 2 : 1) * PAGE_SIZE,
PAGE_SHIFT, false);
if (func == text_poke_memcpy) { /* * If the text does not match what we just wrote then something is * fundamentally screwy; there's nothing we can really do about that.
*/
BUG_ON(memcmp(addr, src, len));
}
/** * text_poke - Update instructions on a live kernel * @addr: address to modify * @opcode: source of the copy * @len: length to copy * * Only atomic text poke/set should be allowed when not doing early patching. * It means the size must be writable atomically and the address must be aligned * in a way that permits an atomic write. It also makes sure we fit on a single * page. * * Note that the caller must ensure that if the modified code is part of a * module, the module would not be removed during poking. This can be achieved * by registering a module notifier, and ordering module removal and patching * through a mutex.
*/ void *text_poke(void *addr, constvoid *opcode, size_t len)
{
lockdep_assert_held(&text_mutex);
/** * text_poke_kgdb - Update instructions on a live kernel by kgdb * @addr: address to modify * @opcode: source of the copy * @len: length to copy * * Only atomic text poke/set should be allowed when not doing early patching. * It means the size must be writable atomically and the address must be aligned * in a way that permits an atomic write. It also makes sure we fit on a single * page. * * Context: should only be used by kgdb, which ensures no other core is running, * despite the fact it does not hold the text_mutex.
*/ void *text_poke_kgdb(void *addr, constvoid *opcode, size_t len)
{ return __text_poke(text_poke_memcpy, addr, opcode, len);
}
/** * text_poke_copy - Copy instructions into (an unused part of) RX memory * @addr: address to modify * @opcode: source of the copy * @len: length to copy, could be more than 2x PAGE_SIZE * * Not safe against concurrent execution; useful for JITs to dump * new code blocks into unused regions of RX memory. Can be used in * conjunction with synchronize_rcu_tasks() to wait for existing * execution to quiesce after having made sure no existing functions * pointers are live.
*/ void *text_poke_copy(void *addr, constvoid *opcode, size_t len)
{
mutex_lock(&text_mutex);
addr = text_poke_copy_locked(addr, opcode, len, false);
mutex_unlock(&text_mutex); return addr;
}
/** * text_poke_set - memset into (an unused part of) RX memory * @addr: address to modify * @c: the byte to fill the area with * @len: length to copy, could be more than 2x PAGE_SIZE * * This is useful to overwrite unused regions of RX memory with illegal * instructions.
*/ void *text_poke_set(void *addr, int c, size_t len)
{ unsignedlong start = (unsignedlong)addr;
size_t patched = 0;
if (WARN_ON_ONCE(core_kernel_text(start))) return NULL;
/*
 * NOTE: crazy scheme to allow patching Jcc.d32 but not increase the size of
 * this thing. When len == 6 everything is prefixed with 0x0f and we map
 * opcode to Jcc.d8, using len to distinguish.
 */
struct smp_text_poke_loc {
	/* addr := _stext + rel_addr */
	s32 rel_addr;
	s32 disp;
	u8 len;
	u8 opcode;
	const u8 text[TEXT_POKE_MAX_OPCODE_SIZE];
	/* see smp_text_poke_batch_finish() */
	u8 old;
};
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.