indirect_call:
tok = strchr(endptr, '*'); if (tok != NULL) {
endptr++;
/* Indirect call can use a non-rip register and offset: callq *0x8(%rbx).
* Do not parse such instruction. */ if (strstr(endptr, "(%r") == NULL)
ops->target.addr = strtoull(endptr, NULL, 16);
} goto find_target;
}
staticint call__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops, int max_ins_name)
{ if (ops->target.sym) return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.sym->name);
if (ops->target.addr == 0) return ins__raw_scnprintf(ins, bf, size, ops, max_ins_name);
if (ops->target.name) return scnprintf(bf, size, "%-*s %s", max_ins_name, ins->name, ops->target.name);
/* * Examples of lines to parse for the _cpp_lex_token@@Base * function: * * 1159e6c: jne 115aa32 <_cpp_lex_token@@Base+0xf92> * 1159e8b: jne c469be <cpp_named_operator2name@@Base+0xa72> * * The first is a jump to an offset inside the same function, * the second is to another function, i.e. that 0xa72 is an * offset in the cpp_named_operator2name@@base function.
*/ /* * skip over possible up to 2 operands to get to address, e.g.: * tbnz w0, #26, ffff0000083cd190 <security_file_permission+0xd0>
*/ if (c != NULL) {
c++;
ops->target.addr = strtoull(c, NULL, 16); if (!ops->target.addr) {
c = strchr(c, ',');
c = validate_comma(c, ops); if (c != NULL) {
c++;
ops->target.addr = strtoull(c, NULL, 16);
}
}
} else {
ops->target.addr = strtoull(ops->raw, NULL, 16);
}
/* * FIXME: things like this in _cpp_lex_token (gcc's cc1 program):
cpp_named_operator2name@@Base+0xa72
* Point to a place that is after the cpp_named_operator2name * boundaries, i.e. in the ELF symbol table for cc1 * cpp_named_operator2name is marked as being 32-bytes long, but it in * fact is much larger than that, so we seem to need a symbols__find() * routine that looks for >= current->start and < next_symbol->start, * possibly just for C++ objects? * * For now lets just make some progress by marking jumps to outside the * current function as call like. * * Actual navigation will come next, with further understanding of how * the symbol searching and disassembly should be done.
*/ if (maps__find_ams(ms->maps, &target) == 0 &&
map__rip_2objdump(target.ms.map, map__map_ip(target.ms.map, target.addr)) == ops->target.addr)
ops->target.sym = target.ms.sym;
return scnprintf(bf, size, "%-*s %.*s%" PRIx64, max_ins_name,
ins->name, c ? c - ops->raw : 0, ops->raw,
ops->target.offset);
}
/*
 * Delete callback for jump operands: intentionally does nothing.
 *
 * The ops->jump.raw_comment and ops->jump.raw_func_start pointers belong to
 * the raw operand string, so they must not be freed here.
 */
static void jump__delete(struct ins_operands *ops __maybe_unused)
{
}
/*
 * Check if the operand has more than one register, like x86 SIB addressing:
 *   0x1234(%rax, %rbx, 8)
 *
 * But it doesn't care about segment selectors like %gs:0x5678(%rcx), so just
 * check the input string after 'memory_ref_char' if it exists.
*/ staticbool check_multi_regs(struct arch *arch, constchar *op)
{ int count = 0;
if (arch->objdump.register_char == 0) returnfalse;
if (arch->objdump.memory_ref_char) {
op = strchr(op, arch->objdump.memory_ref_char); if (op == NULL) returnfalse;
}
/* * x86 SIB addressing has something like 0x8(%rax, %rcx, 1) * then it needs to have the closing parenthesis.
*/ if (strchr(ops->raw, '(')) {
*s = ',';
s = strchr(ops->raw, ')'); if (s == NULL || s[1] != ',') return -1;
*++s = '\0';
}
/*
 * Sets the fields: multi_regs and "mem_ref".
 * "mem_ref" is set for ops->source which is later used to
 * fill the objdump.memory_ref_char field. This ops is currently
 * used by powerpc and since binary instruction code is used to
 * extract opcode, regs and offset, no other parsing is needed here.
 *
 * Don't set multi regs for 4 cases since they have only one operand
 * for source:
 * - Add to Minus One Extended XO-form ( Ex: addme, addmeo )
 * - Subtract From Minus One Extended XO-form ( Ex: subfme )
 * - Add to Zero Extended XO-form ( Ex: addze, addzeo )
 * - Subtract From Zero Extended XO-form ( Ex: subfze )
*/ staticint arithmetic__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, struct disasm_line *dl)
{ int opcode = PPC_OP(dl->raw.raw_insn);
/* * Sets the fields: multi_regs and "mem_ref". * "mem_ref" is set for ops->source which is later used to * fill the objdump->memory_ref-char field. This ops is currently * used by powerpc and since binary instruction code is used to * extract opcode, regs and offset, no other parsing is needed here
*/ staticint load_store__parse(struct arch *arch __maybe_unused, struct ins_operands *ops, struct map_symbol *ms __maybe_unused, struct disasm_line *dl __maybe_unused)
{
ops->source.mem_ref = true;
ops->source.multi_regs = false; /* opcode 31 is of X form */ if (PPC_OP(dl->raw.raw_insn) == 31)
ops->source.multi_regs = true;
/*
 * Parses the result captured from symbol__disassemble_*
 * Example, line read from DSO file in powerpc:
 * line:    38 01 81 e8
 * opcode: fetched from arch specific get_opcode_insn
 * rawp_insn: e8810138
 *
 * rawp_insn is used later to extract the reg/offset fields
 */
/* Extract the 6-bit field held in bits 26..31 of a raw instruction word. */
#define PPC_OP(op)	(((op) >> 26) & 0x3F)
#define RAW_BYTES	11
/* * Allocating the disasm annotation line data with * following structure: * * ------------------------------------------- * struct disasm_line | struct annotation_line * ------------------------------------------- * * We have 'struct annotation_line' member as last member * of 'struct disasm_line' to have an easy access.
*/ struct disasm_line *disasm_line__new(struct annotate_args *args)
{ struct disasm_line *dl = NULL; struct annotation *notes = symbol__annotation(args->ms.sym); int nr = notes->src->nr_events;
dl = zalloc(disasm_line_size(nr)); if (!dl) return NULL;
annotation_line__init(&dl->al, args, nr); if (dl->al.line == NULL) goto out_delete;
if (args->offset != -1) { if (arch__is(args->arch, "powerpc")) { if (disasm_line__parse_powerpc(dl, args) < 0) goto out_free_line;
} elseif (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line;
/* * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) * which looks like following * * 0000000000415500 <_init>: * 415500: sub $0x8,%rsp * 415504: mov 0x2f5ad5(%rip),%rax # 70afe0 <_DYNAMIC+0x2f8> * 41550b: test %rax,%rax * 41550e: je 415515 <_init+0x15> * 415510: callq 416e70 <__gmon_start__@plt> * 415515: add $0x8,%rsp * 415519: retq * * it will be parsed and saved into struct disasm_line as * <offset> <name> <ops.raw> * * The offset will be a relative offset from the start of the symbol and -1 * means that it's not a disassembly line so should be treated differently. * The ops.raw part will be parsed further according to type of the instruction.
*/ staticint symbol__parse_objdump_line(struct symbol *sym, struct annotate_args *args, char *parsed_line, int *line_nr, char **fileloc)
{ struct map *map = args->ms.map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; char *tmp;
s64 line_ip, offset = -1;
regmatch_t match[2];
/* /filename:linenr ? Save line number and ignore. */ if (regexec(&file_lineno, parsed_line, 2, match, 0) == 0) {
*line_nr = atoi(parsed_line + match[1].rm_so);
free(*fileloc);
*fileloc = strdup(parsed_line); return 0;
}
/* Process hex address followed by ':'. */
line_ip = strtoull(parsed_line, &tmp, 16); if (parsed_line != tmp && tmp[0] == ':' && tmp[1] != '\0') {
u64 start = map__rip_2objdump(map, sym->start),
end = map__rip_2objdump(map, sym->end);
switch (errnum) { case SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX: { char bf[SBUILD_ID_SIZE + 15] = " with build id "; char *build_id_msg = NULL;
if (dso__has_build_id(dso)) {
build_id__snprintf(dso__bid(dso), bf + 15, sizeof(bf) - 15);
build_id_msg = bf;
}
scnprintf(buf, buflen, "No vmlinux file%s\nwas found in the path.\n\n" "Note that annotation using /proc/kcore requires CAP_SYS_RAWIO capability.\n\n" "Please use:\n\n" " perf buildid-cache -vu vmlinux\n\n" "or:\n\n" " --vmlinux vmlinux\n", build_id_msg ?: "");
} break; case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF:
scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); break; case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_REGEXP:
scnprintf(buf, buflen, "Problems with arch specific instruction name regular expressions."); break; case SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING:
scnprintf(buf, buflen, "Problems while parsing the CPUID in the arch specific initialization."); break; case SYMBOL_ANNOTATE_ERRNO__BPF_INVALID_FILE:
scnprintf(buf, buflen, "Invalid BPF file: %s.", dso__long_name(dso)); break; case SYMBOL_ANNOTATE_ERRNO__BPF_MISSING_BTF:
scnprintf(buf, buflen, "The %s BPF file has no BTF section, compile with -g or use pahole -J.",
dso__long_name(dso)); break; case SYMBOL_ANNOTATE_ERRNO__COULDNT_DETERMINE_FILE_TYPE:
scnprintf(buf, buflen, "Couldn't determine the file %s type.", dso__long_name(dso)); break; default:
scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); break;
}
build_id_path = strdup(filename); if (!build_id_path) return ENOMEM;
/* * old style build-id cache has name of XX/XXXXXXX.. while * new style has XX/XXXXXXX../{elf,kallsyms,vdso}. * extract the build-id part of dirname in the new style only.
*/
pos = strrchr(build_id_path, '/'); if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
dirname(build_id_path);
if (dso__is_kcore(dso)) goto fallback;
len = readlink(build_id_path, linkname, sizeof(linkname) - 1); if (len < 0) goto fallback;
linkname[len] = '\0'; if (strstr(linkname, DSO__NAME_KALLSYMS) ||
access(filename, R_OK)) {
fallback: /* * If we don't have build-ids or the build-id file isn't in the * cache, or is just a kallsyms file, well, lets hope that this * DSO is the same as when 'perf record' ran.
*/ if (dso__kernel(dso) && dso__long_name(dso)[0] == '/')
snprintf(filename, filename_size, "%s", dso__long_name(dso)); else
__symbol__join_symfs(filename, filename_size, dso__long_name(dso));
/* TODO: support more architectures */ if (!arch__is(args->arch, "x86")) return -1;
if (cs_open(CS_ARCH_X86, mode, handle) != CS_ERR_OK) return -1;
if (!opt->disassembler_style ||
!strcmp(opt->disassembler_style, "att"))
cs_option(*handle, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
/* * Resolving address operands to symbols is implemented * on x86 by investigating instruction details.
*/
cs_option(*handle, CS_OPT_DETAIL, CS_OPT_ON);
/* This will be called for each PHDR in an ELF binary */ staticint find_file_offset(u64 start, u64 len, u64 pgoff, void *arg)
{ struct find_file_offset_data *data = arg;
/* TODO: support more architectures */ if (!arch__is(args->arch, "x86")) return;
if (insn->detail == NULL) return;
for (i = 0; i < insn->detail->x86.op_count; i++) {
cs_x86_op *op = &insn->detail->x86.operands[i];
u64 orig_addr;
if (op->type != X86_OP_MEM) continue;
/* only print RIP-based global symbols for now */ if (op->mem.base != X86_REG_RIP) continue;
/* get the target address */
orig_addr = addr + insn->size + op->mem.disp;
addr = map__objdump_2mem(map, orig_addr);
if (dso__kernel(map__dso(map))) { /* * The kernel maps can be split into sections, * let's find the map first and then search for the symbol.
*/
map = maps__find(map__kmaps(map), addr); if (map == NULL) continue;
}
/* convert it to map-relative address for search */
addr = map__map_ip(map, addr);
sym = map__find_symbol(map, addr); if (sym == NULL) continue;
/* * TODO: enable disassm for powerpc * count = cs_disasm(handle, buf, len, start, len, &insn); * * For now, only binary code is saved in disassembled line * to be used in "type" and "typeoff" sort keys. Each raw code * is 32 bit instruction. So use "len/4" to get the number of * entries.
*/
count = len/4;
for (i = 0, offset = 0; i < count; i++) {
args->offset = offset;
sprintf(args->line, "%x", line[i]);
dl = disasm_line__new(args); if (dl == NULL) break;
/* * Whenever LLVM wants to resolve an address into a symbol, it calls this * callback. We don't ever actually _return_ anything (in particular, because * it puts quotation marks around what we return), but we use this as a hint * that there is a branch or PC-relative address in the expression that we * should add some textual annotation for after the instruction. The caller * will use this information to add the actual annotation.
*/ staticconstchar *
symbol_lookup_callback(void *disinfo, uint64_t value,
uint64_t *ref_type,
uint64_t address __maybe_unused, constchar **ref __maybe_unused)
{ struct symbol_lookup_storage *storage = disinfo;
if (args->options->disassembler_style &&
!strcmp(args->options->disassembler_style, "intel"))
LLVMSetDisasmOptions(disasm,
LLVMDisassembler_Option_AsmPrinterVariant);
/* * This needs to be set after AsmPrinterVariant, due to a bug in LLVM; * setting AsmPrinterVariant makes a new instruction printer, making it * forget about the PrintImmHex flag (which is applied before if both * are given to the same call).
*/
LLVMSetDisasmOptions(disasm, LLVMDisassembler_Option_PrintImmHex);
/* add the function address and name */
scnprintf(disasm_buf, sizeof(disasm_buf), "%#"PRIx64" <%s>:",
start, sym->name);
/*
* Possibly create a new version of line with tabs expanded. Returns the
* existing or new line, storage is updated if a new line is allocated. If
* allocation fails then NULL is returned.
*/
static char *expand_tabs(char *line, char **storage, size_t *storage_len)
{
size_t i, src, dst, len, new_storage_len, num_tabs;
char *new_line;
size_t line_len = strlen(line);
for (num_tabs = 0, i = 0; i < line_len; i++)
if (line[i] == '\t')
num_tabs++;
if (num_tabs == 0)
return line;
/*
* Space for the line and '\0', less the leading and trailing
* spaces. Each tab may introduce 7 additional spaces.
*/
new_storage_len = line_len + 1 + (num_tabs * 7);
new_line = malloc(new_storage_len);
if (new_line == NULL) {
pr_err("Failure allocating memory for tab expansion\n");
return NULL;
}
/*
* Copy regions starting at src and expand tabs. If there are two
* adjacent tabs then 'src == i', the memcpy is of size 0 and the spaces
* are inserted.
*/
for (i = 0, src = 0, dst = 0; i < line_len && num_tabs; i++) {
if (line[i] == '\t') {
len = i - src;
memcpy(&new_line[dst], &line[src], len);
dst += len;
new_line[dst++] = ' ';
while (dst % 8 != 0)
new_line[dst++] = ' ';
src = i + 1;
num_tabs--;
}
}
/* Expand the last region. */
len = line_len - src;
memcpy(&new_line[dst], &line[src], len);
dst += len;
new_line[dst] = '\0';
/* Create a pipe to read from for stdout */
memset(&objdump_process, 0, sizeof(objdump_process));
objdump_process.argv = objdump_argv;
objdump_process.out = -1;
objdump_process.err = -1;
objdump_process.no_stderr = 1;
if (start_command(&objdump_process)) {
pr_err("Failure starting to run %s\n", command);
err = -1;
goto out_free_command;
}
file = fdopen(objdump_process.out, "r");
if (!file) {
pr_err("Failure creating FILE stream for %s\n", command);
/*
* If we were using debug info, we should retry with the
* original binary.
*/
err = -1;
goto out_close_stdout;
}
/* Storage for getline. */
line = NULL;
line_len = 0;
/*
* The sourcecode line number (lineno) needs to be kept
* across calls to symbol__parse_objdump_line(), so that it
* can associate it with the instructions till the next one.
* See disasm_line__new() and struct disasm_line::line_nr.
*/
if (symbol__parse_objdump_line(sym, args, expanded_line,
&lineno, &fileloc) < 0)
break;
nline++;
}
free(line);
free(fileloc);
err = finish_command(&objdump_process);
if (err)
pr_err("Error running %s\n", command);
if (nline == 0) {
err = -1;
pr_err("No output from %s\n", command);
}
/*
* kallsyms does not have symbol sizes so there may be a nop at the end.
* Remove it.
*/
if (dso__is_kcore(dso))
delete_last_nop(sym);
if (!kcore_extract__create(&kce)) {
delete_extract = true;
strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename));
}
} else if (dso__needs_decompress(dso)) {
char tmp[KMOD_DECOMP_LEN];
if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0)
return -1;
decomp = true;
strcpy(symfs_filename, tmp);
}
/*
* For powerpc data type profiling, use the dso__data_read_offset to
* read raw instruction directly and interpret the binary code to
* understand instructions and register fields. For sort keys as type
* and typeoff, disassemble to mnemonic notation is not required in
* case of powerpc.
*/
if (arch__is(args->arch, "powerpc")) {
extern const char *sort_order;
if (sort_order && !strstr(sort_order, "sym")) {
err = symbol__disassemble_raw(symfs_filename, sym, args);
if (err == 0)
goto out_remove_tmp;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.