if (perf_session__read_header(session) < 0) {
pr_err("incompatible file format (rerun with -v to learn more)\n"); return -1;
}
if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) { /* Auxiliary events may reference exited threads, hold onto dead ones. */
symbol_conf.keep_exited_threads = true;
}
if (perf_data__is_pipe(data)) return 0;
if (perf_header__has_feat(&session->header, HEADER_STAT)) return 0;
if (!evlist__valid_sample_type(session->evlist)) {
pr_err("non matching sample_type\n"); return -1;
}
if (!evlist__valid_sample_id_all(session->evlist)) {
pr_err("non matching sample_id_all\n"); return -1;
}
if (!evlist__valid_read_format(session->evlist)) {
pr_err("non matching read_format\n"); return -1;
}
perf_env__init(&session->header.env); if (data) {
ret = perf_data__open(data); if (ret < 0) goto out_delete;
session->data = data;
if (perf_data__is_read(data)) {
ret = perf_session__open(session); if (ret < 0) goto out_delete;
/* * set session attributes that are present in perf.data * but not in pipe-mode.
*/ if (!data->is_pipe) {
perf_session__set_id_hdr_size(session);
perf_session__set_comm_exec(session);
}
if (!data || perf_data__is_write(data)) { /* * In O_RDONLY mode this will be performed when reading the * kernel MMAP event, in perf_event__process_mmap().
*/ if (perf_session__create_kernel_maps(session) < 0)
pr_warning("Cannot read kernel map\n");
}
/* * In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is * processed, so evlist__sample_id_all is not meaningful here.
*/ if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
tool->ordered_events && !evlist__sample_id_all(session->evlist)) {
dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
tool->ordered_events = false;
}
/* * XXX this is hack in attempt to carry flags bitfield * through endian village. ABI says: * * Bit-fields are allocated from right to left (least to most significant) * on little-endian implementations and from left to right (most to least * significant) on big-endian implementations. * * The above seems to be byte specific, so we need to reverse each * byte of the bitfield. 'Internet' also says this might be implementation * specific and we probably need proper fix and carry perf_event_attr * bitfield flags in separate data file FEAT_ section. Thought this seems * to work for now.
*/ staticvoid swap_bitfield(u8 *p, unsigned len)
{ unsigned i;
for (i = 0; i < len; i++) {
*p = revbyte(*p);
p++;
}
}
/* exported for swapping attributes in file header */
void perf_event__attr_swap(struct perf_event_attr *attr)
{
	attr->type	= bswap_32(attr->type);
	attr->size	= bswap_32(attr->size);

	/*
	 * NOTE(review): upstream perf defines bswap_safe()/bswap_field*()
	 * helper macros and swaps each fixed-width field here; that part
	 * appears truncated in this copy — verify against
	 * tools/perf/util/session.c before relying on this function.
	 */

	/*
	 * After read_format are bitfields. Check read_format because
	 * we are unable to use offsetof on bitfield.
	 */
	if (bswap_safe(read_format, 1))
		swap_bitfield((u8 *) (&attr->read_format + 1),
			      sizeof(u64));
#undef bswap_field_64
#undef bswap_field_32
#undef bswap_field
#undef bswap_safe
}
switch (data->type) { case PERF_CPU_MAP__CPUS:
data->cpus_data.nr = bswap_16(data->cpus_data.nr);
for (unsigned i = 0; i < data->cpus_data.nr; i++)
data->cpus_data.cpu[i] = bswap_16(data->cpus_data.cpu[i]); break; case PERF_CPU_MAP__MASK:
data->mask32_data.long_size = bswap_16(data->mask32_data.long_size);
switch (data->mask32_data.long_size) { case 4:
data->mask32_data.nr = bswap_16(data->mask32_data.nr); for (unsigned i = 0; i < data->mask32_data.nr; i++)
data->mask32_data.mask[i] = bswap_32(data->mask32_data.mask[i]); break; case 8:
data->mask64_data.nr = bswap_16(data->mask64_data.nr); for (unsigned i = 0; i < data->mask64_data.nr; i++)
data->mask64_data.mask[i] = bswap_64(data->mask64_data.mask[i]); break; default:
pr_err("cpu_map swap: unsupported long size\n");
} break; case PERF_CPU_MAP__RANGE_CPUS:
data->range_cpu_data.start_cpu = bswap_16(data->range_cpu_data.start_cpu);
data->range_cpu_data.end_cpu = bswap_16(data->range_cpu_data.end_cpu); break; default: break;
}
}
/* * When perf record finishes a pass on every buffers, it records this pseudo * event. * We record the max timestamp t found in the pass n. * Assuming these timestamps are monotonic across cpus, we know that if * a buffer still has events with timestamps below t, they will be all * available and then read in the pass n + 1. * Hence when we start to read the pass n + 2, we can safely flush every * events with timestamps below t. * * ============ PASS n ================= * CPU 0 | CPU 1 * | * cnt1 timestamps | cnt2 timestamps * 1 | 2 * 2 | 3 * - | 4 <--- max recorded * * ============ PASS n + 1 ============== * CPU 0 | CPU 1 * | * cnt1 timestamps | cnt2 timestamps * 3 | 5 * 4 | 6 * 5 | 7 <---- max recorded * * Flush every events below timestamp 4 * * ============ PASS n + 2 ============== * CPU 0 | CPU 1 * | * cnt1 timestamps | cnt2 timestamps * 6 | 8 * 7 | 9 * - | 10 * * Flush every events below timestamp 7 * etc...
*/ int perf_event__process_finished_round(conststruct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct ordered_events *oe)
{ if (dump_trace)
fprintf(stdout, "\n"); return ordered_events__flush(oe, OE_FLUSH__ROUND);
}
for (i = 0; i < kernel_callchain_nr; i++) { if (callchain->ips[i] == PERF_CONTEXT_USER) break;
}
if ((i != kernel_callchain_nr) && lbr_stack->nr) {
u64 total_nr; /* * LBR callstack can only get user call chain, * i is kernel call chain number, * 1 is PERF_CONTEXT_USER. * * The user call chain is stored in LBR registers. * LBR are pair registers. The caller is stored * in "from" register, while the callee is stored * in "to" register. * For example, there is a call stack * "A"->"B"->"C"->"D". * The LBR registers will be recorded like * "C"->"D", "B"->"C", "A"->"B". * So only the first "to" register and all "from" * registers are needed to construct the whole stack.
*/
total_nr = i + 1 + lbr_stack->nr + 1;
kernel_callchain_nr = i + 1;
if (!callstack) {
printf("%s: nr:%" PRIu64 "\n", "... branch stack", sample->branch_stack->nr);
} else { /* the reason of adding 1 to nr is because after expanding * branch stack it generates nr + 1 callstack records. e.g., * B()->C() * A()->B() * the final callstack should be: * C() * B() * A()
*/
printf("%s: nr:%" PRIu64 "\n", "... branch callstack", sample->branch_stack->nr+1);
}
for (i = 0; i < sample->branch_stack->nr; i++) { struct branch_entry *e = &entries[i];
/* * Guest code machine is created as needed and does not use * DEFAULT_GUEST_KERNEL_ID.
*/ if (symbol_conf.guest_code) return machines__findnew(machines, pid);
/* Standard sample delivery. */ if (!(sample_type & PERF_SAMPLE_READ)) return tool->sample(tool, event, sample, evsel, machine);
/* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) return deliver_sample_group(evlist, tool, event, sample,
machine, read_format, per_thread); else return deliver_sample_value(evlist, tool, event, sample,
&sample->read.one, machine,
per_thread);
}
/* These events are processed right away */ switch (event->header.type) { case PERF_RECORD_HEADER_ATTR:
err = tool->attr(tool, event, &session->evlist); if (err == 0) {
perf_session__set_id_hdr_size(session);
perf_session__set_comm_exec(session);
} break; case PERF_RECORD_EVENT_UPDATE:
err = tool->event_update(tool, event, &session->evlist); break; case PERF_RECORD_HEADER_EVENT_TYPE: /* * Deprecated, but we need to handle it for sake * of old data files create in pipe mode.
*/
err = 0; break; case PERF_RECORD_HEADER_TRACING_DATA: /* * Setup for reading amidst mmap, but only when we * are in 'file' mode. The 'pipe' fd is in proper * place already.
*/ if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset, SEEK_SET);
err = tool->tracing_data(session, event); break; case PERF_RECORD_HEADER_BUILD_ID:
err = tool->build_id(session, event); break; case PERF_RECORD_FINISHED_ROUND:
err = tool->finished_round(tool, event, oe); break; case PERF_RECORD_ID_INDEX:
err = tool->id_index(session, event); break; case PERF_RECORD_AUXTRACE_INFO:
err = tool->auxtrace_info(session, event); break; case PERF_RECORD_AUXTRACE: /* * Setup for reading amidst mmap, but only when we * are in 'file' mode. The 'pipe' fd is in proper * place already.
*/ if (!perf_data__is_pipe(session->data))
lseek(fd, file_offset + event->header.size, SEEK_SET);
err = tool->auxtrace(session, event); break; case PERF_RECORD_AUXTRACE_ERROR:
perf_session__auxtrace_error_inc(session, event);
err = tool->auxtrace_error(session, event); break; case PERF_RECORD_THREAD_MAP:
err = tool->thread_map(session, event); break; case PERF_RECORD_CPU_MAP:
err = tool->cpu_map(session, event); break; case PERF_RECORD_STAT_CONFIG:
err = tool->stat_config(session, event); break; case PERF_RECORD_STAT:
err = tool->stat(session, event); break; case PERF_RECORD_STAT_ROUND:
err = tool->stat_round(session, event); break; case PERF_RECORD_TIME_CONV:
session->time_conv = event->time_conv;
err = tool->time_conv(session, event); break; case PERF_RECORD_HEADER_FEATURE:
err = tool->feature(session, event); break; case PERF_RECORD_COMPRESSED: case PERF_RECORD_COMPRESSED2:
err = tool->compressed(session, event, file_offset, file_path); if (err)
dump_event(session->evlist, event, file_offset, &sample, file_path); break; case PERF_RECORD_FINISHED_INIT:
err = tool->finished_init(session, event); break; case PERF_RECORD_BPF_METADATA:
err = tool->bpf_metadata(session, event); break; default:
err = -EINVAL; break;
}
perf_sample__exit(&sample); return err;
}
if (session->header.needs_swap)
event_swap(event, evlist__sample_id_all(evlist));
if (event->header.type >= PERF_RECORD_HEADER_MAX) { /* perf should not support unaligned event, stop here. */ if (event->header.size % sizeof(u64)) return -EINVAL;
/* This perf is outdated and does not support the latest event type. */
ui__warning("Unsupported header type %u, please consider updating perf.\n",
event->header.type); /* Skip unsupported event by returning its size. */ return event->header.size;
}
if (session->tool->lost == perf_event__process_lost &&
stats->nr_events[PERF_RECORD_LOST] != 0) {
ui__warning("Processed %d events and lost %d chunks!\n\n" "Check IO/CPU overload!\n\n",
stats->nr_events[0],
stats->nr_events[PERF_RECORD_LOST]);
}
if (session->tool->lost_samples == perf_event__process_lost_samples) { double drop_rate;
drop_rate = (double)stats->total_lost_samples /
(double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples); if (drop_rate > 0.05) {
ui__warning("Processed %" PRIu64 " samples and lost %3.2f%%!\n\n",
stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
drop_rate * 100.0);
}
}
if (session->tool->aux == perf_event__process_aux &&
stats->total_aux_lost != 0) {
ui__warning("AUX data lost %" PRIu64 " times out of %u!\n\n",
stats->total_aux_lost,
stats->nr_events[PERF_RECORD_AUX]);
}
ui__warning("AUX data had gaps in it %" PRIu64 " times out of %u!\n\n" "Are you running a KVM guest in the background?%s\n\n",
stats->total_aux_partial,
stats->nr_events[PERF_RECORD_AUX],
vmm_exclusive ? "\nReloading kvm_intel module with vmm_exclusive=0\n" "will reduce the gaps to only guest's timeslices." : "");
}
if (session->tool->aux == perf_event__process_aux &&
stats->total_aux_collision != 0) {
ui__warning("AUX data detected collision %" PRIu64 " times out of %u!\n\n",
stats->total_aux_collision,
stats->nr_events[PERF_RECORD_AUX]);
}
if (stats->nr_unknown_events != 0) {
ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " "file generated by a more recent tool?\n\n" "If that is not the case, consider " "reporting to linux-kernel@vger.kernel.org.\n\n",
stats->nr_unknown_events);
}
if (stats->nr_unknown_id != 0) {
ui__warning("%u samples with id not present in the header\n",
stats->nr_unknown_id);
}
if (stats->nr_invalid_chains != 0) {
ui__warning("Found invalid callchains!\n\n" "%u out of %u events were discarded for this reason.\n\n" "Consider reporting to linux-kernel@vger.kernel.org.\n\n",
stats->nr_invalid_chains,
stats->nr_events[PERF_RECORD_SAMPLE]);
}
if (stats->nr_unprocessable_samples != 0) {
ui__warning("%u unprocessable samples recorded.\n" "Do you have a KVM guest running and not using 'perf kvm'?\n",
stats->nr_unprocessable_samples);
}
perf_session__warn_order(session);
events_stats__auxtrace_error_warn(stats);
if (stats->nr_proc_map_timeout != 0) {
ui__warning("%d map information files for pre-existing threads were\n" "not processed, if there are samples for addresses they\n" "will not be resolved, you may find out which are these\n" "threads by running with -v and redirecting the output\n" "to a file.\n" "The time limit to process proc map is too short?\n" "Increase it by --proc-map-timeout\n",
stats->nr_proc_map_timeout);
}
}
/* * If it's from a file saving pipe data (by redirection), it would have * a file name other than "-". Then we can get the total size and show * the progress.
*/ if (strcmp(session->data->path, "-") && session->data->file.size) {
ui_progress__init_size(&prog, session->data->file.size, "Processing events...");
update_prog = true;
}
head = 0;
cur_size = sizeof(union perf_event);
buf = malloc(cur_size); if (!buf) return -errno;
ordered_events__set_copy_on_queue(oe, true);
more:
event = buf;
err = perf_data__read(session->data, event, sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) goto done;
pr_err("failed to read event header\n"); goto out_err;
}
if (session->header.needs_swap)
perf_event_header__bswap(&event->header);
if (size > cur_size) { void *new = realloc(buf, size); if (!new) {
pr_err("failed to allocate memory to read event\n"); goto out_err;
}
buf = new;
cur_size = size;
event = buf;
}
p = event;
p += sizeof(struct perf_event_header);
if (size - sizeof(struct perf_event_header)) {
err = perf_data__read(session->data, p,
size - sizeof(struct perf_event_header)); if (err <= 0) { if (err == 0) {
pr_err("unexpected end of event stream\n"); goto done;
}
pr_err("failed to read event data\n"); goto out_err;
}
}
if ((skip = perf_session__process_event(session, event, head, "pipe")) < 0) {
pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n",
head, event->header.size, event->header.type);
err = -EINVAL; goto out_err;
}
head += size;
if (skip > 0)
head += skip;
err = __perf_session__process_decomp_events(session); if (err) goto out_err;
if (update_prog)
ui_progress__update(&prog, size);
if (!session_done()) goto more;
done: /* do the final flush for ordered samples */
err = ordered_events__flush(oe, OE_FLUSH__FINAL); if (err) goto out_err;
err = auxtrace__flush_events(session, tool); if (err) goto out_err;
err = perf_session__flush_thread_stacks(session);
out_err:
free(buf); if (update_prog)
ui_progress__finish(); if (!tool->no_warn)
perf_session__warn_about_errors(session);
ordered_events__free(&session->ordered_events);
auxtrace__free_events(session); return err;
}
staticunion perf_event *
prefetch_event(char *buf, u64 head, size_t mmap_size, bool needs_swap, union perf_event *error)
{ union perf_event *event;
u16 event_size;
/* * Ensure we have enough space remaining to read * the size of the event in the headers.
*/ if (head + sizeof(event->header) > mmap_size) return NULL;
event = (union perf_event *)(buf + head); if (needs_swap)
perf_event_header__bswap(&event->header);
/* We're not fetching the event so swap back again */ if (needs_swap)
perf_event_header__bswap(&event->header);
/* Check if the event fits into the next mmapped buf. */ if (event_size <= mmap_size - head % page_size) { /* Remap buf and fetch again. */ return NULL;
}
/* Invalid input. Event size should never exceed mmap_size. */
pr_debug("%s: head=%#" PRIx64 " event->header.size=%#x, mmap_size=%#zx:" " fuzzed or compressed perf.data?\n", __func__, head, event_size, mmap_size);
/* * On 64bit we can mmap the data file in one go. No need for tiny mmap * slices. On 32bit we use 32MB.
*/ #if BITS_PER_LONG == 64 #define MMAP_SIZE ULLONG_MAX #define NUM_MMAPS 1 #else #define MMAP_SIZE (32 * 1024 * 1024ULL) #define NUM_MMAPS 128 #endif
err = reader__process_events(&rd, session, &prog); if (err) goto out_err; /* do the final flush for ordered samples */
err = ordered_events__flush(oe, OE_FLUSH__FINAL); if (err) goto out_err;
err = auxtrace__flush_events(session, tool); if (err) goto out_err;
err = perf_session__flush_thread_stacks(session);
out_err:
ui_progress__finish(); if (!tool->no_warn)
perf_session__warn_about_errors(session); /* * We may switching perf.data output, make ordered_events * reusable.
*/
ordered_events__reinit(&session->ordered_events);
auxtrace__free_events(session);
reader__release_decomp(&rd);
session->one_mmap = false; return err;
}
/* * Processing 2 MB of data from each reader in sequence, * because that's the way the ordered events sorting works * most efficiently.
*/ #define READER_MAX_SIZE (2 * 1024 * 1024)
/* * This function reads, merges and processes directory data.
--> --------------------
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.