/* * Timeless has no timestamps in the trace so overlapping mmap lookups * are less accurate but produces smaller trace data. We use context IDs * in the trace instead of matching timestamps with fork records so * they're not really needed in the general case. Overlapping mmaps * happen in cases like between a fork and an exec.
*/ bool timeless_decoding;
/* * Per-thread ignores the trace channel ID and instead assumes that * everything in a buffer comes from the same process regardless of * which CPU it ran on. It also implies no context IDs so the TID is * taken from the auxtrace buffer.
*/ bool per_thread_decoding; bool snapshot_mode; bool data_queued; bool has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
/* Per auxtrace-queue decode state: one decoder plus its traceID queues. */
struct cs_etm_queue {
	struct cs_etm_auxtrace *etm;
	struct cs_etm_decoder *decoder;
	struct auxtrace_buffer *buffer;
	unsigned int queue_nr;
	/* traceID of the queue that hit a timestamp and needs servicing */
	u8 pending_timestamp_chan_id;
	enum cs_etm_format format;
	u64 offset;
	const unsigned char *buf;
	size_t buf_len, buf_used;
	/* Conversion between traceID and index in traceid_queues array */
	struct intlist *traceid_queues_list;
	struct cs_etm_traceid_queue **traceid_queues;
	/* Conversion between traceID and metadata pointers */
	struct intlist *traceid_list;
	/*
	 * Same as traceid_list, but traceid_list may be a reference to another
	 * queue's which has a matching sink ID.
	 */
	struct intlist *own_traceid_list;
	u32 sink_id;
};
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300

/*
 * A struct auxtrace_heap_item only has a queue_nr and a timestamp to
 * work with. One option is to modify to auxtrace_heap_XYZ() API or simply
 * encode the etm queue number as the upper 16 bit and the channel as
 * the lower 16 bit.
 *
 * Arguments are fully parenthesized so expression arguments cannot be
 * broken by operator precedence (e.g. TO_QUEUE_NR(a + b)).
 */
#define TO_CS_QUEUE_NR(queue_nr, trace_chan_id)	\
	(((queue_nr) << 16) | (trace_chan_id))
#define TO_QUEUE_NR(cs_queue_nr) ((cs_queue_nr) >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) ((cs_queue_nr) & 0x0000ffff)

/* Sentinel: queue has not been associated with a sink yet */
#define SINK_UNSET ((u32) -1)
/* * The returned PID format is presented as an enum: * * CS_ETM_PIDFMT_CTXTID: CONTEXTIDR or CONTEXTIDR_EL1 is traced. * CS_ETM_PIDFMT_CTXTID2: CONTEXTIDR_EL2 is traced. * CS_ETM_PIDFMT_NONE: No context IDs * * It's possible that the two bits ETM_OPT_CTXTID and ETM_OPT_CTXTID2 * are enabled at the same time when the session runs on an EL2 kernel. * This means the CONTEXTIDR_EL1 and CONTEXTIDR_EL2 both will be * recorded in the trace data, the tool will selectively use * CONTEXTIDR_EL2 as PID. * * The result is cached in etm->pid_fmt so this function only needs to be called * when processing the aux info.
*/ staticenum cs_etm_pid_fmt cs_etm__init_pid_fmt(u64 *metadata)
{
u64 val;
if (metadata[CS_ETM_MAGIC] == __perf_cs_etmv3_magic) {
val = metadata[CS_ETM_ETMCR]; /* CONTEXTIDR is traced */ if (val & BIT(ETM_OPT_CTXTID)) return CS_ETM_PIDFMT_CTXTID;
} else {
val = metadata[CS_ETMV4_TRCCONFIGR]; /* CONTEXTIDR_EL2 is traced */ if (val & (BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT))) return CS_ETM_PIDFMT_CTXTID2; /* CONTEXTIDR_EL1 is traced */ elseif (val & BIT(ETM4_CFG_BIT_CTXTID)) return CS_ETM_PIDFMT_CTXTID;
}
/*
 * Map trace_chan_id -> cpu_metadata in this queue's traceid_list.
 * Seeing the same mapping twice is fine; remapping an ID to a different
 * CPU's metadata (or vice versa) is an error.
 */
static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
					u8 trace_chan_id, u64 *cpu_metadata)
{
	/* Get an RB node for this CPU */
	struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);

	/* Something went wrong, no need to continue */
	if (!inode)
		return -ENOMEM;

	/* Disallow re-mapping a different traceID to metadata pair. */
	if (inode->priv) {
		u64 *curr_cpu_data = inode->priv;
		u8 curr_chan_id;
		int err;

		if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
			/*
			 * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
			 * are expected (but not supported) in per-thread mode,
			 * rather than signifying an error.
			 */
			if (etmq->etm->per_thread_decoding)
				pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
			else
				pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");

			return -EINVAL;
		}

		/* check that the mapped ID matches */
		err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
		if (err)
			return err;

		if (curr_chan_id != trace_chan_id) {
			pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
			return -EINVAL;
		}

		/* Skip re-adding the same mappings if everything matched */
		return 0;
	}

	/* Not one we've seen before, associate the traceID with the metadata pointer */
	inode->priv = cpu_metadata;

	return 0;
}
staticstruct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
{ if (etm->per_thread_decoding) return etm->queues.queue_array[0].priv; else return etm->queues.queue_array[cpu].priv;
}
/* * If the queue is unformatted then only save one mapping in the * queue associated with that CPU so only one decoder is made.
*/
etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]); if (etmq->format == UNFORMATTED) return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
cpu_metadata);
/* * Otherwise, version 0 trace IDs are global so save them into every * queue.
*/ for (unsignedint i = 0; i < etm->queues.nr_queues; ++i) { int ret;
etmq = etm->queues.queue_array[i].priv;
ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
cpu_metadata); if (ret) return ret;
}
return 0;
}
/*
 * Handle a version 0 HW_ID packet: extract the trace ID, map it to the
 * CPU's metadata, and write the ID back into the metadata so decoders can
 * be set up later.
 */
static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
				       u64 hw_id)
{
	int err;
	u64 *cpu_data;
	u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);

	cpu_data = get_cpu_data(etm, cpu);
	if (cpu_data == NULL)
		return -EINVAL;

	err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
	if (err)
		return err;

	/*
	 * if we are picking up the association from the packet, need to plug
	 * the correct trace ID into the metadata for setting up decoders later.
	 */
	return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
}
/* * Check sink id hasn't changed in per-cpu mode. In per-thread mode, * let it pass for now until an actual overlapping trace ID is hit. In * most cases IDs won't overlap even if the sink changes.
*/ if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
etmq->sink_id != sink_id) {
pr_err("CS_ETM: mismatch between sink IDs\n"); return -EINVAL;
}
etmq->sink_id = sink_id;
/* Find which other queues use this sink and link their ID maps */ for (unsignedint i = 0; i < etm->queues.nr_queues; ++i) { struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
/* Different sinks, skip */ if (other_etmq->sink_id != etmq->sink_id) continue;
/* Already linked, skip */ if (other_etmq->traceid_list == etmq->traceid_list) continue;
/* At the point of first linking, this one should be empty */ if (!intlist__empty(etmq->traceid_list)) {
pr_err("CS_ETM: Can't link populated trace ID lists\n"); return -EINVAL;
}
switch (cs_etm_magic) { case __perf_cs_etmv3_magic:
*trace_chan_id = (u8)(cpu_metadata[CS_ETM_ETMTRACEIDR] &
CORESIGHT_TRACE_ID_VAL_MASK); break; case __perf_cs_etmv4_magic: case __perf_cs_ete_magic:
*trace_chan_id = (u8)(cpu_metadata[CS_ETMV4_TRCTRACEIDR] &
CORESIGHT_TRACE_ID_VAL_MASK); break; default: return -EINVAL;
} return 0;
}
/*
 * update metadata trace ID from the value found in the AUX_HW_INFO packet.
 * Returns -EINVAL for an unrecognised metadata magic value.
 */
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
	u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];

	switch (cs_etm_magic) {
	case __perf_cs_etmv3_magic:
		cpu_metadata[CS_ETM_ETMTRACEIDR] = trace_chan_id;
		break;
	case __perf_cs_etmv4_magic:
	case __perf_cs_ete_magic:
		cpu_metadata[CS_ETMV4_TRCTRACEIDR] = trace_chan_id;
		break;
	default:
		return -EINVAL;
	}

	return 0;
}
/*
 * Get a metadata index for a specific cpu from an array.
 * Returns the index into etm->metadata, or -1 if the cpu isn't present.
 */
static int get_cpu_data_idx(struct cs_etm_auxtrace *etm, int cpu)
{
	int i;

	for (i = 0; i < etm->num_cpu; i++) {
		if (etm->metadata[i][CS_ETM_CPU] == (u64)cpu)
			return i;
	}

	return -1;
}
/*
 * Get a metadata for a specific cpu from an array.
 * Returns NULL when the cpu has no metadata entry.
 */
static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu)
{
	int idx = get_cpu_data_idx(etm, cpu);

	if (idx == -1)
		return NULL;

	return etm->metadata[idx];
}
/* * Handle the PERF_RECORD_AUX_OUTPUT_HW_ID event. * * The payload associates the Trace ID and the CPU. * The routine is tolerant of seeing multiple packets with the same association, * but a CPU / Trace ID association changing during a session is an error.
*/ staticint cs_etm__process_aux_output_hw_id(struct perf_session *session, union perf_event *event)
{ struct cs_etm_auxtrace *etm; struct perf_sample sample; struct evsel *evsel;
u64 hw_id; int cpu, version, err;
/* extract and parse the HW ID */
hw_id = event->aux_output_hw_id.hw_id;
version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
/* check that we can handle this version */ if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
version); return -EINVAL;
}
/* get access to the etm metadata */
etm = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); if (!etm || !etm->metadata) return -EINVAL;
/* parse the sample to get the CPU */
evsel = evlist__event2evsel(session->evlist, event); if (!evsel) return -EINVAL;
perf_sample__init(&sample, /*all=*/false);
err = evsel__parse_sample(evsel, event, &sample); if (err) goto out;
cpu = sample.cpu; if (cpu == -1) { /* no CPU in the sample - possibly recorded with an old version of perf */
pr_err("CS_ETM: no CPU AUX_OUTPUT_HW_ID sample. Use compatible perf to record.");
err = -EINVAL; goto out;
}
/* Record which traceID queue hit a timestamp and is awaiting servicing. */
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
					      u8 trace_chan_id)
{
	/*
	 * When a timestamp packet is encountered the backend code
	 * is stopped so that the front end has time to process packets
	 * that were accumulated in the traceID queue.  Since there can
	 * be more than one channel per cs_etm_queue, we need to specify
	 * what traceID queue needs servicing.
	 */
	etmq->pending_timestamp_chan_id = trace_chan_id;
}
if (etm->per_thread_decoding)
trace_chan_id = CS_ETM_PER_THREAD_TRACEID;
traceid_queues_list = etmq->traceid_queues_list;
/* * Check if the traceid_queue exist for this traceID by looking * in the queue list.
*/
inode = intlist__find(traceid_queues_list, trace_chan_id); if (inode) {
idx = (int)(intptr_t)inode->priv; return etmq->traceid_queues[idx];
}
/* We couldn't find a traceid_queue for this traceID, allocate one */
tidq = malloc(sizeof(*tidq)); if (!tidq) return NULL;
memset(tidq, 0, sizeof(*tidq));
/* Get a valid index for the new traceid_queue */
idx = intlist__nr_entries(traceid_queues_list); /* Memory for the inode is free'ed in cs_etm_free_traceid_queues () */
inode = intlist__findnew(traceid_queues_list, trace_chan_id); if (!inode) goto out_free;
/* Associate this traceID with this index */
inode->priv = (void *)(intptr_t)idx;
if (cs_etm__init_traceid_queue(etmq, tidq, trace_chan_id)) goto out_free;
/* Grow the traceid_queues array by one unit */
traceid_queues = etmq->traceid_queues;
traceid_queues = reallocarray(traceid_queues,
idx + 1, sizeof(*traceid_queues));
/* * On failure reallocarray() returns NULL and the original block of * memory is left untouched.
*/ if (!traceid_queues) goto out_free;
out_free: /* * Function intlist__remove() removes the inode from the list * and delete the memory associated to it.
*/
intlist__remove(traceid_queues_list, inode);
free(tidq);
if (etm->synth_opts.branches || etm->synth_opts.last_branch ||
etm->synth_opts.instructions) { /* * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. * * Threads and exception levels are also tracked for both the * previous and current packets. This is because the previous * packet is used for the 'from' IP for branch samples, so the * thread at that time must also be assigned to that sample. * Across discontinuity packets the thread can change, so by * tracking the thread for the previous packet the branch sample * will have the correct info.
*/
tmp = tidq->packet;
tidq->packet = tidq->prev_packet;
tidq->prev_packet = tmp;
tidq->prev_packet_el = tidq->el;
thread__put(tidq->prev_packet_thread);
tidq->prev_packet_thread = thread__get(tidq->thread);
}
}
if (etm->timeless_decoding) { /* * Pass tid = -1 to process all queues. But likely they will have * already been processed on PERF_RECORD_EXIT anyway.
*/ return cs_etm__process_timeless_queues(etm, -1);
}
/* Free this traceid_queue from the array */
tidq = etmq->traceid_queues[idx];
thread__zput(tidq->thread);
thread__zput(tidq->prev_packet_thread);
zfree(&tidq->event_buf);
zfree(&tidq->last_branch);
zfree(&tidq->last_branch_rb);
zfree(&tidq->prev_packet);
zfree(&tidq->packet);
zfree(&tidq);
/* * Function intlist__remove() removes the inode from the list * and delete the memory associated to it.
*/
intlist__remove(traceid_queues_list, inode);
}
/* Then the RB tree itself */
intlist__delete(traceid_queues_list);
etmq->traceid_queues_list = NULL;
/* finally free the traceid_queues array */
zfree(&etmq->traceid_queues);
}
if (etmq->own_traceid_list) { /* First remove all traceID/metadata nodes for the RB tree */
intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
intlist__remove(etmq->own_traceid_list, inode);
/* Then the RB tree itself */
intlist__delete(etmq->own_traceid_list);
}
/* * For any virtualisation based on nVHE (e.g. pKVM), or host kernels * running at EL1 assume everything is the host.
*/ if (pid_fmt == CS_ETM_PIDFMT_CTXTID) return &etmq->etm->session->machines.host;
/* * Not perfect, but otherwise assume anything in EL1 is the default * guest, and everything else is the host. Distinguishing between guest * and host userspaces isn't currently supported either. Neither is * multiple guest support. All this does is reduce the likeliness of * decode errors where we look into the host kernel maps when it should * have been the guest maps.
*/ switch (el) { case ocsd_EL1: return machines__find_guest(&etmq->etm->session->machines,
DEFAULT_GUEST_KERNEL_ID); case ocsd_EL3: case ocsd_EL2: case ocsd_EL0: case ocsd_EL_unknown: default: return &etmq->etm->session->machines.host;
}
}
if (address >= machine__kernel_start(machine)) { if (machine__is_host(machine)) return PERF_RECORD_MISC_KERNEL; else return PERF_RECORD_MISC_GUEST_KERNEL;
} else { if (machine__is_host(machine)) return PERF_RECORD_MISC_USER; else { /* * Can't really happen at the moment because * cs_etm__get_machine() will always return * machines.host for any non EL1 trace.
*/ return PERF_RECORD_MISC_GUEST_USER;
}
}
}
addr_location__init(&al);
tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); if (!tidq) goto out;
/* * We've already tracked EL along side the PID in cs_etm__set_thread() * so double check that it matches what OpenCSD thinks as well. It * doesn't distinguish between EL0 and EL1 for this mem access callback * so we had to do the extra tracking. Skip validation if it's any of * the 'any' values.
*/ if (!(mem_space == OCSD_MEM_SPACE_ANY ||
mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) { if (mem_space & OCSD_MEM_SPACE_EL1N) { /* Includes both non secure EL1 and EL0 */
assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0);
} elseif (mem_space & OCSD_MEM_SPACE_EL2)
assert(tidq->el == ocsd_EL2); elseif (mem_space & OCSD_MEM_SPACE_EL3)
assert(tidq->el == ocsd_EL3);
}
if (!thread__find_map(tidq->thread, cpumode, address, &al)) goto out;
dso = map__dso(al.map); if (!dso) goto out;
if (dso__data(dso)->status == DSO_DATA_STATUS_ERROR &&
dso__data_status_seen(dso, DSO_DATA_STATUS_SEEN_ITRACE)) goto out;
offset = map__map_ip(al.map, address);
map__load(al.map);
len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)),
offset, buffer, size);
if (len <= 0) {
ui__warning_once("CS ETM Trace: Missing DSO. Use 'perf archive' or debuginfod to export data from the traced system.\n" " Enable CONFIG_PROC_KCORE or use option '-k /path/to/vmlinux' for kernel symbols.\n"); if (!dso__auxtrace_warned(dso)) {
pr_err("CS ETM Trace: Debug data not found for address %#"PRIx64" in %s\n",
address,
dso__long_name(dso) ? dso__long_name(dso) : "Unknown");
dso__set_auxtrace_warned(dso);
} goto out;
}
ret = len;
out:
addr_location__exit(&al); return ret;
}
etmq->traceid_queues_list = intlist__new(NULL); if (!etmq->traceid_queues_list) goto out_free;
/* * Create an RB tree for traceID-metadata tuple. Since the conversion * has to be made for each packet that gets decoded, optimizing access * in anything other than a sequential array is worth doing.
*/
etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL); if (!etmq->traceid_list) goto out_free;
queue->priv = etmq;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
etmq->offset = 0;
etmq->sink_id = SINK_UNSET;
return 0;
}
staticint cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, struct cs_etm_queue *etmq, unsignedint queue_nr)
{ int ret = 0; unsignedint cs_queue_nr;
u8 trace_chan_id;
u64 cs_timestamp;
/* * We are under a CPU-wide trace scenario. As such we need to know * when the code that generated the traces started to execute so that * it can be correlated with execution on other CPUs. So we get a * handle on the beginning of traces and decode until we find a * timestamp. The timestamp is then added to the auxtrace min heap * in order to know what nibble (of all the etmqs) to decode first.
*/ while (1) { /* * Fetch an aux_buffer from this etmq. Bail if no more * blocks or an error has been encountered.
*/
ret = cs_etm__get_data_block(etmq); if (ret <= 0) goto out;
/* * Run decoder on the trace block. The decoder will stop when * encountering a CS timestamp, a full packet queue or the end of * trace for that block.
*/
ret = cs_etm__decode_data_block(etmq); if (ret) goto out;
/* * Function cs_etm_decoder__do_{hard|soft}_timestamp() does all * the timestamp calculation for us.
*/
cs_timestamp = cs_etm__etmq_get_timestamp(etmq, &trace_chan_id);
/* We found a timestamp, no need to continue. */ if (cs_timestamp) break;
/* * We didn't find a timestamp so empty all the traceid packet * queues before looking for another timestamp packet, either * in the current data block or a new one. Packets that were * just decoded are useless since no timestamp has been * associated with them. As such simply discard them.
*/
cs_etm__clear_all_packet_queues(etmq);
}
/* * We have a timestamp. Add it to the min heap to reflect when * instructions conveyed by the range packets of this traceID queue * started to execute. Once the same has been done for all the traceID * queues of each etmq, redenring and decoding can start in * chronological order. * * Note that packets decoded above are still in the traceID's packet * queue and will be processed in cs_etm__process_timestamped_queues().
*/
cs_queue_nr = TO_CS_QUEUE_NR(queue_nr, trace_chan_id);
ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp);
out: return ret;
}
/* * Set the number of records before early exit: ->nr is used to * determine how many branches to copy from ->entries.
*/
bs_dst->nr = bs_src->nr;
/* * Early exit when there is nothing to copy.
*/ if (!bs_src->nr) return;
/* * As bs_src->entries is a circular buffer, we need to copy from it in * two steps. First, copy the branches from the most recently inserted * branch ->last_branch_pos until the end of bs_src->entries buffer.
*/
nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos;
memcpy(&bs_dst->entries[0],
&bs_src->entries[tidq->last_branch_pos], sizeof(struct branch_entry) * nr);
/* * If we wrapped around at least once, the branches from the beginning * of the bs_src->entries buffer and until the ->last_branch_pos element * are older valid branches: copy them over. The total number of * branches copied over will be equal to the number of branches asked by * the user in last_branch_sz.
*/ if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) {
memcpy(&bs_dst->entries[nr],
&bs_src->entries[0], sizeof(struct branch_entry) * tidq->last_branch_pos);
}
}
cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes),
instrBytes, 0); /* * T32 instruction size is indicated by bits[15:11] of the first * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 * denote a 32-bit instruction.
*/ return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
}
staticinline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
{ /* * Return 0 for packets that have no addresses so that CS_ETM_INVAL_ADDR doesn't * appear in samples.
*/ if (packet->sample_type == CS_ETM_DISCONTINUITY ||
packet->sample_type == CS_ETM_EXCEPTION) return 0;
return packet->start_addr;
}
staticinline
u64 cs_etm__last_executed_instr(conststruct cs_etm_packet *packet)
{ /* Returns 0 for the CS_ETM_DISCONTINUITY packet */ if (packet->sample_type == CS_ETM_DISCONTINUITY) return 0;
/* * The branches are recorded in a circular buffer in reverse * chronological order: we start recording from the last element of the * buffer down. After writing the first element of the stack, move the * insert position back to the end of the buffer.
*/ if (!tidq->last_branch_pos)
tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz;
tidq->last_branch_pos -= 1;
be = &bs->entries[tidq->last_branch_pos];
be->from = cs_etm__last_executed_instr(tidq->prev_packet);
be->to = cs_etm__first_executed_instr(tidq->packet); /* No support for mispredict */
be->flags.mispred = 0;
be->flags.predicted = 1;
/* * Increment bs->nr until reaching the number of last branches asked by * the user on the command line.
*/ if (bs->nr < etmq->etm->synth_opts.last_branch_sz)
bs->nr += 1;
}
/* If no more data, drop the previous auxtrace_buffer and return */ if (!aux_buffer) { if (old_buffer)
auxtrace_buffer__drop_data(old_buffer);
etmq->buf_len = 0; return 0;
}
etmq->buffer = aux_buffer;
/* If the aux_buffer doesn't have data associated, try to load it */ if (!aux_buffer->data) { /* get the file desc associated with the perf data file */ int fd = perf_data__fd(etmq->etm->session->data);
aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); if (!aux_buffer->data) return -ENOMEM;
}
/* If valid, drop the previous buffer */ if (old_buffer)
auxtrace_buffer__drop_data(old_buffer);
if (etm->synth_opts.last_branch) {
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; /* * We don't use the hardware index, but the sample generation * code uses the new format branch_stack with this field, * so the event attributes must indicate that it's present.
*/
attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
}
/* * Record a branch when the last instruction in * PREV_PACKET is a branch.
*/ if (etm->synth_opts.last_branch &&
tidq->prev_packet->sample_type == CS_ETM_RANGE &&
tidq->prev_packet->last_instr_taken_branch)
cs_etm__update_last_branch_rb(etmq, tidq);
if (etm->synth_opts.instructions &&
tidq->period_instructions >= etm->instructions_sample_period) { /* * Emit instruction sample periodically * TODO: allow period to be defined in cycles and clock time
*/
/* * Below diagram demonstrates the instruction samples * generation flows: * * Instrs Instrs Instrs Instrs * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3) * | | | | * V V V V * -------------------------------------------------- * ^ ^ * | | * Period Period * instructions(Pi) instructions(Pi') * * | | * \---------------- -----------------/ * V * tidq->packet->instr_count * * Instrs Sample(n...) are the synthesised samples occurring * every etm->instructions_sample_period instructions - as * defined on the perf command line. Sample(n) is being the * last sample before the current etm packet, n+1 to n+3 * samples are generated from the current etm packet. * * tidq->packet->instr_count represents the number of * instructions in the current etm packet. * * Period instructions (Pi) contains the number of * instructions executed after the sample point(n) from the * previous etm packet. This will always be less than * etm->instructions_sample_period. * * When generate new samples, it combines with two parts * instructions, one is the tail of the old packet and another * is the head of the new coming packet, to generate * sample(n+1); sample(n+2) and sample(n+3) consume the * instructions with sample period. After sample(n+3), the rest * instructions will be used by later packet and it is assigned * to tidq->period_instructions for next round calculation.
*/
/* * Get the initial offset into the current packet instructions; * entry conditions ensure that instrs_prev is less than * etm->instructions_sample_period.
*/
u64 offset = etm->instructions_sample_period - instrs_prev;
u64 addr;
/* Prepare last branches for instruction sample */ if (etm->synth_opts.last_branch)
cs_etm__copy_last_branch_rb(etmq, tidq);
while (tidq->period_instructions >=
etm->instructions_sample_period) { /* * Calculate the address of the sampled instruction (-1 * as sample is reported as though instruction has just * been executed, but PC has not advanced to next * instruction)
*/
addr = cs_etm__instr_addr(etmq, trace_chan_id,
tidq->packet, offset - 1);
ret = cs_etm__synth_instruction_sample(
etmq, tidq, addr,
etm->instructions_sample_period); if (ret) return ret;
if (etm->synth_opts.branches) { bool generate_sample = false;
/* Generate sample for tracing on packet */ if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY)
generate_sample = true;
/* Generate sample for branch taken packet */ if (tidq->prev_packet->sample_type == CS_ETM_RANGE &&
tidq->prev_packet->last_instr_taken_branch)
generate_sample = true;
if (generate_sample) {
ret = cs_etm__synth_branch_sample(etmq, tidq); if (ret) return ret;
}
}
cs_etm__packet_swap(etm, tidq);
return 0;
}
static int cs_etm__exception(struct cs_etm_traceid_queue *tidq)
{
	/*
	 * When the exception packet is inserted, whether the last instruction
	 * in previous range packet is taken branch or not, we need to force
	 * to set 'prev_packet->last_instr_taken_branch' to true.  This ensures
	 * to generate branch sample for the instruction range before the
	 * exception is trapped to kernel or before the exception returning.
	 *
	 * The exception packet includes the dummy address values, so don't
	 * swap PACKET with PREV_PACKET.  This keeps PREV_PACKET to be useful
	 * for generating instruction and branch samples.
	 */
	if (tidq->prev_packet->sample_type == CS_ETM_RANGE)
		tidq->prev_packet->last_instr_taken_branch = true;

	return 0;
}
/* Prepare last branches for instruction sample */
cs_etm__copy_last_branch_rb(etmq, tidq);
/* * Generate a last branch event for the branches left in the * circular buffer at the end of the trace. * * Use the address of the end of the last reported execution * range
*/
addr = cs_etm__last_executed_instr(tidq->prev_packet);
if (etm->synth_opts.branches &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
err = cs_etm__synth_branch_sample(etmq, tidq); if (err) return err;
}
swap_packet:
cs_etm__packet_swap(etm, tidq);
/* Reset last branches after flush the trace */ if (etm->synth_opts.last_branch)
cs_etm__reset_last_branch_rb(tidq);
return err;
}
staticint cs_etm__end_block(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq)
{ int err;
/* * It has no new packet coming and 'etmq->packet' contains the stale * packet which was set at the previous time with packets swapping; * so skip to generate branch sample to avoid stale packet. * * For this case only flush branch stack and generate a last branch * event for the branches left in the circular buffer at the end of * the trace.
*/ if (etmq->etm->synth_opts.last_branch &&
etmq->etm->synth_opts.instructions &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
u64 addr;
/* Prepare last branches for instruction sample */
cs_etm__copy_last_branch_rb(etmq, tidq);
/* * Use the address of the end of the last reported execution * range.
*/
addr = cs_etm__last_executed_instr(tidq->prev_packet);
return 0;
}

/*
 * cs_etm__get_data_block: Fetch a block from the auxtrace_buffer queue
 *			   if need be.
 * Returns:	< 0	if error
 *		= 0	if no more auxtrace_buffer to read
 *		> 0	if the current buffer isn't empty yet
 */
static int cs_etm__get_data_block(struct cs_etm_queue *etmq)
{
	int ret;

	if (!etmq->buf_len) {
		ret = cs_etm__get_trace(etmq);
		if (ret <= 0)
			return ret;
		/*
		 * We cannot assume consecutive blocks in the data file
		 * are contiguous, reset the decoder to force re-sync.
		 */
		ret = cs_etm_decoder__reset(etmq->decoder);
		if (ret)
			return ret;
	}

	/* Remaining bytes in the current buffer (> 0 means keep decoding) */
	return etmq->buf_len;
}
switch (packet->isa) { case CS_ETM_ISA_T32: /* * The SVC of T32 is defined in ARM DDI 0487D.a, F5.1.247: * * b'15 b'8 * +-----------------+--------+ * | 1 1 0 1 1 1 1 1 | imm8 | * +-----------------+--------+ * * According to the specification, it only defines SVC for T32 * with 16 bits instruction and has no definition for 32bits; * so below only read 2 bytes as instruction size for T32.
*/
addr = end_addr - 2;
cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16),
(u8 *)&instr16, 0); if ((instr16 & 0xFF00) == 0xDF00) returntrue;
break; case CS_ETM_ISA_A32: /* * The SVC of A32 is defined in ARM DDI 0487D.a, F5.1.247: * * b'31 b'28 b'27 b'24 * +---------+---------+-------------------------+ * | !1111 | 1 1 1 1 | imm24 | * +---------+---------+-------------------------+
*/
addr = end_addr - 4;
cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
(u8 *)&instr32, 0); if ((instr32 & 0x0F000000) == 0x0F000000 &&
(instr32 & 0xF0000000) != 0xF0000000) returntrue;
break; case CS_ETM_ISA_A64: /* * The SVC of A64 is defined in ARM DDI 0487D.a, C6.2.294: * * b'31 b'21 b'4 b'0 * +-----------------------+---------+-----------+ * | 1 1 0 1 0 1 0 0 0 0 0 | imm16 | 0 0 0 0 1 | * +-----------------------+---------+-----------+
*/
addr = end_addr - 4;
cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32),
(u8 *)&instr32, 0); if ((instr32 & 0xFFE0001F) == 0xd4000001) returntrue;
if (magic == __perf_cs_etmv3_magic) if (packet->exception_number == CS_ETMV3_EXC_SVC) returntrue;
/* * ETMv4 exception type CS_ETMV4_EXC_CALL covers SVC, SMC and * HVC cases; need to check if it's SVC instruction based on * packet address.
*/ if (magic == __perf_cs_etmv4_magic) { if (packet->exception_number == CS_ETMV4_EXC_CALL &&
cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
prev_packet->end_addr)) returntrue;
}
/* * For CS_ETMV4_EXC_CALL, except SVC other instructions * (SMC, HVC) are taken as sync exceptions.
*/ if (packet->exception_number == CS_ETMV4_EXC_CALL &&
!cs_etm__is_svc_instr(etmq, trace_chan_id, prev_packet,
prev_packet->end_addr)) returntrue;
/* * ETMv4 has 5 bits for exception number; if the numbers * are in the range ( CS_ETMV4_EXC_FIQ, CS_ETMV4_EXC_END ] * they are implementation defined exceptions. * * For this case, simply take it as sync exception.
*/ if (packet->exception_number > CS_ETMV4_EXC_FIQ &&
packet->exception_number <= CS_ETMV4_EXC_END) returntrue;
}
switch (packet->sample_type) { case CS_ETM_RANGE: /* * Immediate branch instruction without neither link nor * return flag, it's normal branch instruction within * the function.
*/ if (packet->last_instr_type == OCSD_INSTR_BR &&
packet->last_instr_subtype == OCSD_S_INSTR_NONE) {
packet->flags = PERF_IP_FLAG_BRANCH;
if (packet->last_instr_cond)
packet->flags |= PERF_IP_FLAG_CONDITIONAL;
}
/* * Immediate branch instruction with link (e.g. BL), this is * branch instruction for function call.
*/ if (packet->last_instr_type == OCSD_INSTR_BR &&
packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_CALL;
/* * Indirect branch instruction with link (e.g. BLR), this is * branch instruction for function call.
*/ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
packet->last_instr_subtype == OCSD_S_INSTR_BR_LINK)
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_CALL;
/* * Indirect branch instruction with subtype of * OCSD_S_INSTR_V7_IMPLIED_RET, this is explicit hint for * function return for A32/T32.
*/ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
packet->last_instr_subtype == OCSD_S_INSTR_V7_IMPLIED_RET)
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_RETURN;
/* * Indirect branch instruction without link (e.g. BR), usually * this is used for function return, especially for functions * within dynamic link lib.
*/ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
packet->last_instr_subtype == OCSD_S_INSTR_NONE)
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_RETURN;
/* Return instruction for function return. */ if (packet->last_instr_type == OCSD_INSTR_BR_INDIRECT &&
packet->last_instr_subtype == OCSD_S_INSTR_V8_RET)
packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_RETURN;
/* * Decoder might insert a discontinuity in the middle of * instruction packets, fixup prev_packet with flag * PERF_IP_FLAG_TRACE_BEGIN to indicate restarting trace.
*/ if (prev_packet->sample_type == CS_ETM_DISCONTINUITY)
prev_packet->flags |= PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_TRACE_BEGIN;
/* * If the previous packet is an exception return packet * and the return address just follows SVC instruction, * it needs to calibrate the previous packet sample flags * as PERF_IP_FLAG_SYSCALLRET.
*/ if (prev_packet->flags == (PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_INTERRUPT) &&
cs_etm__is_svc_instr(etmq, trace_chan_id,
packet, packet->start_addr))
prev_packet->flags = PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_SYSCALLRET; break; case CS_ETM_DISCONTINUITY: /* * The trace is discontinuous, if the previous packet is * instruction packet, set flag PERF_IP_FLAG_TRACE_END * for previous packet.
*/ if (prev_packet->sample_type == CS_ETM_RANGE)
prev_packet->flags |= PERF_IP_FLAG_BRANCH |
PERF_IP_FLAG_TRACE_END; break; case CS_ETM_EXCEPTION:
ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic); if (ret) return ret;
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.56 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.