/* Table of raw performance events: two hex codes, a pointer slot and a
 * description string per entry.
 *
 * If HT is enabled, a maximum of 4 events (5 if one is instructions
 * retired) can be specified; if HT is disabled, a maximum of 8 (9 if
 * one is instructions retired) can be specified.
 *
 * From Table 19-1, "Architectural Performance Events", Architectures
 * Software Developer's Manual Volume 3: System Programming Guide.
 *
 * NOTE(review): struct raw_perf_event is declared outside this view. The
 * first two fields look like an event-select code and a unit mask, and the
 * third is a pointer that starts out NULL -- confirm against the struct
 * declaration.
 */
struct raw_perf_event perf_events[] = {
{ 0x3c, 0x00, NULL, "Unhalted CPU Cycles" },
{ 0xc0, 0x00, NULL, "Instruction Retired" }
};
/* Entry count of perf_events[], taken with ARRAY_SIZE(). */
#define NUM_EVTS (ARRAY_SIZE(perf_events))
/* WARNING: PMU config is currently broken!
 *
 * NOTE(review): this block does not read as one coherent function.
 * The signature and the first four statements (log the id of the CPU we
 * are running on, with preemption disabled around the lookup) match the
 * name. Everything after that computes timing statistics on `rec`, which
 * is never declared here; neither are invoked_cnt, ns_per_call_remainder,
 * ns_per_call_tmp_rem, invoked_cnt_precision, pmc_ipc_remainder,
 * pmc_ipc_tmp_rem or pmc_ipc_div. The parameter `enable` and the locals
 * i, perf_conf and perf_event are never used. Presumably the tail of the
 * PMU setup and the head of a separate stats-calculation function were
 * lost -- restore from the original file before building.
 *
 * NOTE(review): `returnfalse;` and `returntrue;` are single identifiers as
 * written, not return statements; they look like `return false;` and
 * `return true;` with the space lost. The braces do not balance either:
 * the `}` just before `returntrue;` already closes the function body,
 * which leaves `returntrue;` and the final `}` at file scope.
 */
bool time_bench_PMU_config(bool enable)
{ int i; struct perf_event_attr perf_conf; struct perf_event *perf_event; int cpu;
/* Look up the current CPU id with preemption disabled, and log it. */
preempt_disable();
cpu = smp_processor_id();
pr_info("DEBUG: cpu:%d\n", cpu);
preempt_enable();
/* Loop mode: refuse to compute stats from fewer than 1000 invocations,
 * and from counts that do not fit in 32 bits.
 */
if (rec->flags & TIME_BENCH_LOOP) { if (rec->invoked_cnt < 1000) {
pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
rec->invoked_cnt); returnfalse;
} if (rec->invoked_cnt > ((1ULL << 32) - 1)) { /* div_u64_rem() can only divide by a 32-bit value */
pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
rec->invoked_cnt); returnfalse;
}
/* Range-checked just above, so the narrowing cast loses nothing. */
invoked_cnt = (uint32_t)rec->invoked_cnt;
}
/* TSC (Time-Stamp Counter) records */
if (rec->flags & TIME_BENCH_TSC) {
/* Elapsed TSC value; a zero interval is treated as a failed measurement. */
rec->tsc_interval = rec->tsc_stop - rec->tsc_start; if (rec->tsc_interval == 0) {
pr_err("ABORT: timing took ZERO TSC time\n"); returnfalse;
} /* Calculate stats: per-invocation value in loop mode, else the whole interval */ if (rec->flags & TIME_BENCH_LOOP)
rec->tsc_cycles = rec->tsc_interval / invoked_cnt; else
rec->tsc_cycles = rec->tsc_interval;
}
/* Wall-clock time calc */
if (rec->flags & TIME_BENCH_WALLCLOCK) {
/* Flatten the start/stop timestamps (tv_sec, tv_nsec) into nanoseconds. */
rec->time_start = rec->ts_start.tv_nsec +
(NANOSEC_PER_SEC * rec->ts_start.tv_sec);
rec->time_stop = rec->ts_stop.tv_nsec +
(NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
/* A zero interval is treated as a failed measurement. */
rec->time_interval = rec->time_stop - rec->time_start; if (rec->time_interval == 0) {
pr_err("ABORT: timing took ZERO wallclock time\n"); returnfalse;
}
/* Calculate stats */
/*** Division in kernel is tricky ***/
/* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
/* remainder only correct because NANOSEC_PER_SEC is 10^9 */
rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
&rec->time_sec_remainder); //TODO: use existing struct timespec records instead of div?
if (rec->flags & TIME_BENCH_LOOP) {
/*** Division in kernel is tricky ***/
/* Orig: ns = ((double)time_interval / invoked_cnt); */
/* First get quotient: whole nanoseconds per invocation */
rec->ns_per_call_quotient =
div_u64_rem(rec->time_interval, invoked_cnt,
&ns_per_call_remainder);
/* Now get decimals .xxx precision (incorrect roundup).
 * remainder / (invoked_cnt / 1000) approximates
 * remainder * 1000 / invoked_cnt; the integer division of the
 * divisor is what makes the result inexact.
 */
ns_per_call_tmp_rem = ns_per_call_remainder;
invoked_cnt_precision = invoked_cnt / 1000; if (invoked_cnt_precision > 0) {
rec->ns_per_call_decimal =
div_u64_rem(ns_per_call_tmp_rem,
invoked_cnt_precision,
&ns_per_call_remainder);
}
}
}
/* Calc Instruction Per Cycle (IPC) */
/* First get quotient */
/* NOTE(review): no check that rec->pmc_clk is non-zero is visible here,
 * and the 32-bit divisor limit noted for invoked_cnt presumably applies
 * to rec->pmc_clk as well -- confirm its type and range.
 */
rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk,
&pmc_ipc_remainder);
/* Now get decimals .xxx precision (incorrect roundup), using the same
 * divisor / 1000 scheme as the ns-per-call decimals.
 */
pmc_ipc_tmp_rem = pmc_ipc_remainder;
pmc_ipc_div = rec->pmc_clk / 1000; if (pmc_ipc_div > 0) {
rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem,
pmc_ipc_div,
&pmc_ipc_remainder);
}
/* NOTE(review): the next brace has no matching opener in the visible
 * code; an enclosing `if` around the IPC calculation seems to be missing.
 */
}
returntrue;
}
/* Generic function for invoking a loop function and calculating
 * execution time stats. The function being called/timed is assumed
 * to perform a tight loop, and update the timing record struct.
 *
 * @loops: requested iteration count, stored in the record
 * @step:  stored in the record unchanged
 * @txt:   not referenced anywhere in the visible body
 * @data:  opaque pointer handed through to @func
 * @func:  function to time; a zero return is treated as failure
 *
 * NOTE(review): no stats calculation or printing is visible in this
 * body. After the invoke-count warning the statements switch to using
 * cpu, sync, newmask and verbose, none of which are declared in this
 * function, and the body ends in `return 0;` although the return type
 * is bool. This looks like the tail of a per-CPU kthread function spliced
 * onto the head of time_bench_loop() -- restore from the original file.
 * `returnfalse;` below is a single identifier as written; presumably
 * `return false;` with the space lost.
 */
bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, int (*func)(struct time_bench_record *record, void *data))
{ struct time_bench_record rec;
/* Setup record */
memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
rec.version_abi = 1;
rec.loops = loops;
rec.step = step;
rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
/*** Loop function being timed ***/
if (!func(&rec, data)) {
pr_err("ABORT: function being timed failed\n"); returnfalse;
}
/* Only a warning: a short invoke count does not abort.
 * NOTE(review): loops is uint32_t but is printed with %d.
 */
if (rec.invoked_cnt < loops)
pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n",
rec.invoked_cnt, loops);
/* Restrict CPU */
/* NOTE(review): newmask is neither declared nor initialised in the
 * visible code, and the result of set_cpus_allowed_ptr() is discarded.
 */
cpumask_set_cpu(cpu->rec.cpu, &newmask);
set_cpus_allowed_ptr(current, &newmask);
/* Synchronize start of concurrency test: announce ourselves as running,
 * then block until the start event is completed.
 */
atomic_inc(&sync->nr_tests_running);
wait_for_completion(&sync->start_event);
/* Start benchmark function; log the configured CPU and the CPU we are
 * actually on.
 */
if (!cpu->bench_func(&cpu->rec, data)) {
pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
cpu->rec.cpu, smp_processor_id());
} else { if (verbose)
pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
smp_processor_id());
}
/* Set on both the success and the failure path. */
cpu->did_bench_run = true;
/* End test */
atomic_dec(&sync->nr_tests_running); /* Wait for kthread_stop() telling us to stop */ while (!kthread_should_stop()) {
set_current_state(TASK_INTERRUPTIBLE);
schedule();
}
/* NOTE(review): returns 0 (i.e. false) from a bool function on the path
 * that runs to completion.
 */
__set_current_state(TASK_RUNNING); return 0;
}
void time_bench_print_stats_cpumask(constchar *desc, struct time_bench_cpu *cpu_tasks, conststruct cpumask *mask)
{
uint64_t average = 0; int cpu; int step = 0; struct sum {
uint64_t tsc_cycles; int records;
} sum = { 0 };
/* Spawn off jobs on all CPUs */
for_each_cpu(cpu, mask) { struct time_bench_cpu *c = &cpu_tasks[cpu];
running++;
c->sync = sync; /* Send sync variable along */
c->data = data; /* Send opaque along */
/* Init benchmark record */
memset(&c->rec, 0, sizeof(struct time_bench_record));
c->rec.version_abi = 1;
c->rec.loops = loops;
c->rec.step = step;
c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
TIME_BENCH_WALLCLOCK);
c->rec.cpu = cpu;
c->bench_func = func;
c->task = kthread_run(invoke_test_on_cpu_func, c, "time_bench%d", cpu); if (IS_ERR(c->task)) {
pr_err("%s(): Failed to start test func\n", __func__); return; /* Argh, what about cleanup?! */
}
}
/* Wait until all processes are running */ while (atomic_read(&sync->nr_tests_running) < running) {
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(10);
} /* Kick off all CPU concurrently on completion event */
complete_all(&sync->start_event);
/* Wait for CPUs to finish */ while (atomic_read(&sync->nr_tests_running)) {
set_current_state(TASK_UNINTERRUPTIBLE);
schedule_timeout(10);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.