/*
 * NOTE(review): this span is a fragment — the enclosing function header is
 * not visible here, and the extraction has fused several statements per line.
 * It looks like the timestamp-element lookup used by contention_begin:
 * spinning locks use a per-cpu array map (tstamp_cpu), sleeping locks use a
 * per-task hash map (tstamp) keyed by pid — TODO confirm against upstream.
 */
/* Use per-cpu array map for spinlock and rwlock */ if ((flags & (LCB_F_SPIN | LCB_F_MUTEX)) == LCB_F_SPIN) {
__u32 idx = 0;
/* Nested-lock guard: an element with a live ->lock is left untouched. */
pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx); /* Do not update the element for nested locks */ if (pelem && pelem->lock)
pelem = NULL; return pelem;
}
pid = bpf_get_current_pid_tgid();
/* Same nested-lock guard for the per-task hash map path. */
pelem = bpf_map_lookup_elem(&tstamp, &pid); /* Do not update the element for nested locks */ if (pelem && pelem->lock) return NULL;
/* NOTE(review): the fragment is cut here — the body that inserts `zero`
 * into the map is missing from this chunk. */
if (pelem == NULL) { struct tstamp_data zero = {};
/*
 * NOTE(review): spliced fragment. L9-L24 look like the owner-tracking path of
 * contention_begin (create/refresh the owner_data entry for this lock), while
 * the comment at L25 and the code from L27 onward belong to contention_end
 * (re-resolving pelem and updating the owner on lock release). Lines between
 * the two appear to be missing from this chunk — verify against upstream
 * tools/perf/util/bpf_skel/lock_contention.bpf.c before editing.
 */
otdata = bpf_map_lookup_elem(&owner_data, &pelem->lock);
id = get_owner_stack_id(buf);
/* * Contention just happens, or corner case `lock` is owned by process not * `owner_pid`. For the corner case we treat it as unexpected internal error and * just ignore the previous tracing record.
*/ if (!otdata || otdata->pid != owner_pid) { struct owner_tracing_data first = {
.pid = owner_pid,
.timestamp = pelem->timestamp,
.count = 1,
.stack_id = id,
};
/* First waiter observed for this lock: seed the tracing record. */
bpf_map_update_elem(&owner_data, &pelem->lock, &first, BPF_ANY);
} /* Contention is ongoing and new waiter joins */ else {
__sync_fetch_and_add(&otdata->count, 1);
/* * The owner is the same, but stacktrace might be changed. In this case we * store/update `owner_stat` based on current owner stack id.
*/ if (id != otdata->stack_id) {
update_owner_stat(id, pelem->timestamp - otdata->timestamp,
pelem->flags);
/* * For spinlock and rwlock, it needs to get the timestamp for the * per-cpu map. However, contention_end does not have the flags * so it cannot know whether it reads percpu or hash map. * * Try per-cpu map first and check if there's active contention. * If it is, do not read hash map because it cannot go to sleeping * locks before releasing the spinning locks.
*/
/* ctx[0] is the lock address passed to the contention tracepoint;
 * bail out if the recorded contention is for a different lock. */
pelem = bpf_map_lookup_elem(&tstamp_cpu, &idx); if (pelem && pelem->lock) { if (pelem->lock != ctx[0]) return 0;
} else {
pid = bpf_get_current_pid_tgid();
pelem = bpf_map_lookup_elem(&tstamp, &pid); if (!pelem || pelem->lock != ctx[0]) return 0;
/* Hash-map entries are deleted at the end of the handler. */
need_delete = true;
}
/* No contention is occurring, delete `lock` entry in `owner_data` */ if (otdata->count <= 1)
bpf_map_delete_elem(&owner_data, &pelem->lock); /* * Contention is still ongoing, with a new owner (current task). `owner_data` * should be updated accordingly.
*/ else {
u32 i = 0;
s32 ret = (s32)ctx[1];
u64 *buf;
/* Zero the scratch stack buffer before collecting the owner stack. */
buf = bpf_map_lookup_elem(&stack_buf, &i); if (!buf) goto skip_owner; for (i = 0; i < (u32)max_stack; i++)
buf[i] = 0x0;
/* * `ret` has the return code of the lock function. * If `ret` is negative, the current task terminates lock waiting without * acquiring it. Owner is not changed, but we still need to update the owner * stack.
*/ if (ret < 0) {
s32 id = 0; struct task_struct *task;
/* bpf_task_from_pid may be absent on older kernels (kfunc). */
if (!bpf_task_from_pid) goto skip_owner;
/* NOTE(review): the code that walks the owner task's stack and the
 * matching bpf_task_release() appear to be missing from this chunk;
 * `id` stays 0 as shown, which looks like lost lines — confirm. */
task = bpf_task_from_pid(otdata->pid); if (!task) goto skip_owner;
/* * If owner stack is changed, update owner stack id for this lock.
*/ if (id != otdata->stack_id)
otdata->stack_id = id;
} /* * Otherwise, update tracing data with the current task, which is the new * owner.
*/ else {
otdata->pid = pid; /* * We don't want to retrieve callstack here, since it is where the * current task acquires the lock and provides no additional * information. We simply assign -1 to invalidate it.
*/
otdata->stack_id = -1;
}
}
}
/*
 * NOTE(review): tail of contention_end. `key`, `first`, `duration`, `data`
 * and `err` are declared earlier in the function, outside this chunk.
 * Builds the aggregation key according to aggr_mode, then creates or updates
 * the lock_stat entry and cleans up the tstamp element.
 */
skip_owner: switch (aggr_mode) { case LOCK_AGGR_CALLER:
key.stack_id = pelem->stack_id; break; case LOCK_AGGR_TASK: if (lock_owner)
/* When tracking the owner, pelem->flags holds the owner pid here —
 * presumably stashed by contention_begin; verify against upstream. */
key.pid = pelem->flags; else { if (!need_delete)
pid = bpf_get_current_pid_tgid();
key.pid = pid;
} if (needs_callstack)
key.stack_id = pelem->stack_id; break; case LOCK_AGGR_ADDR:
key.lock_addr_or_cgroup = pelem->lock; if (needs_callstack)
key.stack_id = pelem->stack_id; break; case LOCK_AGGR_CGROUP:
key.lock_addr_or_cgroup = get_current_cgroup_id(); break; default: /* should not happen */ return 0;
}
/* No entry yet: try to create one unless the map is already known full. */
data = bpf_map_lookup_elem(&lock_stat, &key); if (!data) { if (data_map_full) {
__sync_fetch_and_add(&data_fail, 1); goto out;
}
/* Check if it's from a slab object */ if (bpf_get_kmem_cache) { struct kmem_cache *s; struct slab_cache_data *d;
s = bpf_get_kmem_cache(pelem->lock); if (s != NULL) { /* * Save the ID of the slab cache in the flags * (instead of full address) to reduce the * space in the contention_data.
*/
d = bpf_map_lookup_elem(&slab_caches, &s); if (d != NULL)
first.flags |= d->id;
}
}
}
/* BPF_NOEXIST: lose the race gracefully and re-read on -EEXIST. */
err = bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST); if (err < 0) { if (err == -EEXIST) { /* it lost the race, try to get it again */
data = bpf_map_lookup_elem(&lock_stat, &key); if (data != NULL) goto found;
} if (err == -E2BIG)
/* Map capacity reached: remember so later events skip the insert. */
data_map_full = 1;
__sync_fetch_and_add(&data_fail, 1);
} goto out;
}
found:
update_contention_data(data, duration, 1);
out: if (lock_delay)
check_lock_delay(pelem->lock);
/* Clear ->lock so the per-cpu element can be reused; hash entries are
 * deleted outright. */
pelem->lock = 0; if (need_delete)
bpf_map_delete_elem(&tstamp, &pid); return 0;
}
/*
 * Runs on the bpf_test_finish raw tracepoint and stores the current
 * monotonic timestamp in the global end_ts.
 */
SEC("raw_tp/bpf_test_finish")
int BPF_PROG(end_timestamp)
{
	end_ts = bpf_ktime_get_ns();
	return 0;
}
/*
 * bpf_iter__kmem_cache added recently so old kernels don't have it in the
 * vmlinux.h.  But we cannot add it here since it will cause a compiler error
 * due to redefinition of the struct on later kernels.
 *
 * So it uses a CO-RE trick to access the member only if it has the type.
 * This will support both old and new kernels without compiler errors.
 *
 * preserve_access_index makes libbpf relocate member offsets at load time.
 */
struct bpf_iter__kmem_cache___new { struct kmem_cache *s;
} __attribute__((preserve_access_index));
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.