/* * For requesting a deferred user space stack trace from NMI context * the architecture must support a safe cmpxchg in NMI context. * For those architectures that do not have that, then it cannot ask * for a deferred user space stack trace from an NMI context. If it * does, then it will get -EINVAL.
*/ #ifdefined(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) # define CAN_USE_IN_NMI 1 staticinlinebool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
{
u32 old = 0;
return try_cmpxchg(&info->id.cnt, &old, cnt);
} #else # define CAN_USE_IN_NMI 0 /* When NMIs are not allowed, this always succeeds */ staticinlinebool try_assign_cnt(struct unwind_task_info *info, u32 cnt)
{
info->id.cnt = cnt; returntrue;
} #endif
/*
 * Make the cache fit in a 4K page: the number of entries is what remains
 * of SZ_4K after subtracting sizeof(struct unwind_cache), divided by
 * sizeof(long).
 */
#define UNWIND_MAX_ENTRIES \
	((SZ_4K - sizeof(struct unwind_cache)) / sizeof(long))
/* Guards adding to or removing from the list of callbacks */
static DEFINE_MUTEX(callback_mutex);
/* The list of callbacks that callback_mutex guards */
static LIST_HEAD(callbacks);
/*
 * This is a unique percpu identifier for a given task entry context.
 * Conceptually, it's incremented every time the CPU enters the kernel from
 * user space, so that each "entry context" on the CPU gets a unique ID.  In
 * reality, as an optimization, it's only incremented on demand for the first
 * deferred unwind request after a given entry-from-user.
 *
 * It's combined with the CPU id to make a systemwide-unique "context cookie".
 */
static DEFINE_PER_CPU(u32, unwind_ctx_ctr);
/*
 * Return the context cookie for @info, generating it on first use.
 *
 * The cookie identifies a user space stacktrace. Because that stacktrace
 * does not change while the task stays in the kernel, the cookie doubles
 * as an identifier for the stacktrace itself. A stacktrace can still end
 * up with a second cookie if a new request arrives after the cookie was
 * cleared but before the task returns to user space.
 *
 * A non-zero @info->id.cpu means a cookie already exists and is returned
 * unchanged. Otherwise a new count is derived from the per-CPU counter,
 * offered to try_assign_cnt(), and the CPU field is filled in.
 */
static u64 get_cookie(struct unwind_task_info *info)
{
	u32 next;

	if (!info->id.cpu) {
		/* Forcing the LSB on guarantees the count is never 0 */
		next = (__this_cpu_read(unwind_ctx_ctr) + 2) | 1;

		/* Only advance the per-CPU counter if our value was taken */
		if (try_assign_cnt(info, next))
			__this_cpu_write(unwind_ctx_ctr, next);

		/*
		 * Interrupts are disabled, so the CPU cannot change under us.
		 * The +1 keeps the stored CPU value non-zero.
		 */
		info->id.cpu = smp_processor_id() + 1;
	}

	return info->id.id;
}
/** * unwind_user_faultable - Produce a user stacktrace in faultable context * @trace: The descriptor that will store the user stacktrace * * This must be called in a known faultable context (usually when entering * or exiting user space). Depending on the available implementations * the @trace will be loaded with the addresses of the user space stacktrace * if it can be found. * * Return: 0 on success and negative on error * On success @trace will contain the user space stacktrace
*/ int unwind_user_faultable(struct unwind_stacktrace *trace)
{ struct unwind_task_info *info = ¤t->unwind_info; struct unwind_cache *cache;
/* Should always be called from faultable context */
might_fault();
if (!current->mm) return -EINVAL;
if (!info->cache) {
info->cache = kzalloc(struct_size(cache, entries, UNWIND_MAX_ENTRIES),
GFP_KERNEL); if (!info->cache) return -ENOMEM;
}
if (cache->nr_entries) { /* * The user stack has already been previously unwound in this * entry context. Skip the unwind and use the cache.
*/
trace->nr = cache->nr_entries; return 0;
}
/* Clear pending bit but make sure to have the current bits */
bits = atomic_long_fetch_andnot(UNWIND_PENDING,
(atomic_long_t *)&info->unwind_mask); /* * From here on out, the callback must always be called, even if it's * just an empty trace.
*/
trace.nr = 0;
trace.entries = NULL;
unwind_user_faultable(&trace);
if (info->cache)
bits &= ~(info->cache->unwind_completed);
/**
 * unwind_deferred_request - Request a user stacktrace on task kernel exit
 * @work: Unwind descriptor requesting the trace
 * @cookie: The cookie of the first request made for this task
 *
 * Schedule a user space unwind to be done in task work before exiting the
 * kernel.
 *
 * The returned @cookie output is the generated cookie of the very first
 * request for a user space stacktrace for this task since it entered the
 * kernel. It can be from a request by any caller of this infrastructure.
 * Its value will also be passed to the callback function.  It can be
 * used to stitch kernel and user stack traces together in post-processing.
 *
 * It's valid to call this function multiple times for the same @work within
 * the same task entry context.  Each call will return the same cookie
 * while the task hasn't left the kernel. If the callback is not pending
 * because it has already been previously called for the same entry context,
 * it will be called again with the same stack trace and cookie.
 *
 * Return: 0 if the callback successfully was queued.
 *         1 if the callback is pending or was already executed.
 *         Negative if there's an error.
 *         @cookie holds the cookie of the first request by any user
 */
int unwind_deferred_request(struct unwind_work *work, u64 *cookie)
{
	/*
	 * NOTE(review): the declarations below are garbled in this copy of
	 * the file. "¤t" is presumably "&current" and "unsignedlong" is
	 * presumably "unsigned long" -- restore from the original source.
	 */
	struct unwind_task_info *info = ¤t->unwind_info;
	unsignedlong old, bits;
	unsignedlong bit;
	int ret;

	/* Report cookie 0 on every early-exit path below */
	*cookie = 0;

	/* Reject kernel threads, exiting tasks and tasks not entered from user mode */
	if ((current->flags & (PF_KTHREAD | PF_EXITING)) ||
	    !user_mode(task_pt_regs(current)))
		return -EINVAL;

	/*
	 * NMI requires having safe cmpxchg operations.
	 * Trigger a warning to make it obvious that an architecture
	 * is using this in NMI when it should not be.
	 */
	if (WARN_ON_ONCE(!CAN_USE_IN_NMI && in_nmi()))
		return -EINVAL;

	/*
	 * Do not allow cancelled works to request again.
	 *
	 * NOTE(review): "bit" appears to be declared unsigned long above, so
	 * "bit < 0" can never be true and a work->bit of -1 (the value a
	 * cancel path elsewhere in this file stores) would not be rejected
	 * here. Presumably the value should be read into a signed variable
	 * before the check -- confirm against the declaration of work->bit.
	 */
	bit = READ_ONCE(work->bit);
	if (WARN_ON_ONCE(bit < 0))
		return -EINVAL;

	/* Only need the mask now */
	bit = BIT(bit);

	/* Interrupts stay disabled from here to the end of the scope */
	guard(irqsave)();

	*cookie = get_cookie(info);

	old = READ_ONCE(info->unwind_mask);

	/* Is this already queued or executed */
	if (old & bit)
		return 1;

	/*
	 * This work's bit hasn't been set yet. Now set it with the PENDING
	 * bit and fetch the current value of unwind_mask. If either the
	 * work's bit or PENDING was already set, then this is already queued
	 * to have a callback.
	 */
	bits = UNWIND_PENDING | bit;
	old = atomic_long_fetch_or(bits, (atomic_long_t *)&info->unwind_mask);
	if (old & bits) {
		/*
		 * If the work's bit was set, whatever set it had better
		 * have also set pending and queued a callback.
		 *
		 * NOTE(review): this returns the masked bit value converted
		 * to int rather than a literal 1; verify that matches the
		 * documented "1 if pending" contract for every bit position.
		 */
		WARN_ON_ONCE(!(old & UNWIND_PENDING));
		return old & bit;
	}

	/* The work has been claimed, now schedule it. */
	ret = task_work_add(current, &info->work, TWA_RESUME);

	/* On failure, clear the mask so the claim made above is dropped */
	if (WARN_ON_ONCE(ret))
		WRITE_ONCE(info->unwind_mask, 0);
	/*
	 * NOTE(review): the function text ends here in this copy; no return
	 * statement or closing brace follows. Restore from the original source.
	 */
/* Do not allow any more requests and prevent callbacks */
work->bit = -1;
__clear_bit(bit, &unwind_mask);
synchronize_srcu(&unwind_srcu);
guard(rcu)(); /* Clear this bit from all threads */
for_each_process_thread(g, t) {
clear_bit(bit, &t->unwind_info.unwind_mask); if (t->unwind_info.cache)
clear_bit(bit, &t->unwind_info.cache->unwind_completed);
}
}
int unwind_deferred_init(struct unwind_work *work, unwind_callback_t func)
{
memset(work, 0, sizeof(*work));
guard(mutex)(&callback_mutex);
/* See if there's a bit in the mask available */ if (unwind_mask == ~0UL) return -EBUSY;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.