Quelle rcu_pending.c Sprache: C

// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) "%s() " fmt "\n", __func__

#include <linux/generic-radix-tree.h>
#include <linux/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/srcu.h>
#include <linux/vmalloc.h>

#include "rcu_pending.h"
#include "darray.h"
#include "util.h"

/*
 * static_array_for_each - iterate over every element of an array
 * @_a: the array itself (not a pointer to it); its length is taken with
 *      ARRAY_SIZE()
 * @_i: name of the element pointer declared by the loop
 *
 * Note that @_a is expanded more than once.
 */
#define static_array_for_each(_a, _i)			\
	for (typeof(&(_a)[0]) _i = (_a);		\
	     _i < (_a) + ARRAY_SIZE(_a);		\
	     _i++)

/*
 * Special values stored in rcu_pending.process in place of a real callback
 * pointer; __process_finished_items() and __rcu_pending_enqueue() switch on
 * them.
 */
enum rcu_pending_special {
/* objects are released with kvfree()/kfree_bulk() */
RCU_PENDING_KVFREE = 1,
/* each object's own rcu_head->func is invoked */
RCU_PENDING_CALL_RCU = 2,
};

/* The enum values, cast to the process callback type: */
#define RCU_PENDING_KVFREE_FN  ((rcu_pending_process_fn) (ulong) RCU_PENDING_KVFREE)
#define RCU_PENDING_CALL_RCU_FN  ((rcu_pending_process_fn) (ulong) RCU_PENDING_CALL_RCU)

/*
 * Grace period cookie type: a plain unsigned long when built for the kernel,
 * struct urcu_gp_poll_state otherwise.
 */
#ifdef __KERNEL__
typedef unsigned long			rcu_gp_poll_state_t;
#else
typedef struct urcu_gp_poll_state	rcu_gp_poll_state_t;
#endif

/* Returns true if @l and @r are the same grace period cookie. */
static inline bool rcu_gp_poll_cookie_eq(rcu_gp_poll_state_t l, rcu_gp_poll_state_t r)
{
#ifdef __KERNEL__
	return l == r;
#else
	return l.grace_period_id == r.grace_period_id;
#endif
}

/*
 * Dispatch get_state_synchronize_*(): SRCU when @ssp is non-NULL, regular RCU
 * otherwise.
 */
static inline rcu_gp_poll_state_t __get_state_synchronize_rcu(struct srcu_struct *ssp)
{
	if (!ssp)
		return get_state_synchronize_rcu();

	return get_state_synchronize_srcu(ssp);
}

/*
 * Dispatch start_poll_synchronize_*(): SRCU when @ssp is non-NULL, regular
 * RCU otherwise.
 */
static inline rcu_gp_poll_state_t __start_poll_synchronize_rcu(struct srcu_struct *ssp)
{
	if (!ssp)
		return start_poll_synchronize_rcu();

	return start_poll_synchronize_srcu(ssp);
}

/*
 * Dispatch poll_state_synchronize_*() for @cookie: SRCU when @ssp is
 * non-NULL, regular RCU otherwise.
 */
static inline bool __poll_state_synchronize_rcu(struct srcu_struct *ssp, rcu_gp_poll_state_t cookie)
{
	if (!ssp)
		return poll_state_synchronize_rcu(cookie);

	return poll_state_synchronize_srcu(ssp, cookie);
}

/*
 * Dispatch the barrier: srcu_barrier() when @ssp is non-NULL, rcu_barrier()
 * otherwise.
 *
 * Written as if/else rather than "return cond ? f() : g();": returning an
 * expression from a void function is only accepted as a compiler extension.
 */
static inline void __rcu_barrier(struct srcu_struct *ssp)
{
	if (ssp)
		srcu_barrier(ssp);
	else
		rcu_barrier();
}

/*
 * Queue @func on @rhp: via call_srcu() when @ssp is non-NULL, via call_rcu()
 * otherwise.
 */
static inline void __call_rcu(struct srcu_struct *ssp, struct rcu_head *rhp,
			      rcu_callback_t func)
{
	if (!ssp) {
		call_rcu(rhp, func);
		return;
	}

	call_srcu(ssp, rhp, func);
}

/* Objects queued under one grace period cookie, stored in a radix tree. */
struct rcu_pending_seq {
/*
 * We're using a radix tree like a vector - we're just pushing elements
 * onto the end; we're using a radix tree instead of an actual vector to
 * avoid reallocation overhead
 */
GENRADIX(struct rcu_head *) objs;
/* number of entries currently stored in @objs */
size_t    nr;
/*
 * slot the next entry is written to; NULL when the slot for index @nr
 * has to be looked up (and possibly allocated) first
 */
struct rcu_head   **cursor;
/* grace period cookie the entries are waiting on */
rcu_gp_poll_state_t  seq;
};

/* Singly linked list of rcu_heads queued under one grace period cookie. */
struct rcu_pending_list {
/* first element, NULL if the list is empty */
struct rcu_head   *head;
/* last element; only meaningful while @head is non-NULL */
struct rcu_head   *tail;
/* grace period cookie the elements are waiting on */
rcu_gp_poll_state_t  seq;
};

/* Per-CPU state of a rcu_pending. */
struct rcu_pending_pcpu {
/* the rcu_pending this per-CPU state belongs to */
struct rcu_pending  *parent;
/* taken in this file around accesses to @objs, @lists and @cb_armed */
spinlock_t   lock;
/* CPU number, used to queue @work on this CPU */
int    cpu;

/*
 * We can't bound the number of unprocessed gp sequence numbers, and we
 * can't efficiently merge radix trees for expired grace periods, so we
 * need darray/vector:
 */
DARRAY_PREALLOCATED(struct rcu_pending_seq, 4) objs;

/*
 * Linked lists, one per active grace period cookie; the last entry
 * (index NUM_ACTIVE_RCU_POLL_OLDSTATE) collects objects whose grace
 * period has expired:
 */
struct rcu_pending_list  lists[NUM_ACTIVE_RCU_POLL_OLDSTATE + 1];

/* RCU callback head; rcu_pending_rcu_cb() is the callback queued on it */
struct rcu_head   cb;
/* true while @cb is queued via __call_rcu() */
bool    cb_armed;
/* work item running rcu_pending_work() */
struct work_struct  work;
};

static bool __rcu_pending_has_pending(struct rcu_pending_pcpu *p)
{
if (p->objs.nr)
  return true;

static_array_for_each(p->lists, i)
  if (i->head)
   return true;

return false;
}

/*
 * Append all of @l2 to the end of @l1 and leave @l2 empty.
 *
 * @l1's tail is unconditionally replaced by @l2's tail; the only caller in
 * this file passes a non-empty @l2.
 */
static void rcu_pending_list_merge(struct rcu_pending_list *l1,
				   struct rcu_pending_list *l2)
{
	if (l1->head) {
		/* chain @l2 behind the current tail */
#ifdef __KERNEL__
		l1->tail->next = l2->head;
#else
		l1->tail->next.next = (void *) l2->head;
#endif
	} else {
		l1->head = l2->head;
	}

	l1->tail = l2->tail;

	l2->tail = NULL;
	l2->head = NULL;
}

/* Append @n to the end of @l; @n becomes the new, NULL-terminated tail. */
static void rcu_pending_list_add(struct rcu_pending_list *l,
				 struct rcu_head *n)
{
	if (l->head) {
#ifdef __KERNEL__
		l->tail->next = n;
#else
		l->tail->next.next = (void *) n;
#endif
	} else {
		l->head = n;
	}

	l->tail = n;

#ifdef __KERNEL__
	n->next = NULL;
#else
	n->next.next = NULL;
#endif
}

/*
 * Move every non-empty per-cookie list whose grace period has elapsed onto
 * the expired list (the last entry of @p->lists).
 */
static void merge_expired_lists(struct rcu_pending_pcpu *p)
{
	struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];
	struct rcu_pending_list *l;

	for (l = p->lists; l < expired; l++) {
		if (!l->head)
			continue;
		if (!__poll_state_synchronize_rcu(p->parent->srcu, l->seq))
			continue;

		rcu_pending_list_merge(expired, l);
	}
}

#ifndef __KERNEL__
/*
 * Non-kernel stand-in for kfree_bulk(): free each of the @nr pointers stored
 * in the array @p.
 */
static inline void kfree_bulk(size_t nr, void **p)
{
	/* advance through the array - every slot must be freed exactly once */
	while (nr--)
		kfree(*p++);
}
#endif

/*
 * Detach everything on @p whose grace period has elapsed and process it.
 *
 * Entered with @p->lock held (irq state saved in @flags); the lock is dropped
 * before any object is processed and is NOT retaken.
 *
 * At most one radix tree (the first entry of @p->objs, if its cookie has
 * expired) plus the whole expired list are handled per call. How an object is
 * processed depends on @pending->process: kvfree mode, rcu_head->func mode, or
 * the user supplied callback.
 */
static noinline void __process_finished_items(struct rcu_pending *pending,
           struct rcu_pending_pcpu *p,
           unsigned long flags)
{
struct rcu_pending_list *expired = &p->lists[NUM_ACTIVE_RCU_POLL_OLDSTATE];
struct rcu_pending_seq objs = {};
struct rcu_head *list = NULL;

/* take ownership of the first radix tree if its grace period is over */
if (p->objs.nr &&
     __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) {
  objs = p->objs.data[0];
  darray_remove_item(&p->objs, p->objs.data);
}

merge_expired_lists(p);

/* detach the expired list so it can be walked without the lock */
list = expired->head;
expired->head = expired->tail = NULL;

spin_unlock_irqrestore(&p->lock, flags);

switch ((ulong) pending->process) {
case RCU_PENDING_KVFREE:
  /* radix tree entries are freed in bulk, at most one tree node at a time */
  for (size_t i = 0; i < objs.nr; ) {
   size_t nr_this_node = min(GENRADIX_NODE_SIZE / sizeof(void *), objs.nr - i);

   kfree_bulk(nr_this_node, (void **) genradix_ptr(&objs.objs, i));
   i += nr_this_node;
  }
  genradix_free(&objs.objs);

  while (list) {
   struct rcu_head *obj = list;
   /* fetch the successor first: @obj may be freed below */
#ifdef __KERNEL__
   list = obj->next;
#else
   list = (void *) obj->next.next;
#endif

   /*
    * low bit of pointer indicates whether rcu_head needs
    * to be freed - kvfree_rcu_mightsleep()
    */
   BUILD_BUG_ON(ARCH_SLAB_MINALIGN == 0);

   /* in this mode ->func carries the pointer to free, not a function */
   void *ptr = (void *)(((unsigned long) obj->func) & ~1UL);
   bool free_head = ((unsigned long) obj->func) & 1UL;

   kvfree(ptr);
   if (free_head)
    kfree(obj);
  }

  break;

case RCU_PENDING_CALL_RCU:
  /* every object is an rcu_head carrying its own callback */
  for (size_t i = 0; i < objs.nr; i++) {
   struct rcu_head *obj = *genradix_ptr(&objs.objs, i);
   obj->func(obj);
  }
  genradix_free(&objs.objs);

  while (list) {
   struct rcu_head *obj = list;
   /* fetch the successor before handing @obj to its callback */
#ifdef __KERNEL__
   list = obj->next;
#else
   list = (void *) obj->next.next;
#endif
   obj->func(obj);
  }
  break;

default:
  /* generic mode: @pending->process is a real callback */
  for (size_t i = 0; i < objs.nr; i++)
   pending->process(pending, *genradix_ptr(&objs.objs, i));
  genradix_free(&objs.objs);

  while (list) {
   struct rcu_head *obj = list;
   /* fetch the successor before handing @obj to the callback */
#ifdef __KERNEL__
   list = obj->next;
#else
   list = (void *) obj->next.next;
#endif
   pending->process(pending, obj);
  }
  break;
}
}

static bool process_finished_items(struct rcu_pending *pending,
       struct rcu_pending_pcpu *p,
       unsigned long flags)
{
/*
* XXX: we should grab the gp seq once and avoid multiple function
* calls, this is called from __rcu_pending_enqueue() fastpath in
* may_sleep==true mode
*/
if ((p->objs.nr && __poll_state_synchronize_rcu(pending->srcu, p->objs.data[0].seq)) ||
     (p->lists[0].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[0].seq)) ||
     (p->lists[1].head && __poll_state_synchronize_rcu(pending->srcu, p->lists[1].seq)) ||
     p->lists[2].head) {
  __process_finished_items(pending, p, flags);
  return true;
}

return false;
}

/*
 * Work item: keep processing finished objects of this CPU's rcu_pending_pcpu
 * until a pass finds nothing ready.
 */
static void rcu_pending_work(struct work_struct *work)
{
	struct rcu_pending_pcpu *p =
		container_of(work, struct rcu_pending_pcpu, work);
	struct rcu_pending *pending = p->parent;
	unsigned long flags;

	for (;;) {
		spin_lock_irqsave(&p->lock, flags);

		/* a true return means the lock was dropped for us */
		if (!process_finished_items(pending, p, flags))
			break;
	}

	spin_unlock_irqrestore(&p->lock, flags);
}

static void rcu_pending_rcu_cb(struct rcu_head *rcu)
{
struct rcu_pending_pcpu *p = container_of(rcu, struct rcu_pending_pcpu, cb);

schedule_work_on(p->cpu, &p->work);

unsigned long flags;
spin_lock_irqsave(&p->lock, flags);
if (__rcu_pending_has_pending(p)) {
  spin_unlock_irqrestore(&p->lock, flags);
  __call_rcu(p->parent->srcu, &p->cb, rcu_pending_rcu_cb);
} else {
  p->cb_armed = false;
  spin_unlock_irqrestore(&p->lock, flags);
}
}

/*
 * Find the radix tree entry of @p->objs for grace period cookie @seq,
 * searching from the newest entry backwards; if there is none, append a fresh
 * one with a GFP_ATOMIC allocation.
 *
 * Returns the entry, or NULL if appending failed.
 */
static __always_inline struct rcu_pending_seq *
get_object_radix(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq)
{
	struct rcu_pending_seq fresh = { .seq = seq };

	darray_for_each_reverse(p->objs, objs) {
		if (rcu_gp_poll_cookie_eq(objs->seq, seq))
			return objs;
	}

	if (darray_push_gfp(&p->objs, fresh, GFP_ATOMIC))
		return NULL;

	return &darray_last(p->objs);
}

/*
 * rcu_pending_enqueue_list - slowpath: queue an object on a linked list
 * @p:     per-CPU state to queue on
 * @seq:   grace period cookie the object has to wait for
 * @head:  rcu_head to link the object with; may be NULL only if @ptr is set
 * @ptr:   pointer to free in kvfree mode, NULL otherwise
 * @flags: saved irq state belonging to @p->lock
 *
 * Called with @p->lock held, and always returns with it held. The lock is
 * dropped and retaken internally if an rcu_head has to be allocated with
 * GFP_KERNEL.
 *
 * Returns true if the object went onto a list that was empty before (the
 * caller uses this to decide whether a grace period must be started), false
 * if it joined an existing list for @seq or if the grace period expired while
 * the lock was dropped and the object was freed right away.
 */
static noinline bool
rcu_pending_enqueue_list(struct rcu_pending_pcpu *p, rcu_gp_poll_state_t seq,
			 struct rcu_head *head, void *ptr,
			 unsigned long *flags)
{
	if (ptr) {
		if (!head) {
			/*
			 * kvfree_rcu_mightsleep(): we weren't passed an
			 * rcu_head, but we need one: use the low bit of the
			 * pointer to free to flag that the head needs to be
			 * freed as well:
			 */
			ptr = (void *)(((unsigned long) ptr)|1UL);
			head = kmalloc(sizeof(*head), __GFP_NOWARN);
			if (!head) {
				spin_unlock_irqrestore(&p->lock, *flags);
				head = kmalloc(sizeof(*head), GFP_KERNEL|__GFP_NOFAIL);
				/*
				 * dropped lock, did GFP_KERNEL allocation,
				 * check for gp expiration
				 */
				if (unlikely(__poll_state_synchronize_rcu(p->parent->srcu, seq))) {
					/* strip the flag bit set above before freeing */
					kvfree((void *)(((unsigned long) ptr) & ~1UL));
					kfree(head);
					spin_lock_irqsave(&p->lock, *flags);
					return false;
				}

				/*
				 * Retake the lock before touching p->lists
				 * below; the caller also expects to unlock it
				 * after we return.
				 */
				spin_lock_irqsave(&p->lock, *flags);
			}
		}

		/* kvfree mode: ->func carries the pointer to free */
		head->func = ptr;
	}
again:
	/* join an existing list for this cookie, if there is one */
	for (struct rcu_pending_list *i = p->lists;
	     i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
		if (rcu_gp_poll_cookie_eq(i->seq, seq)) {
			rcu_pending_list_add(i, head);
			return false;
		}
	}

	/* otherwise claim an empty list for this cookie */
	for (struct rcu_pending_list *i = p->lists;
	     i < p->lists + NUM_ACTIVE_RCU_POLL_OLDSTATE; i++) {
		if (!i->head) {
			i->seq = seq;
			rcu_pending_list_add(i, head);
			return true;
		}
	}

	/* all lists in use: move expired ones out of the way and retry */
	merge_expired_lists(p);
	goto again;
}

/*
 * __rcu_pending_enqueue: enqueue a pending RCU item, to be processed (via
 * pending->process) once grace period elapses.
 *
 * Attempt to enqueue items onto a radix tree; if memory allocation fails, fall
 * back to a linked list.
 *
 * - If @ptr is NULL, we're enqueuing an item for a generic @pending with a
 *   process callback
 *
 * - If @ptr and @head are both not NULL, we're kvfree_rcu()
 *
 * - If @ptr is not NULL and @head is, we're kvfree_rcu_mightsleep()
 *
 * - If @may_sleep is true, will do GFP_KERNEL memory allocations and process
 *   expired items.
 *
 * If the grace period is found to have elapsed after the lock had to be
 * dropped, the item is processed immediately instead of being queued.
 */
static __always_inline void
__rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *head,
        void *ptr, bool may_sleep)
{

struct rcu_pending_pcpu *p;
struct rcu_pending_seq *objs;
struct genradix_node *new_node = NULL;
unsigned long flags;
bool start_gp = false;

/* @ptr must be passed exactly when @pending is in kvfree mode */
BUG_ON((ptr != NULL) != (pending->process == RCU_PENDING_KVFREE_FN));

/* We could technically be scheduled before taking the lock and end up
 * using a different cpu's rcu_pending_pcpu: that's ok, it needs a lock
 * anyways
 *
 * And we have to do it this way to avoid breaking PREEMPT_RT, which
 * redefines how spinlocks work:
 */
p = raw_cpu_ptr(pending->p);
spin_lock_irqsave(&p->lock, flags);
rcu_gp_poll_state_t seq = __get_state_synchronize_rcu(pending->srcu);
restart:
/*
 * A true return from process_finished_items() means it dropped the lock;
 * check_expired retakes it before coming back to restart:
 */
if (may_sleep &&
     unlikely(process_finished_items(pending, p, flags)))
  goto check_expired;

/*
 * In kvfree_rcu() mode, the radix tree is only for slab pointers so
 * that we can do kfree_bulk() - vmalloc pointers always use the linked
 * list:
 */
if (ptr && unlikely(is_vmalloc_addr(ptr)))
  goto list_add;

objs = get_object_radix(p, seq);
if (unlikely(!objs))
  goto list_add;

if (unlikely(!objs->cursor)) {
  /*
   * New radix tree nodes must be added under @p->lock because the
   * tree root is in a darray that can be resized (typically,
   * genradix supports concurrent unlocked allocation of new
   * nodes) - hence preallocation and the retry loop:
   */
  objs->cursor = genradix_ptr_alloc_preallocated_inlined(&objs->objs,
      objs->nr, &new_node, GFP_ATOMIC|__GFP_NOWARN);
  if (unlikely(!objs->cursor)) {
   if (may_sleep) {
    /* drop the lock and preallocate a node with GFP_KERNEL */
    spin_unlock_irqrestore(&p->lock, flags);

    /*
     * NOTE(review): presumably __GFP_NOFAIL is used without a
     * @head because the list fallback would then need an
     * allocation of its own - confirm
     */
    gfp_t gfp = GFP_KERNEL;
    if (!head)
     gfp |= __GFP_NOFAIL;

    new_node = genradix_alloc_node(gfp);
    /* allocation failed: the next pass takes the list fallback */
    if (!new_node)
     may_sleep = false;
    goto check_expired;
   }
   /* also the target of the "goto list_add" jumps above */
list_add:
   start_gp = rcu_pending_enqueue_list(p, seq, head, ptr, &flags);
   goto start_gp;
  }
}

/* the radix tree stores @ptr in kvfree mode, @head otherwise */
*objs->cursor++ = ptr ?: head;
/* zero cursor if we hit the end of a radix tree node: */
if (!(((ulong) objs->cursor) & (GENRADIX_NODE_SIZE - 1)))
  objs->cursor = NULL;
/* first entry of this radix tree */
start_gp = !objs->nr;
objs->nr++;
start_gp:
if (unlikely(start_gp)) {
  /*
   * We only have one callback (ideally, we would have one for
   * every outstanding graceperiod) - so if our callback is
   * already in flight, we may still have to start a grace period
   * (since we used get_state() above, not start_poll())
   */
  if (!p->cb_armed) {
   p->cb_armed = true;
   __call_rcu(pending->srcu, &p->cb, rcu_pending_rcu_cb);
  } else {
   __start_poll_synchronize_rcu(pending->srcu);
  }
}
spin_unlock_irqrestore(&p->lock, flags);
free_node:
/* release a preallocated node that is still set here */
if (new_node)
  genradix_free_node(new_node);
return;
check_expired:
/*
 * Reached with the lock dropped: if the grace period for @seq has elapsed
 * in the meantime, process the item right here instead of queueing it:
 */
if (unlikely(__poll_state_synchronize_rcu(pending->srcu, seq))) {
  switch ((ulong) pending->process) {
  case RCU_PENDING_KVFREE:
   kvfree(ptr);
   break;
  case RCU_PENDING_CALL_RCU:
   head->func(head);
   break;
  default:
   pending->process(pending, head);
   break;
  }
  goto free_node;
}

/* not expired: retake the lock (possibly on another CPU's @p) and retry */
p = raw_cpu_ptr(pending->p);
spin_lock_irqsave(&p->lock, flags);
goto restart;
}

/*
 * rcu_pending_enqueue - queue @obj on @pending
 *
 * Calls __rcu_pending_enqueue() with no kvfree pointer and may_sleep set to
 * true.
 */
void rcu_pending_enqueue(struct rcu_pending *pending, struct rcu_head *obj)
{
__rcu_pending_enqueue(pending, obj, NULL, true);
}

static struct rcu_head *rcu_pending_pcpu_dequeue(struct rcu_pending_pcpu *p)
{
struct rcu_head *ret = NULL;

spin_lock_irq(&p->lock);
darray_for_each(p->objs, objs)
  if (objs->nr) {
   ret = *genradix_ptr(&objs->objs, --objs->nr);
   objs->cursor = NULL;
   if (!objs->nr)
    genradix_free(&objs->objs);
   goto out;
  }

static_array_for_each(p->lists, i)
  if (i->head) {
   ret = i->head;
#ifdef __KERNEL__
   i->head = ret->next;
#else
   i->head = (void *) ret->next.next;
#endif
   if (!i->head)
    i->tail = NULL;
   goto out;
  }
out:
spin_unlock_irq(&p->lock);

return ret;
}

/*
 * rcu_pending_dequeue - take one queued object back off @pending
 *
 * Only looks at the rcu_pending_pcpu selected by raw_cpu_ptr(); returns NULL
 * if that one holds no object.
 */
struct rcu_head *rcu_pending_dequeue(struct rcu_pending *pending)
{
return rcu_pending_pcpu_dequeue(raw_cpu_ptr(pending->p));
}

/*
 * rcu_pending_dequeue_from_all - take one queued object back off @pending
 *
 * Tries rcu_pending_dequeue() first, then every possible CPU's state in turn.
 * Returns NULL if no CPU holds an object.
 */
struct rcu_head *rcu_pending_dequeue_from_all(struct rcu_pending *pending)
{
	struct rcu_head *obj = rcu_pending_dequeue(pending);
	int cpu;

	if (obj)
		return obj;

	for_each_possible_cpu(cpu) {
		obj = rcu_pending_pcpu_dequeue(per_cpu_ptr(pending->p, cpu));
		if (obj)
			return obj;
	}

	return NULL;
}

/*
 * Returns true if any possible CPU's state still holds objects or has its
 * RCU callback armed. Each CPU's state is inspected under its own lock.
 */
static bool rcu_pending_has_pending_or_armed(struct rcu_pending *pending)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
		bool busy;

		spin_lock_irq(&p->lock);
		busy = __rcu_pending_has_pending(p) || p->cb_armed;
		spin_unlock_irq(&p->lock);

		if (busy)
			return true;
	}

	return false;
}

void rcu_pending_exit(struct rcu_pending *pending)
{
int cpu;

if (!pending->p)
  return;

while (rcu_pending_has_pending_or_armed(pending)) {
  __rcu_barrier(pending->srcu);

  for_each_possible_cpu(cpu) {
   struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
   flush_work(&p->work);
  }
}

for_each_possible_cpu(cpu) {
  struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);
  flush_work(&p->work);
}

for_each_possible_cpu(cpu) {
  struct rcu_pending_pcpu *p = per_cpu_ptr(pending->p, cpu);

  static_array_for_each(p->lists, i)
   WARN_ON(i->head);
  WARN_ON(p->objs.nr);
  darray_exit(&p->objs);
}
free_percpu(pending->p);
}

/**
 * rcu_pending_init() - initialize a rcu_pending
 * @pending:	Object to init
 * @srcu:	Optional srcu_struct; if NULL, the normal RCU flavor is used
 * @process:	Callback invoked on objects once their grace period has
 *		elapsed, or one of RCU_PENDING_KVFREE_FN /
 *		RCU_PENDING_CALL_RCU_FN to select the built-in kvfree() /
 *		rcu_head->func handling
 *
 * Return: 0 on success, -ENOMEM if the percpu allocation fails.
 */
int rcu_pending_init(struct rcu_pending *pending,
		     struct srcu_struct *srcu,
		     rcu_pending_process_fn process)
{
	int cpu;

	pending->p = alloc_percpu(struct rcu_pending_pcpu);
	if (!pending->p)
		return -ENOMEM;

	pending->srcu	 = srcu;
	pending->process = process;

	for_each_possible_cpu(cpu) {
		struct rcu_pending_pcpu *pcpu = per_cpu_ptr(pending->p, cpu);

		pcpu->parent	= pending;
		pcpu->cpu	= cpu;
		spin_lock_init(&pcpu->lock);
		darray_init(&pcpu->objs);
		INIT_WORK(&pcpu->work, rcu_pending_work);
	}

	return 0;
}

Messung V0.5

¤ Dauer der Verarbeitung: 0.12 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.