	/* Make sure the below netif_xmit_frozen_or_stopped()
	 * checking happens after clearing STATE_MISSED.
	 */
	smp_mb__after_atomic();

	/* Checking netif_xmit_frozen_or_stopped() again to
	 * make sure STATE_MISSED is set if the STATE_MISSED
	 * set by netif_tx_wake_queue()'s rescheduling of
	 * net_tx_action() is cleared by the above clear_bit().
	 */
	if (!netif_xmit_frozen_or_stopped(txq))
		set_bit(__QDISC_STATE_MISSED, &q->state);
	else
		set_bit(__QDISC_STATE_DRAINING, &q->state);
}
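/* An illustrative interleaving (a sketch, not from the source) of the race
 * the two comments above guard against:
 *
 *	CPU1 (netif_tx_wake_queue())		CPU0 (this path)
 *	wakes the queue, sets STATE_MISSED
 *	to reschedule net_tx_action()
 *						clear_bit(STATE_MISSED)
 *						  (wipes CPU1's request)
 *						smp_mb__after_atomic();
 *						recheck: queue no longer
 *						stopped, so set_bit() restores
 *						the lost reschedule request
 */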
/* Main transmission queue. */
/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
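/* A minimal sketch (not part of this file) of what "serialized via qdisc
 * root lock" means for a non-TCQ_F_NOLOCK qdisc: the enqueue and dequeue
 * paths take the same per-qdisc spinlock returned by qdisc_lock().
 * example_locked_access is a hypothetical helper for illustration only.
 */
static void example_locked_access(struct Qdisc *q)
{
	spinlock_t *root_lock = qdisc_lock(q);

	spin_lock(root_lock);
	/* touching q->q (the skb queue) and the qstats is safe here */
	spin_unlock(root_lock);
}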
	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	while (skb) {
		struct sk_buff *next = skb->next;

		__skb_queue_tail(&q->gso_skb, skb);

		/* it's still part of the queue */
		if (qdisc_is_percpu_stats(q)) {
			qdisc_qstats_cpu_requeues_inc(q);
			qdisc_qstats_cpu_backlog_inc(q, skb);
			qdisc_qstats_cpu_qlen_inc(q);
		} else {
			q->qstats.requeues++;
			qdisc_qstats_backlog_inc(q, skb);
			q->q.qlen++;
		}
/* This variant of try_bulk_dequeue_skb() makes sure
 * all skbs in the chain are for the same txq
 */
static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
				      struct sk_buff *skb,
				      int *packets)
{
	int mapping = skb_get_queue_mapping(skb);
	struct sk_buff *nskb;
	int cnt = 0;

	do {
		nskb = q->dequeue(q);
		if (!nskb)
			break;
		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
			qdisc_enqueue_skb_bad_txq(q, nskb);
			break;
		}
		skb->next = nskb;
		skb = nskb;
	} while (++cnt < 8);
	(*packets) += cnt;
	skb_mark_not_on_list(skb);
}
/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
 * A requeued skb (via q->gso_skb) can also be a SKB list.
 */
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
				   int *packets, int budget)
{
	const struct netdev_queue *txq = q->dev_queue;
	struct sk_buff *skb = NULL;

	*packets = 1;
	if (unlikely(!skb_queue_empty(&q->gso_skb))) {
		spinlock_t *lock = NULL;

		if (q->flags & TCQ_F_NOLOCK) {
			lock = qdisc_lock(q);
			spin_lock(lock);
		}

		skb = skb_peek(&q->gso_skb);

		/* skb may be null if another cpu pulls gso_skb off in between
		 * empty check and lock.
		 */
		if (!skb) {
			if (lock)
				spin_unlock(lock);
			goto validate;
		}

		/* skb in gso_skb were already validated */
		*validate = false;
		if (xfrm_offload(skb))
			*validate = true;
		/* check the reason of requeuing without tx lock first */
		txq = skb_get_tx_queue(txq->dev, skb);
		if (!netif_xmit_frozen_or_stopped(txq)) {
			skb = __skb_dequeue(&q->gso_skb);
			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_dec(q, skb);
				qdisc_qstats_cpu_qlen_dec(q);
			} else {
				qdisc_qstats_backlog_dec(q, skb);
				q->q.qlen--;
			}
		} else {
			skb = NULL;
			qdisc_maybe_clear_missed(q, txq);
		}
		if (lock)
			spin_unlock(lock);
		goto trace;
	}
validate:
	*validate = true;
/* * Transmit possibly several skbs, and handle the return status as * required. Owning qdisc running bit guarantees that only one CPU * can execute this function. * * Returns to the caller: * false - hardware queue frozen backoff * true - feel free to send more pkts
*/ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, struct net_device *dev, struct netdev_queue *txq,
spinlock_t *root_lock, bool validate)
{ int ret = NETDEV_TX_BUSY; bool again = false;
/* And release qdisc */ if (root_lock)
spin_unlock(root_lock);
/* Note that we validate skb (GSO, checksum, ...) outside of locks */ if (validate)
skb = validate_xmit_skb_list(skb, dev, &again);
#ifdef CONFIG_XFRM_OFFLOAD if (unlikely(again)) { if (root_lock)
spin_lock(root_lock);
dev_requeue_skb(skb, q); returnfalse;
} #endif
if (likely(skb)) {
HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq))
skb = dev_hard_start_xmit(skb, dev, txq, &ret); else
qdisc_maybe_clear_missed(q, txq);
HARD_TX_UNLOCK(dev, txq);
} else { if (root_lock)
spin_lock(root_lock); returntrue;
}
if (root_lock)
spin_lock(root_lock);
if (!dev_xmit_complete(ret)) { /* Driver returned NETDEV_TX_BUSY - requeue skb */ if (unlikely(ret != NETDEV_TX_BUSY))
net_warn_ratelimited("BUG %s code %d qlen %d\n",
dev->name, ret, q->q.qlen);
dev_requeue_skb(skb, q); returnfalse;
}
returntrue;
}
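/* For context: a simplified sketch (from memory, not part of this excerpt) of
 * the __qdisc_run()-style loop that consumes the true/false result above via
 * qdisc_restart(); the quota value and the NOLOCK rescheduling details vary
 * by kernel version.
 */
static void example_qdisc_run(struct Qdisc *q)
{
	int quota = 64;		/* stand-in for the real dev_tx_weight */
	int packets;

	/* Keep transmitting while the qdisc is non-empty and the driver
	 * accepts packets; stop once the quota is spent so a single queue
	 * cannot monopolize the CPU.
	 */
	while (qdisc_restart(q, &packets, quota)) {
		quota -= packets;
		if (quota <= 0)
			break;
	}
}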
/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * running seqcount guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 * netif_tx_lock serializes accesses to device driver.
 *
 * qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 * if one is grabbed, another must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *	false  - queue is empty or throttled.
 *	true   - queue is not empty.
 */
static inline bool qdisc_restart(struct Qdisc *q, int *packets, int budget)
{
	spinlock_t *root_lock = NULL;
	struct netdev_queue *txq;
	struct net_device *dev;
	struct sk_buff *skb;
	bool validate;
	for (i = 1; i < dev->num_tx_queues; i++) {
		val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start);
		if (val && time_after(val, res))
			res = val;
	}

	return res;
}
EXPORT_SYMBOL(dev_trans_start);
static void netif_freeze_queues(struct net_device *dev)
{
	unsigned int i;
	int cpu;

	cpu = smp_processor_id();
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		/* We are the only thread of execution doing a
		 * freeze, but we have to grab the _xmit_lock in
		 * order to synchronize with threads which are in
		 * the ->hard_start_xmit() handler and already
		 * checked the frozen bit.
		 */
		__netif_tx_lock(txq, cpu);
		set_bit(__QUEUE_STATE_FROZEN, &txq->state);
		__netif_tx_unlock(txq);
	}
}
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		/* No need to grab the _xmit_lock here. If the
		 * queue is not stopped for another reason, we
		 * force a schedule.
		 */
		clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
		netif_schedule_queue(txq);
	}
}
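/* For context, a sketch (from memory, not part of this excerpt) of how these
 * helpers are consumed: drivers do not call them directly, they back the
 * netif_tx_lock()/netif_tx_unlock() pair; netif_unfreeze_queues is the
 * presumed name of the loop shown above.
 */
void netif_tx_lock(struct net_device *dev)
{
	/* Serialize whole-device TX state changes, then freeze each queue. */
	spin_lock(&dev->tx_global_lock);
	netif_freeze_queues(dev);
}

void netif_tx_unlock(struct net_device *dev)
{
	netif_unfreeze_queues(dev);
	spin_unlock(&dev->tx_global_lock);
}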
/**
 * netif_carrier_on - set carrier
 * @dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_up_count);
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);
/**
 * netif_carrier_off - clear carrier
 * @dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_down_count);
		linkwatch_fire_event(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_off);
/**
 * netif_carrier_event - report carrier state event
 * @dev: network device
 *
 * Device has detected a carrier event but the carrier state wasn't changed.
 * Use in drivers when querying carrier state asynchronously, to avoid missing
 * events (link flaps) if link recovers before it's queried.
 */
void netif_carrier_event(struct net_device *dev)
{
	if (dev->reg_state == NETREG_UNINITIALIZED)
		return;
	atomic_inc(&dev->carrier_up_count);
	atomic_inc(&dev->carrier_down_count);
	linkwatch_fire_event(dev);
}
EXPORT_SYMBOL_GPL(netif_carrier_event);
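/* A hypothetical driver-side usage sketch (example_link_irq and flap_latched
 * are made-up names; only the netif_carrier_* calls are real API): report a
 * link flap the hardware latched even though the link has already recovered,
 * so user space still sees the event.
 */
static irqreturn_t example_link_irq(int irq, void *data)
{
	struct net_device *dev = data;
	bool flap_latched = true;	/* stand-in: hw saw link go down and up */

	if (flap_latched && netif_carrier_ok(dev))
		netif_carrier_event(dev);	/* state unchanged, log the flap */

	return IRQ_HANDLED;
}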
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces under all circumstances. It is difficult to invent anything faster or cheaper.
*/
staticint noqueue_init(struct Qdisc *qdisc, struct nlattr *opt, struct netlink_ext_ack *extack)
{ /* register_qdisc() assigns a default of noop_enqueue if unset, * but __dev_queue_xmit() treats noqueue only as such
* if this is NULL - so clear it here. */
qdisc->enqueue = NULL; return 0;
}
retry:
	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
		struct skb_array *q = band2list(priv, band);

		if (__skb_array_empty(q))
			continue;

		skb = __skb_array_consume(q);
	}
	if (likely(skb)) {
		qdisc_update_stats_at_dequeue(qdisc, skb);
	} else if (need_retry &&
		   READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
		/* Delay clearing the STATE_MISSED here to reduce
		 * the overhead of the second spin_trylock() in
		 * qdisc_run_begin() and __netif_schedule() calling
		 * in qdisc_run_end().
		 */
		clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
		clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);

		/* Make sure dequeuing happens after clearing
		 * STATE_MISSED.
		 */
		smp_mb__after_atomic();

		/* NULL ring is possible if destroy path is due to a failed
		 * skb_array_init() in pfifo_fast_init() case.
		 */
		if (!q->ring.queue)
			continue;
		/* Destroy ring but no need to kfree_skb because a call to
		 * pfifo_fast_reset() has already done that work.
		 */
		ptr_ring_cleanup(&q->ring, NULL);
	}
}
	/* seqlock has the same scope of busylock, for NOLOCK qdisc */
	spin_lock_init(&sch->seqlock);
	lockdep_set_class(&sch->seqlock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
/* Version of qdisc_put() that is called with rtnl mutex unlocked.
 * Intended to be used as optimization, this function only takes rtnl lock if
 * qdisc reference counter reached zero.
 */
	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	rcu_assign_pointer(dev_queue->qdisc_sleeping, qdisc);
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);
		val = (qdisc_is_running(q) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));

		spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}
/**
 * dev_deactivate_many - deactivate transmissions on several devices
 * @head: list of devices to deactivate
 *
 * This function returns only when all outstanding transmissions
 * have completed, unless all devices are in dismantle phase.
 */
void dev_deactivate_many(struct list_head *head)
{
	bool sync_needed = false;
	struct net_device *dev;

		if (dev_ingress_queue(dev))
			dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
	}

	/* Wait for outstanding qdisc_run calls. */
	list_for_each_entry(dev, head, close_list) {
		while (some_qdisc_is_busy(dev)) {
			/* wait_event() would avoid this sleep-loop but would
			 * require expensive checks in the fast paths of packet
			 * processing which isn't worth it.
			 */
			schedule_timeout_uninterruptible(1);
		}
	}
}
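/* The single-device wrapper builds a one-entry list for the function above;
 * roughly (a sketch from memory, not part of this excerpt):
 */
void dev_deactivate(struct net_device *dev)
{
	LIST_HEAD(single);

	list_add(&dev->close_list, &single);
	dev_deactivate_many(&single);
	list_del(&single);
}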
	for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping);
		/* Only update the default qdiscs we created,
		 * qdiscs with handles are always hashed.
		 */
		if (qdisc != &noop_qdisc && !qdisc->handle)
			qdisc_hash_del(qdisc);
	}
	for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
		qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc_sleeping);
		if (qdisc != &noop_qdisc && !qdisc->handle)
			qdisc_hash_add(qdisc, false);
	}
#endif
}
EXPORT_SYMBOL(mq_change_real_num_tx);
int dev_qdisc_change_tx_queue_len(struct net_device *dev)
{
	bool up = dev->flags & IFF_UP;
	unsigned int i;
	int ret = 0;

	if (up)
		dev_deactivate(dev);

	for (i = 0; i < dev->num_tx_queues; i++) {
		ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]);

		/* TODO: revert changes on a partial failure */
		if (ret)
			break;
	}
/**
 * psched_ratecfg_precompute__() - Pre-compute values for reciprocal division
 * @rate: Rate to compute reciprocal division values of
 * @mult: Multiplier for reciprocal division
 * @shift: Shift for reciprocal division
 *
 * The multiplier and shift for reciprocal division by rate are stored
 * in mult and shift.
 *
 * The deal here is to replace a divide by a reciprocal one
 * in fast path (a reciprocal divide is a multiply and a shift).
 *
 * The normal formula would be:
 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
 *
 * We compute mult/shift to use instead:
 *  time_in_ns = (len * mult) >> shift;
 *
 * We try to get the highest possible mult value for accuracy,
 * but have to make sure no overflows will ever happen.
 *
 * reciprocal_value() is not used here because it doesn't handle 64-bit values.
 */
static void psched_ratecfg_precompute__(u64 rate, u32 *mult, u8 *shift)
{
	u64 factor = NSEC_PER_SEC;
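	/* Worked example (an editor's illustration, not from the source):
	 * for rate = 125,000,000 bytes/sec (1 Gbit/s wire rate), we need
	 * mult / 2^shift ~= NSEC_PER_SEC / rate = 1e9 / 125e6 = 8 ns/byte.
	 * Taking shift = 28 and mult = 8 << 28 = 2147483648 (fits in u32),
	 * a 1500-byte frame costs (1500 * 2147483648) >> 28 = 12000 ns,
	 * exactly 1500 bytes * 8 ns/byte, with no division in the fast path.
	 */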
	/* We need to make sure that readers won't see the miniq
	 * we are about to modify. So ensure that at least one RCU
	 * grace period has elapsed since the miniq was made
	 * inactive.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		cond_synchronize_rcu(miniq->rcu_state);
	else if (!poll_state_synchronize_rcu(miniq->rcu_state))
		synchronize_rcu_expedited();

	if (miniq_old)
		/* This is the counterpart of the rcu sync above. We need to
		 * block potential new users of miniq_old until no readers
		 * can still see it.
		 */
		miniq_old->rcu_state = start_poll_synchronize_rcu();
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);
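/* The cookie-based grace-period pattern used above, shown in isolation (a
 * sketch: the RCU APIs are real, example_obj and the two helpers are
 * illustrative names).
 */
struct example_obj {
	unsigned long rcu_state;	/* cookie from start_poll_synchronize_rcu() */
};

static void example_retire(struct example_obj *obj)
{
	/* Retire: record "a grace period starting now". Any reader that
	 * could still see obj falls within this grace period.
	 */
	obj->rcu_state = start_poll_synchronize_rcu();
}

static void example_reuse(struct example_obj *obj)
{
	/* Reuse: wait only if the recorded grace period has not yet elapsed,
	 * avoiding a full synchronize_rcu() in the common case.
	 */
	if (!poll_state_synchronize_rcu(obj->rcu_state))
		synchronize_rcu_expedited();
}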