Source: Paul E. McKenney "Stochastic Fairness Queuing", IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
Paul E. McKenney "Stochastic Fairness Queuing", "Interworking: Research and Experience", v.2, 1991, p.113-131.
See also: M. Shreedhar and George Varghese "Efficient Fair Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
This is not the thing that is usually called (W)FQ nowadays. It does not use any timestamp mechanism, but instead processes queues in round-robin order.
ADVANTAGE:
- It is very cheap. Both CPU and memory requirements are minimal.
DRAWBACKS:
- "Stochastic" -> It is not 100% fair. When hash collisions occur, several flows are considered as one.
- "Round-robin" -> It introduces larger delays than virtual clock based schemes, and should not be used for isolating interactive traffic from non-interactive. It means, that this scheduler should be used as leaf of CBQ or P3, which put interactive traffic to higher priority band.
We still need true WFQ for top level CSZ, but using WFQ for the best effort traffic is absolutely pointless: SFQ is superior for this purpose.
IMPLEMENTATION: This implementation limits: - maximal queue length per flow to 127 packets; - max mtu to 2^18-1; - max 65408 flows; - number of hash buckets to 65536.
It is easy to increase these values, but not in flight. */
#define SFQ_MAX_DEPTH 127 /* max number of packets per flow */ #define SFQ_DEFAULT_FLOWS 128 #define SFQ_MAX_FLOWS (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */ #define SFQ_EMPTY_SLOT 0xffff #define SFQ_DEFAULT_HASH_DIVISOR 1024
/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ typedef u16 sfq_index;
/*
 * We don't use pointers to save space.
 * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
 * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
 * are 'pointers' to dep[] array
 */
struct sfq_head {
	sfq_index	next;
	sfq_index	prev;
};
/*
 * Per-flow state: a doubly linked skb list plus the bookkeeping needed
 * for the round-robin scan and per-flow RED.
 * Fix: the extracted text had fused type keywords ("unsignedshort",
 * "unsignedint"), which do not compile.
 */
struct sfq_slot {
	struct sk_buff	*skblist_next;	/* head of this flow's skb list */
	struct sk_buff	*skblist_prev;	/* tail of this flow's skb list */
	sfq_index	qlen;		/* number of skbs in skblist */
	sfq_index	next;		/* next slot in sfq RR chain */
	struct sfq_head dep;		/* anchor in dep[] chains */
	unsigned short	hash;		/* hash value (index in ht[]) */
	int		allot;		/* credit for this slot */

	unsigned int	backlog;	/* bytes currently queued in this flow */
	struct red_vars vars;		/* per-flow RED state */
};
struct sfq_sched_data { /* frequently used fields */ int limit; /* limit of total number of packets in this qdisc */ unsignedint divisor; /* number of slots in hash table */
u8 headdrop;
u8 maxdepth; /* limit of packets per flow */
struct red_parms *red_parms; struct tc_sfqred_stats stats; struct sfq_slot *tail; /* current slot in round */
struct sfq_head dep[SFQ_MAX_DEPTH + 1]; /* Linked lists of slots, indexed by depth * dep[0] : list of unused flows * dep[1] : list of flows with 1 packet * dep[X] : list of flows with X packets
*/
unsignedint maxflows; /* number of flows in flows array */ int perturb_period; unsignedint quantum; /* Allotment per round: MUST BE >= MTU */ struct timer_list perturb_timer; struct Qdisc *sch;
};
/* * sfq_head are either in a sfq_slot or in dep[] array
*/ staticinlinestruct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
{ if (val < SFQ_MAX_FLOWS) return &q->slots[val].dep; return &q->dep[val - SFQ_MAX_FLOWS];
}
/* NOTE(review): this chunk starts mid-function — the enclosing function's
 * header and the declarations of 'd', 'x', 'slot', 'len' and 'skb' are not
 * visible here (presumably the qdisc's drop path); confirm against the
 * full file before editing.
 */
/* Queue is full! Find the longest slot and drop tail packet from it */ if (d > 1) {
/* Some flow holds more than one packet: victimize the head of the
 * deepest dep[] chain, i.e. a longest flow. */
x = q->dep[d].next;
slot = &q->slots[x];
drop:
/* Common drop path: remove one packet from the victim flow — the oldest
 * one in headdrop mode, the newest otherwise — and update the flow's
 * byte backlog, its depth chain, and the qdisc-level counters. */
skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
len = qdisc_pkt_len(skb);
slot->backlog -= len;
sfq_dec(q, x);
sch->q.qlen--;
qdisc_qstats_backlog_dec(sch, skb);
/* returns the dropped packet's length so callers can adjust backlogs */
qdisc_drop(skb, sch, to_free); return len;
}
if (d == 1) { /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
/* Every active flow holds exactly one packet: drop from the flow right
 * after the current tail, unlinking it from the RR chain and from the
 * hash table before reusing the common drop path above. */
x = q->tail->next;
slot = &q->slots[x]; if (slot->next == x)
q->tail = NULL; /* no more active slots */ else
q->tail->next = slot->next;
q->ht[slot->hash] = SFQ_EMPTY_SLOT; goto drop;
}
/* Should packets over max threshold just be marked */ staticint sfq_hard_mark(conststruct sfq_sched_data *q)
{ return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
}
/* NOTE(review): this span is the interior of the enqueue path — the
 * function header and the computation of 'hash' are not visible here.
 * Also, the congestion_drop block below appears to be missing its closing
 * lines (the braces do not balance within this view); verify against the
 * full file before editing.
 */
/* Look up the flow slot for this packet's hash bucket. */
x = q->ht[hash];
slot = &q->slots[x]; if (x == SFQ_EMPTY_SLOT) {
/* No flow mapped to this bucket yet: take a free slot from dep[0],
 * or drop if all SFQ_MAX_FLOWS slots are busy. */
x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) return qdisc_drop(skb, sch, to_free);
q->ht[hash] = x;
slot = &q->slots[x];
slot->hash = hash;
slot->backlog = 0; /* should already be 0 anyway... */
red_set_vars(&slot->vars); goto enqueue;
} if (q->red_parms) {
/* Per-flow RED: update the average queue estimate from this flow's
 * byte backlog and decide whether to mark or drop. */
slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
&slot->vars,
slot->backlog); switch (red_action(q->red_parms,
&slot->vars,
slot->vars.qavg)) { case RED_DONT_MARK: break;
case RED_PROB_MARK:
/* Probabilistic marking region: prefer ECN-marking the head of
 * the flow (headdrop mode) or the arriving skb; drop on failure. */
qdisc_qstats_overlimit(sch); if (sfq_prob_mark(q)) { /* We know we have at least one packet in queue */ if (sfq_headdrop(q) &&
INET_ECN_set_ce(slot->skblist_next)) {
q->stats.prob_mark_head++; break;
} if (INET_ECN_set_ce(skb)) {
q->stats.prob_mark++; break;
}
}
q->stats.prob_drop++; goto congestion_drop;
case RED_HARD_MARK:
/* Above max threshold: same marking strategy, but accounted as
 * forced marks/drops. */
qdisc_qstats_overlimit(sch); if (sfq_hard_mark(q)) { /* We know we have at least one packet in queue */ if (sfq_headdrop(q) &&
INET_ECN_set_ce(slot->skblist_next)) {
q->stats.forced_mark_head++; break;
} if (INET_ECN_set_ce(skb)) {
q->stats.forced_mark++; break;
}
}
q->stats.forced_drop++; goto congestion_drop;
}
}
/* Flow at its depth limit: either drop the arriving skb (taildrop) or
 * replace the flow's head with the new packet (headdrop). */
if (slot->qlen >= q->maxdepth) {
congestion_drop: if (!sfq_headdrop(q)) return qdisc_drop(skb, sch, to_free);
/* We know we have at least one packet in queue */
head = slot_dequeue_head(slot);
delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
sch->qstats.backlog -= delta;
slot->backlog -= delta;
qdisc_drop(head, sch, to_free);
enqueue:
/* Account the new packet against qdisc and flow backlogs, append it
 * to the flow's skb list and bump the flow's depth chain. */
qdisc_qstats_backlog_inc(sch, skb);
slot->backlog += qdisc_pkt_len(skb);
slot_queue_add(slot, skb);
sfq_inc(q, x); if (slot->qlen == 1) { /* The flow is new */ if (q->tail == NULL) { /* It is the first flow */
slot->next = x;
} else {
slot->next = q->tail->next;
q->tail->next = x;
} /* We put this flow at the end of our flow list. * This might sound unfair for a new flow to wait after old ones, * but we could endup servicing new flows only, and freeze old ones.
*/
q->tail = slot; /* We could use a bigger initial quantum for new flows */
slot->allot = q->quantum;
} if (++sch->q.qlen <= q->limit) return NET_XMIT_SUCCESS;
/* Qdisc over its global limit: drop one packet from the longest flow. */
qlen = slot->qlen;
dropped = sfq_drop(sch, to_free); /* Return Congestion Notification only if we dropped a packet * from this flow.
*/ if (qlen != slot->qlen) {
qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb)); return NET_XMIT_CN;
}
/* As we dropped a packet, better let upper stack know this */
qdisc_tree_reduce_backlog(sch, 1, dropped); return NET_XMIT_SUCCESS;
}
/* NOTE(review): tail of a drain/reset routine — its header is not visible
 * in this chunk. Dequeues every remaining packet and frees it via the
 * RTNL-deferred kfree path.
 */
while ((skb = sfq_dequeue(sch)) != NULL)
rtnl_kfree_skbs(skb, skb);
}
/*
 * When q->perturbation is changed, we rehash all queued skbs
 * to avoid OOO (Out Of Order) effects.
 * We don't use sfq_dequeue()/sfq_enqueue() because we don't want to change
 * counters.
 * Fix: the extracted text had fused keywords ("staticvoid", "unsignedint")
 * and many statements collapsed onto shared lines; reformatted to
 * conventional kernel style, behavior unchanged.
 */
static void sfq_rehash(struct Qdisc *sch)
{
	struct sfq_sched_data *q = qdisc_priv(sch);
	struct sk_buff *skb;
	int i;
	struct sfq_slot *slot;
	struct sk_buff_head list;
	int dropped = 0;
	unsigned int drop_len = 0;

	__skb_queue_head_init(&list);

	/* Detach every queued skb onto a private list, emptying all flows
	 * and clearing their hash-table entries and RED state. */
	for (i = 0; i < q->maxflows; i++) {
		slot = &q->slots[i];
		if (!slot->qlen)
			continue;
		while (slot->qlen) {
			skb = slot_dequeue_head(slot);
			sfq_dec(q, i);
			__skb_queue_tail(&list, skb);
		}
		slot->backlog = 0;
		red_set_vars(&slot->vars);
		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
	}
	q->tail = NULL;

	/* Re-enqueue each skb under the new hashing; skbs that no longer
	 * fit (no free slot, or flow at maxdepth) are dropped and their
	 * count/bytes reported to the parent. */
	while ((skb = __skb_dequeue(&list)) != NULL) {
		unsigned int hash = sfq_hash(q, skb);
		sfq_index x = q->ht[hash];

		slot = &q->slots[x];
		if (x == SFQ_EMPTY_SLOT) {
			x = q->dep[0].next; /* get a free slot */
			if (x >= SFQ_MAX_FLOWS) {
drop:
				qdisc_qstats_backlog_dec(sch, skb);
				drop_len += qdisc_pkt_len(skb);
				kfree_skb(skb);
				dropped++;
				continue;
			}
			q->ht[hash] = x;
			slot = &q->slots[x];
			slot->hash = hash;
		}
		if (slot->qlen >= q->maxdepth)
			goto drop;
		slot_queue_add(slot, skb);
		if (q->red_parms)
			slot->vars.qavg = red_calc_qavg(q->red_parms,
							&slot->vars,
							slot->backlog);
		slot->backlog += qdisc_pkt_len(skb);
		sfq_inc(q, x);
		if (slot->qlen == 1) {		/* The flow is new */
			if (q->tail == NULL) {	/* It is the first flow */
				slot->next = x;
			} else {
				slot->next = q->tail->next;
				q->tail->next = x;
			}
			q->tail = slot;
			slot->allot = q->quantum;
		}
	}
	sch->q.qlen -= dropped;
	qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}
/* NOTE(review): tail of the perturbation timer handler — its header (and
 * the rcu_read_lock() matching the unlock below) is not visible in this
 * chunk.
 */
/* q->perturb_period can change under us from * sfq_change() and sfq_destroy().
*/
/* Re-arm the timer only if periodic perturbation is still enabled;
 * READ_ONCE pairs with writers updating perturb_period concurrently. */
period = READ_ONCE(q->perturb_period); if (period)
mod_timer(&q->perturb_timer, jiffies + period);
rcu_read_unlock();
}
/*
 * Extraction artifact: website footer (originally in German), kept here as
 * a comment so it cannot be mistaken for code. Translation:
 * "The information on this website was compiled carefully and to the best
 * of our knowledge. However, neither completeness, nor correctness, nor
 * quality of the provided information is guaranteed.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental."
 */