/* Per-skb reassembly state, stored in skb->cb.
 *
 * Used to track consecutive/adjacent fragments arriving at the end of
 * the queue.  Nodes in the rb-tree queue contain "runs" of one or more
 * adjacent fragments.
 *
 * Invariants:
 * - next_frag is NULL at the tail of a "run";
 * - the head of a "run" has the sum of all fragment lengths in
 *   frag_run_len.
 */
struct ipfrag_skb_cb {
	union {
		struct inet_skb_parm	h4;
		struct inet6_skb_parm	h6;
	};
	struct sk_buff		*next_frag;
	int			frag_run_len;
	int			ip_defrag_offset;
};
/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements * Value : 0xff if frame should be dropped. * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field
*/ const u8 ip_frag_ecn_table[16] = { /* at least one fragment had CE, and others ECT_0 or ECT_1 */
[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE,
[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
[IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE,
/* Atomically snapshot the list of fqdirs to free */
kill_list = llist_del_all(&fqdir_free_list);
/* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu) * have completed, since they need to dereference fqdir. * Would it not be nice to have kfree_rcu_barrier() ? :)
*/
rcu_barrier();
llist_for_each_entry_safe(fqdir, tmp, kill_list, free_list) {
f = fqdir->f; if (refcount_dec_and_test(&f->refcnt))
complete(&f->completion);
/* Kill a fragment queue: stop its expiry timer, mark it complete, and
 * remove it from the fqdir hash table.
 *
 * References released by this function are not dropped here; instead
 * *refs is incremented once per released reference so that the caller
 * can perform a single batched refcount drop.
 */
void inet_frag_kill(struct inet_frag_queue *fq, int *refs)
{
	/* If timer_delete() succeeds the pending timer will never fire;
	 * account for the reference the timer held (queues are created
	 * with one reference for the timer and one for the hash table).
	 */
	if (timer_delete(&fq->timer))
		(*refs)++;

	if (!(fq->flags & INET_FRAG_COMPLETE)) {
		struct fqdir *fqdir = fq->fqdir;

		fq->flags |= INET_FRAG_COMPLETE;
		rcu_read_lock();
		/* The RCU read lock provides a memory barrier
		 * guaranteeing that if fqdir->dead is false then
		 * the hash table destruction will not start until
		 * after we unlock.
		 * Paired with fqdir_pre_exit().
		 */
		if (!READ_ONCE(fqdir->dead)) {
			rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
					       fqdir->f->rhash_params);
			/* Hash table reference released. */
			(*refs)++;
		} else {
			/* fqdir teardown is in progress: leave removal to
			 * the hash table destruction path instead of
			 * racing with it.
			 */
			fq->flags |= INET_FRAG_HASH_DEAD;
		}
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL(inet_frag_kill);
timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock); /* One reference for the timer, one for the hash table. */
refcount_set(&q->refcnt, 2);
*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
&q->node, f->rhash_params); if (*prev) { /* We could not insert in the hash table, * we need to cancel what inet_frag_alloc() * anticipated.
*/ int refs = 1;
int inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, int offset, int end)
{ struct sk_buff *last = q->fragments_tail;
/* RFC5722, Section 4, amended by Errata ID : 3089 * When reassembling an IPv6 datagram, if * one or more its constituent fragments is determined to be an * overlapping fragment, the entire datagram (and any constituent * fragments) MUST be silently discarded. * * Duplicates, however, should be ignored (i.e. skb dropped, but the * queue/fragments kept for later reassembly).
*/ if (!last)
fragrun_create(q, skb); /* First fragment. */ elseif (FRAG_CB(last)->ip_defrag_offset + last->len < end) { /* This is the common case: skb goes to the end. */ /* Detect and discard overlaps. */ if (offset < FRAG_CB(last)->ip_defrag_offset + last->len) return IPFRAG_OVERLAP; if (offset == FRAG_CB(last)->ip_defrag_offset + last->len)
fragrun_append_to_last(q, skb); else
fragrun_create(q, skb);
} else { /* Binary search. Note that skb can become the first fragment, * but not the last (covered above).
*/ struct rb_node **rbn, *parent;
rbn = &q->rb_fragments.rb_node; do { struct sk_buff *curr; int curr_run_end;
parent = *rbn;
curr = rb_to_skb(parent);
curr_run_end = FRAG_CB(curr)->ip_defrag_offset +
FRAG_CB(curr)->frag_run_len; if (end <= FRAG_CB(curr)->ip_defrag_offset)
rbn = &parent->rb_left; elseif (offset >= curr_run_end)
rbn = &parent->rb_right; elseif (offset >= FRAG_CB(curr)->ip_defrag_offset &&
end <= curr_run_end) return IPFRAG_DUP; else return IPFRAG_OVERLAP;
} while (*rbn); /* Here we have parent properly set, and rbn pointing to * one of its NULL left/right children. Insert skb.
*/
fragcb_clear(skb);
rb_link_node(&skb->rbnode, parent, rbn);
rb_insert_color(&skb->rbnode, &q->rb_fragments);
}
if (sk && is_skb_wmem(skb)) { /* TX: skb->sk might have been passed as argument to * dst->output and must remain valid until tx completes. * * Move sk to reassembled skb and fix up wmem accounting.
*/
orig_truesize = skb->truesize;
destructor = skb->destructor;
}
if (head != skb) {
fp = skb_clone(skb, GFP_ATOMIC); if (!fp) {
head = skb; goto out_restore_sk;
}
FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; if (RB_EMPTY_NODE(&skb->rbnode))
FRAG_CB(parent)->next_frag = fp; else
rb_replace_node(&skb->rbnode, &fp->rbnode,
&q->rb_fragments); if (q->fragments_tail == skb)
q->fragments_tail = fp;
if (orig_truesize) { /* prevent skb_morph from releasing sk */
skb->sk = NULL;
skb->destructor = NULL;
}
skb_morph(skb, head);
FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag;
rb_replace_node(&head->rbnode, &skb->rbnode,
&q->rb_fragments);
consume_skb(head);
head = skb;
}
WARN_ON(FRAG_CB(head)->ip_defrag_offset != 0);
delta = -head->truesize;
/* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) goto out_restore_sk;
delta += head->truesize; if (delta)
add_frag_mem_limit(q->fqdir, delta);
/* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments.
*/ if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0;
out_restore_sk: if (orig_truesize) { int ts_delta = head->truesize - orig_truesize;
/* if this reassembled skb is fragmented later, * fraglist skbs will get skb->sk assigned from head->sk, * and each frag skb will be released via sock_wfree. * * Update sk_wmem_alloc.
*/
head->sk = sk;
head->destructor = destructor;
refcount_add(ts_delta, &sk->sk_wmem_alloc);
}
/* Traverse the tree in order, to build frag_list. */
fp = FRAG_CB(head)->next_frag;
rbn = rb_next(&head->rbnode);
rb_erase(&head->rbnode, &q->rb_fragments);
sum_truesize = head->truesize; while (rbn || fp) { /* fp points to the next sk_buff in the current run; * rbn points to the next run.
*/ /* Go through the current run. */ while (fp) { struct sk_buff *next_frag = FRAG_CB(fp)->next_frag; bool stolen; int delta;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.