// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/cls_u32.c	Ugly (or Universal) 32bit key Packet Classifier.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * The filters are packed to hash tables of key nodes
 * with a set of 32bit key/mask pairs at every node.
 * Nodes reference next level hash tables etc.
 *
 * This scheme is the best universal classifier I managed to
 * invent; it is not super-fast, but it is not slow (provided you
 * program it correctly), and general enough.  And its relative
 * speed grows as the number of rules becomes larger.
 *
 * It seems that it represents the best middle point between
 * speed and manageability both by human and by machine.
 *
 * It is especially useful for link sharing combined with QoS;
 * pure RSVP doesn't need such a general approach and can use
 * much simpler (and faster) schemes, sort of cls_rsvp.c.
 *
 * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro>
 */
/* One filter (key node) in a u32 hash-table bucket.
 *
 * Fix: "unsignedint" lost its space during extraction; restored to
 * "unsigned int" so the declaration compiles. Layout restored to one
 * member per line, kernel style.
 */
struct tc_u_knode {
	struct tc_u_knode __rcu	*next;		/* next node in bucket chain */
	u32			handle;		/* htid:bucket:node identity */
	struct tc_u_hnode __rcu	*ht_up;		/* table this node lives in */
	struct tcf_exts		exts;		/* actions/extensions */
	int			ifindex;
	u8			fshift;
	struct tcf_result	res;		/* classification result */
	struct tc_u_hnode __rcu	*ht_down;	/* link to next-level table */
#ifdef CONFIG_CLS_U32_PERF
	struct tc_u32_pcnt __percpu *pf;	/* per-cpu match statistics */
#endif
	u32			flags;
	unsigned int		in_hw_count;
#ifdef CONFIG_CLS_U32_MARK
	u32			val;
	u32			mask;
	u32 __percpu		*pcpu_success;
#endif
	struct rcu_work		rwork;		/* deferred destruction */
	/* The 'sel' field MUST be the last field in structure to allow for
	 * tc_u32_keys allocated at end of structure.
	 */
	struct tc_u32_sel	sel;
};
/* One u32 hash table: a bucket array of knode chains.
 *
 * Fix: "unsignedint" lost its space during extraction; restored to
 * "unsigned int" so the declaration compiles. Layout restored to one
 * member per line, kernel style.
 */
struct tc_u_hnode {
	struct tc_u_hnode __rcu	*next;		/* next table in tp_c list */
	u32			handle;		/* table id (htid) */
	u32			prio;
	refcount_t		refcnt;		/* knodes + user references */
	unsigned int		divisor;	/* number of buckets - 1 */
	struct idr		handle_idr;	/* node-id allocator */
	bool			is_root;
	struct rcu_head		rcu;
	u32			flags;
	/* The 'ht' field MUST be the last field in structure to allow for
	 * more entries allocated at end of structure.
	 */
	struct tc_u_knode __rcu	*ht[];		/* flexible bucket array */
};
/* NOTE(review): mangled extract. The first lines look like the local
 * variable prologue of the u32 classify fast path, while the tail
 * (tcf_block_shared test) appears to belong to a different helper that
 * resolves the tc_u_common identity from a block. Neither enclosing
 * function header is visible here. Also, "unsignedint" below lost a
 * space during extraction and should read "unsigned int" -- restore
 * from the upstream file before building.
 */
struct tc_u_hnode *ht = rcu_dereference_bh(tp->root); unsignedint off = skb_network_offset(skb); struct tc_u_knode *n; int sdepth = 0; int off2 = 0; int sel = 0; #ifdef CONFIG_CLS_U32_PERF int j; #endif int i, r;
/* Loop head: fetch the head knode of the selected bucket under RCU. */
next_ht:
n = rcu_dereference_bh(ht->ht[sel]);
next_knode: if (n) { struct tc_u32_key *key = n->sel.keys;
/* The block sharing is currently supported only * for classless qdiscs. In that case we use block * for tc_u_common identification. In case the * block is not shared, block->q is a valid pointer * and we can use that. That works for classful qdiscs.
*/ if (tcf_block_shared(block)) return block; else return block->q;
}
/* u32_delete_key_rcu should be called when free'ing a copied
 * version of a tc_u_knode obtained from u32_init_knode(). When
 * copies are obtained from u32_init_knode() the statistics are
 * shared between the old and new copies to allow readers to
 * continue to update the statistics during the copy. To support
 * this the u32_delete_key_rcu variant does not free the percpu
 * statistics.
 *
 * Fix: "staticvoid" lost its space during extraction; restored to
 * "static void" so the definition compiles.
 */
static void u32_delete_key_work(struct work_struct *work)
{
	struct tc_u_knode *key = container_of(to_rcu_work(work),
					      struct tc_u_knode,
					      rwork);
	/* Destruction touches shared classifier state, so take RTNL. */
	rtnl_lock();
	u32_destroy_key(key, false);	/* false: percpu stats are shared, keep them */
	rtnl_unlock();
}
/* u32_delete_key_freepf_rcu is the rcu callback variant
 * that free's the entire structure including the statistics
 * percpu variables. Only use this if the key is not a copy
 * returned by u32_init_knode(). See u32_delete_key_rcu()
 * for the variant that should be used with keys return from
 * u32_init_knode()
 *
 * Fix: "staticvoid" lost its space during extraction; restored to
 * "static void" so the definition compiles.
 */
static void u32_delete_key_freepf_work(struct work_struct *work)
{
	struct tc_u_knode *key = container_of(to_rcu_work(work),
					      struct tc_u_knode,
					      rwork);
	/* Destruction touches shared classifier state, so take RTNL. */
	rtnl_lock();
	u32_destroy_key(key, true);	/* true: also free the percpu stats */
	rtnl_unlock();
}
/* NOTE(review): tail fragment of a hash-table teardown/put path; the
 * enclosing function header is not visible in this extract. It drops
 * one reference on ht and RCU-frees the table only when that was the
 * last reference.
 */
/* u32_destroy_key() will later free ht for us, if it's * still referenced by some knode
 */ if (refcount_dec_and_test(&ht->refcnt))
kfree_rcu(ht, rcu);
}
/* NOTE(review): tail fragment of a knode-replacement path; the
 * enclosing function header is not visible in this extract. It scans
 * the bucket chain for the node with the matching handle, swaps the
 * idr mapping to the new node, then publishes the new node with
 * rcu_assign_pointer so concurrent readers see either old or new.
 */
/* The node must always exist for it to be replaced if this is not the * case then something went very wrong elsewhere.
 */ for (pins = rtnl_dereference(*ins); ;
ins = &pins->next, pins = rtnl_dereference(*ins)) if (pins->handle == n->handle) break;
idr_replace(&ht->handle_idr, n, n->handle);
RCU_INIT_POINTER(n->next, pins->next);
rcu_assign_pointer(*ins, n);
}
/* NOTE(review): interior fragment of a knode-copy helper (presumably
 * u32_init_knode -- the function header is not visible here). It moves
 * the per-cpu statistics pointers from the old node to the copy rather
 * than duplicating them, copies the mark match, clones the selector,
 * and initializes the extension block of the new node.
 */
#ifdef CONFIG_CLS_U32_PERF /* Statistics may be incremented by readers during update * so we must keep them in tact. When the node is later destroyed * a special destroy call must be made to not free the pf memory.
 */
new->pf = n->pf; #endif
#ifdef CONFIG_CLS_U32_MARK
new->val = n->val;
new->mask = n->mask; /* Similarly success statistics must be moved as pointers */
new->pcpu_success = n->pcpu_success; #endif
memcpy(&new->sel, s, struct_size(s, keys, s->nkeys));
if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
/* On failure the half-built copy is discarded. */
kfree(new); return NULL;
}
/* NOTE(review): interior fragment of the filter change/insert path
 * (presumably u32_change -- the function header, the selector/knode
 * validation context, and the tail are not all visible here). It:
 *   1. pins the target hash table with a reference,
 *   2. derives the final 32-bit handle (htid:bucket:nodeid), either
 *      from the user-supplied handle or from the table's idr pool,
 *   3. validates and sizes TCA_U32_SEL, allocates the knode,
 *   4. offloads to hardware and marks NOT_IN_HW when the driver
 *      didn't take it,
 *   5. finds the sorted-by-nodeid insertion point in the bucket.
 */
/* bump reference count as long as we hold pointer to structure */ if (ht)
refcount_inc(&ht->refcnt);
/* At this point, we need to derive the new handle that will be used to * uniquely map the identity of this table match entry. The * identity of the entry that we need to construct is 32 bits made of: * htid(12b):bucketid(8b):node/entryid(12b) * * At this point _we have the table(ht)_ in which we will insert this * entry. We carry the table's id in variable "htid". * Note that earlier code picked the ht selection either by a) the user * providing the htid specified via TCA_U32_HASH attribute or b) when * no such attribute is passed then the root ht, is default to at ID * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0. * If OTOH the user passed us the htid, they may also pass a bucketid of * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be * passed via the htid, so even if it was non-zero it will be ignored. * * We may also have a handle, if the user passed one. The handle also * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b). * Rule: the bucketid on the handle is ignored even if one was passed; * rather the value on "htid" is always assumed to be the bucketid.
 */ if (handle) { /* Rule: The htid from handle and tableid from htid must match */ if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch"); return -EINVAL;
} /* Ok, so far we have a valid htid(12b):bucketid(8b) but we * need to finalize the table entry identification with the last * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for * entries. Rule: nodeid of 0 is reserved only for tables(see * earlier code which processes TC_U32_DIVISOR attribute). * Rule: The nodeid can only be derived from the handle (and not * htid). * Rule: if the handle specified zero for the node id example * 0x60000000, then pick a new nodeid from the pool of IDs * this hash table has been allocating from. * If OTOH it is specified (i.e for example the user passed a * handle such as 0x60000123), then we use it generate our final * handle which is used to uniquely identify the match entry.
 */ if (!TC_U32_NODE(handle)) {
handle = gen_new_kid(ht, htid);
} else {
handle = htid | TC_U32_NODE(handle);
/* Reserve the exact user-chosen id in the table's idr. */
err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
handle, GFP_KERNEL); if (err) return err;
}
} else { /* The user did not give us a handle; lets just generate one * from the table's pool of nodeids.
 */
handle = gen_new_kid(ht, htid);
}
if (tb[TCA_U32_SEL] == NULL) {
NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
err = -EINVAL; goto erridr;
}
s = nla_data(tb[TCA_U32_SEL]);
/* Reject a selector attribute shorter than its declared key count. */
sel_size = struct_size(s, keys, s->nkeys); if (nla_len(tb[TCA_U32_SEL]) < sel_size) {
err = -EINVAL; goto erridr;
}
n = kzalloc(struct_size(n, sel.keys, s->nkeys), GFP_KERNEL); if (n == NULL) {
err = -ENOBUFS; goto erridr;
}
err = u32_replace_hw_knode(tp, n, flags, extack); if (err) goto errunbind;
if (!tc_in_hw(n->flags))
n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
tcf_proto_update_usesw(tp, n->flags);
/* Find insertion point: bucket chains are kept sorted by node id. */
ins = &ht->ht[TC_U32_HASH(handle)]; for (pins = rtnl_dereference(*ins); pins;
ins = &pins->next, pins = rtnl_dereference(*ins)) if (TC_U32_NODE(handle) < TC_U32_NODE(pins->handle)) break;
/* NOTE(review): interior fragment of a reoffload walk (presumably
 * u32_reoffload -- the function header and tail are not visible here).
 * For every hash table belonging to this tp priority it re-offloads
 * the hnode (before its filters, when adding) and then every knode in
 * every bucket, skipping entries flagged skip_hw.
 */
for (ht = rtnl_dereference(tp_c->hlist);
ht;
ht = rtnl_dereference(ht->next)) { if (ht->prio != tp->prio) continue;
/* When adding filters to a new dev, try to offload the * hashtable first. When removing, do the filters before the * hashtable.
 */ if (add && !tc_skip_hw(ht->flags)) {
err = u32_reoffload_hnode(tp, ht, add, cb, cb_priv,
extack); if (err) return err;
}
for (h = 0; h <= ht->divisor; h++) { for (n = rtnl_dereference(ht->ht[h]);
n;
n = rtnl_dereference(n->next)) { if (tc_skip_hw(n->flags)) continue;
err = u32_reoffload_knode(tp, n, add, cb,
cb_priv, extack); if (err) return err;
}
}
The information on this web page has been compiled carefully and to the
best of our knowledge. However, no guarantee is given as to the
completeness, correctness, or quality of the information provided.
Note:
The colored syntax highlighting and the measurement are still experimental.