// SPDX-License-Identifier: GPL-2.0 /* * Management Component Transport Protocol (MCTP) - routing * implementation. * * This is currently based on a simple routing table, with no dst cache. The * number of routes should stay fairly small, so the lookup cost is small. * * Copyright (c) 2021 Code Construct * Copyright (c) 2021 Google
*/
/* NOTE(review): fragment — the enclosing function's signature and the
 * declarations of 'mh' and 'type' are not visible in this extract.
 */
/* TODO: look up in skb->cb? */
mh = mctp_hdr(skb);
/* no linear data to parse */
if (!skb_headlen(skb)) return NULL;
/* first body byte carries the message type; top bit is masked off
 * (presumably the MCTP IC bit — confirm against the spec)
 */
type = (*(u8 *)skb->data) & 0x7f;
/* Look for binds in order of widening scope. A given destination or * source address also implies matching on a particular network. * * - Matching destination and source * - Matching destination * - Matching source * - Matching network, any address * - Any network or address
*/
/* A note on the key allocations. * * struct net->mctp.keys contains our set of currently-allocated keys for * MCTP tag management. The lookup tuple for these is the peer EID, * local EID and MCTP tag. * * In some cases, the peer EID may be MCTP_EID_ANY: for example, when a * broadcast message is sent, we may receive responses from any peer EID. * Because the broadcast dest address is equivalent to ANY, we create * a key with (local = local-eid, peer = ANY). This allows a match on the * incoming broadcast responses from any peer. * * We perform lookups when packets are received, and when tags are allocated * in two scenarios: * * - when a packet is sent, with a locally-owned tag: we need to find an * unused tag value for the (local, peer) EID pair. * * - when a tag is manually allocated: we need to find an unused tag value * for the peer EID, but don't have a specific local EID at that stage. * * in the latter case, on successful allocation, we end up with a tag with * (local = ANY, peer = peer-eid). * * So, the key set allows both a local EID of ANY, as well as a peer EID of * ANY in the lookup tuple. Both may be ANY if we prealloc for a broadcast. * The matching (in mctp_key_match()) during lookup allows the match value to * be ANY in either the dest or source addresses. * * When allocating (+ inserting) a tag, we need to check for conflicts amongst * the existing tag set. This requires matching either exactly on the local * and peer addresses, or either being ANY.
*/
/* NOTE(review): fragment — 'key' and 'flags' are declared in the enclosing
 * (not fully visible) function.
 *
 * Even though no refs exist here, holding the key lock keeps us consistent
 * with the locking requirement of mctp_dev_release_key().
 */
spin_lock_irqsave(&key->lock, flags);
mctp_dev_release_key(key->dev, key);
spin_unlock_irqrestore(&key->lock, flags);
/* Helper for mctp_route_input().
 * We're done with the key; unlock and unref the key.
 * For the usual case of automatic expiry we remove the key from lists.
 * In the case that manual allocation is set on a key we release the lock
 * and local ref, reset reassembly, but don't remove from lists.
 *
 * NOTE(review): "staticvoid"/"unsignedlong" below have lost whitespace in
 * extraction; upstream reads "static void" / "unsigned long". The body that
 * follows also appears interleaved with fragments of other functions (a
 * void function cannot "return -ENOMEM") — treat this region as garbled.
 */ staticvoid __mctp_key_done_in(struct mctp_sk_key *key, struct net *net, unsignedlong flags, unsignedlong reason)
__releases(&key->lock)
{ struct sk_buff *skb;
if (!key->reasm_head) { /* Since we're manipulating the shared frag_list, ensure it
 * isn't shared with any other SKBs. In the cloned case,
 * this will free the skb; callers can no longer access it
 * safely.
 */
/* NOTE(review): skb_unshare()/-ENOMEM belong to a reassembly helper, not
 * to this void function — extraction artifact, see note above.
 */
key->reasm_head = skb_unshare(skb, GFP_ATOMIC); if (!key->reasm_head) return -ENOMEM;
/* We may be receiving a locally-routed packet; drop source sk
 * accounting.
 *
 * From here, we will either queue the skb - either to a frag_queue, or
 * to a receiving socket. When that succeeds, we clear the skb pointer;
 * a non-NULL skb on exit will be otherwise unowned, and hence
 * kfree_skb()-ed.
 */
skb_orphan(skb);
/* locally-routed packets arrive marked OUTGOING; present them as loopback */
if (skb->pkt_type == PACKET_OUTGOING)
skb->pkt_type = PACKET_LOOPBACK;
/* NOTE(review): fragment of the MCTP receive path (reassembly + socket
 * delivery). The enclosing function's header and the declarations of
 * 'key', 'any_key', 'msk', 'mh', 'flags', 'tag', 'netid', 'f' and 'rc'
 * are not visible in this extract; locking comments below are kept as
 * written since the lock acquisition sites are outside this view.
 */
/* ensure we have enough data for a header and a type */ if (skb->len < sizeof(struct mctp_hdr) + 1) goto out;
/* lookup socket / reasm context, exactly matching (src,dest,tag). * we hold a ref on the key, and key->lock held.
 */
key = mctp_lookup_key(net, skb, netid, mh->src, &f);
if (flags & MCTP_HDR_FLAG_SOM) { if (key) {
msk = container_of(key->sk, struct mctp_sock, sk);
} else { /* first response to a broadcast? do a more general * key lookup to find the socket, but don't use this * key for reassembly - we'll create a more specific * one for future packets if required (ie, !EOM). * * this lookup requires key->peer to be MCTP_ADDR_ANY, * it doesn't match just any key->peer.
 */
any_key = mctp_lookup_key(net, skb, netid,
MCTP_ADDR_ANY, &f);  if (any_key) {
msk = container_of(any_key->sk, struct mctp_sock, sk);
spin_unlock_irqrestore(&any_key->lock, f);
}
}
/* single-packet message? deliver to socket, clean up any * pending key.
 */ if (flags & MCTP_HDR_FLAG_EOM) {
rc = sock_queue_rcv_skb(&msk->sk, skb); if (!rc)
skb = NULL; if (key) { /* we've hit a pending reassembly; not much we * can do but drop it
 */
__mctp_key_done_in(key, net, f,
MCTP_TRACE_KEY_REPLIED);
key = NULL;
} goto out_unlock;
}
/* broadcast response or a bind() - create a key for further * packets for this message
 */ if (!key) {
key = mctp_key_alloc(msk, netid, mh->dest, mh->src,
tag, GFP_ATOMIC); if (!key) {
rc = -ENOMEM; goto out_unlock;
}
/* we can queue without the key lock here, as the * key isn't observable yet
 */
mctp_frag_queue(key, skb);
/* key now owns the skb fragment */
skb = NULL;
/* if the key_add fails, we've raced with another * SOM packet with the same src, dest and tag. There's * no way to distinguish future packets, so all we * can do is drop.
 */
rc = mctp_key_add(key, msk); if (!rc)
trace_mctp_key_acquire(key);
/* we don't need to release key->lock on exit, so * clean up here and suppress the unlock via * setting to NULL
 */
mctp_key_unref(key);
key = NULL;
/* NOTE(review): "elseif" below has lost whitespace in extraction;
 * upstream reads "} else if (key) {".
 */
} elseif (key) { /* this packet continues a previous message; reassemble * using the message-specific key
 */
/* we need to be continuing an existing reassembly... */ if (!key->reasm_head) {
rc = -EINVAL;
} else {
rc = mctp_frag_queue(key, skb);
skb = NULL;
}
if (rc) goto out_unlock;
/* end of message? deliver to socket, and we're done with * the reassembly/response key
 */ if (flags & MCTP_HDR_FLAG_EOM) {
rc = sock_queue_rcv_skb(key->sk, key->reasm_head); if (!rc)
key->reasm_head = NULL;
__mctp_key_done_in(key, net, f, MCTP_TRACE_KEY_REPLIED);
key = NULL;
}
} else { /* not a start, no matching key */
rc = -ENOENT;
}
out_unlock:
rcu_read_unlock(); if (key) {
spin_unlock_irqrestore(&key->lock, f);
mctp_key_unref(key);
} if (any_key)
mctp_key_unref(any_key);
out:
/* kfree_skb(NULL) is a no-op: skb was cleared wherever ownership moved */
kfree_skb(skb); return rc;
}
/* NOTE(review): fragment of an output routine — 'dst', 'daddr' and
 * 'daddr_buf' are declared in the enclosing (not visible) function.
 */
if (skb->len > dst->mtu) {
kfree_skb(skb); return -EMSGSIZE;
}
/* direct route; use the hwaddr we stashed in sendmsg */ if (dst->halen) { if (dst->halen != skb->dev->addr_len) { /* sanity check, sendmsg should have already caught this */
kfree_skb(skb); return -EMSGSIZE;
}
daddr = dst->haddr;
} else { /* If lookup fails let the device handle daddr==NULL */ if (mctp_neigh_lookup(dst->dev, dst->nexthop, daddr_buf) == 0)
daddr = daddr_buf;
}
/* NOTE(review): the lines below belong to a separate key-reservation
 * helper; the opening of that function is not visible in this extract.
 *
 * we hold the net->key_lock here, allowing updates to both
 * the net and sk lists
 */
hlist_add_head_rcu(&key->hlist, &mns->keys);
hlist_add_head_rcu(&key->sklist, &msk->keys);
/* one ref per list membership */
refcount_inc(&key->refs);
}
/* Allocate a locally-owned tag value for (local, peer), and reserve
 * it for the socket msk.
 *
 * Returns the newly-reserved key on success, or an ERR_PTR:
 *  -ENOMEM if the key allocation fails,
 *  -EBUSY  if all eight tag values already conflict for (local, peer).
 */
struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, unsigned int netid,
					 mctp_eid_t local, mctp_eid_t peer,
					 bool manual, u8 *tagp)
{
	struct net *net = sock_net(&msk->sk);
	struct netns_mctp *mns = &net->mctp;
	struct mctp_sk_key *key, *tmp;
	unsigned long flags;
	u8 tagbits;

	/* for NULL destination EIDs, we may get a response from any peer */
	if (peer == MCTP_ADDR_NULL)
		peer = MCTP_ADDR_ANY;

	/* be optimistic, alloc now */
	key = mctp_key_alloc(msk, netid, local, peer, 0, GFP_KERNEL);
	if (!key)
		return ERR_PTR(-ENOMEM);

	/* 8 possible tag values */
	tagbits = 0xff;

	spin_lock_irqsave(&mns->keys_lock, flags);

	/* Walk through the existing keys, looking for potential conflicting
	 * tags. If we find a conflict, clear that bit from tagbits
	 */
	hlist_for_each_entry(tmp, &mns->keys, hlist) {
		/* We can check the lookup fields (*_addr, tag) without the
		 * lock held, they don't change over the lifetime of the key.
		 */

		/* tags are net-specific */
		if (tmp->net != netid)
			continue;

		/* if we don't own the tag, it can't conflict */
		if (tmp->tag & MCTP_HDR_FLAG_TO)
			continue;

		/* Since we're avoiding conflicting entries, match peer and
		 * local addresses, including with a wildcard on ANY. See
		 * 'A note on key allocations' for background.
		 */
		if (peer != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->peer_addr, peer))
			continue;

		if (local != MCTP_ADDR_ANY &&
		    !mctp_address_matches(tmp->local_addr, local))
			continue;

		spin_lock(&tmp->lock);
		/* key must still be valid. If we find a match, clear the
		 * potential tag value
		 */
		if (tmp->valid)
			tagbits &= ~(1 << tmp->tag);
		spin_unlock(&tmp->lock);

		/* all eight tags conflict; no point scanning further */
		if (!tagbits)
			break;
	}

	if (tagbits) {
		/* claim the lowest free tag and publish the key */
		key->tag = __ffs(tagbits);
		mctp_reserve_tag(net, key, msk);
		trace_mctp_key_acquire(key);

		key->manual_alloc = manual;
		*tagp = key->tag;
	}

	spin_unlock_irqrestore(&mns->keys_lock, flags);

	if (!tagbits) {
		/* no free tag: drop our optimistic allocation */
		mctp_key_unref(key);
		return ERR_PTR(-EBUSY);
	}

	/* NOTE(review): this return was missing from the extracted source;
	 * without it the non-void function falls off the end (undefined
	 * behavior). The caller must receive the reserved key.
	 */
	return key;
}
/* Populate @dst as the final output hop for @eid via the direct route
 * @route. Must only be called on a direct route.
 *
 * Takes a hold on route->dev for the lifetime of the dst; the caller is
 * responsible for the eventual release.
 */
static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid,
				unsigned int mtu, struct mctp_route *route)
{
	mctp_dev_hold(route->dev);
	dst->nexthop = eid;
	dst->dev = route->dev;
	/* start from the device MTU, clamped by the path MTU if one was
	 * accumulated (mtu == 0 means no restriction)
	 */
	dst->mtu = READ_ONCE(dst->dev->dev->mtu);
	if (mtu)
		dst->mtu = min(dst->mtu, mtu);
	/* direct route: no stashed hardware address */
	dst->halen = 0;
	dst->output = route->output;
}
/* NOTE(review): truncated — the body of mctp_dst_from_extaddr() is cut off
 * after the device lookup, and the lines from L172 onward belong to
 * mctp_route_lookup() and a separate output routine. "unsignedchar",
 * "unsignedint" and "constunsignedint" have lost whitespace in extraction.
 */
int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, unsignedchar halen, constunsignedchar *haddr)
{ struct net_device *netdev; struct mctp_dev *dev; int rc = -ENOENT;
/* reject hardware addresses that won't fit the dst's stash */
if (halen > sizeof(dst->haddr)) return -EINVAL;
rcu_read_lock();
netdev = dev_get_by_index_rcu(net, ifindex); if (!netdev) goto out_unlock;
/* populates *dst on successful lookup, if set */ int mctp_route_lookup(struct net *net, unsignedint dnet,
mctp_eid_t daddr, struct mctp_dst *dst)
{ constunsignedint max_depth = 32; unsignedint depth, mtu = 0; int rc = -EHOSTUNREACH;
/* NOTE(review): loop body fragment — the enclosing loop over gateway
 * hops (bounded by max_depth) is not visible here.
 */
rt = mctp_route_lookup_single(net, dnet, daddr); if (!rt) break;
/* clamp mtu to the smallest in the path, allowing 0 * to specify no restrictions
 */ if (mtu && rt->mtu)
mtu = min(mtu, rt->mtu); else
mtu = mtu ?: rt->mtu;
if (rt->dst_type == MCTP_ROUTE_DIRECT) { if (dst)
mctp_dst_from_route(dst, daddr, mtu, rt);
rc = 0; break;
/* route output functions consume the skb, even on error */
skb = NULL;
out_release:
kfree_skb(skb); return rc;
}
/* route management */
/* mctp_route_add(): Add the provided route, previously allocated via
 * mctp_route_alloc(). On success, takes ownership of @rt, which includes a
 * hold on rt->dev for usage in the route table. On failure a caller will want
 * to mctp_route_release().
 *
 * We expect that the caller has set rt->type, rt->dst_type, rt->min, rt->max,
 * rt->mtu and either rt->dev (with a reference held appropriately) or
 * rt->gateway. Other fields will be populated.
 *
 * NOTE(review): truncated — the body ends after the output-function
 * selection; the insertion into the route table is not visible here.
 * "staticint" has lost whitespace in extraction.
 */ staticint mctp_route_add(struct net *net, struct mctp_route *rt)
{ struct mctp_route *ert;
/* route endpoints must both be valid unicast EIDs */
if (!mctp_address_unicast(rt->min) || !mctp_address_unicast(rt->max)) return -EINVAL;
if (rt->dst_type == MCTP_ROUTE_DIRECT && !rt->dev) return -EINVAL;
if (rt->dst_type == MCTP_ROUTE_GATEWAY && !rt->gateway.eid) return -EINVAL;
/* select the output handler by route type; anything else is invalid */
switch (rt->type) { case RTN_LOCAL:
rt->output = mctp_dst_input; break; case RTN_UNICAST:
rt->output = mctp_dst_output; break; default: return -EINVAL;
}
/* removes all entries for a given device */ void mctp_route_remove_dev(struct mctp_dev *mdev)
{ struct net *net = dev_net(mdev->dev); struct mctp_route *rt, *tmp;
/* NOTE(review): the lines below belong to a separate receive-path
 * validation routine, not to mctp_route_remove_dev() — the extraction is
 * interleaved here.
 */
/* We have enough for a header; decode and route */
mh = mctp_hdr(skb); if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) goto err_drop;
/* source must be valid unicast or null; drop reserved ranges and * broadcast
 */ if (!(mctp_address_unicast(mh->src) || mctp_address_null(mh->src))) goto err_drop;
/* dest address: as above, but allow broadcast */ if (!(mctp_address_unicast(mh->dest) || mctp_address_null(mh->dest) ||
mctp_address_broadcast(mh->dest))) goto err_drop;
/* base parsing; common to both _lookup and _populate variants.
 *
 * For gateway routes (which have a RTA_GATEWAY, and no RTA_OIF), we populate
 * *gatewayp. For direct routes (RTA_OIF, no RTA_GATEWAY), we populate *mdev.
 *
 * NOTE(review): truncated — only the signatures and local declarations of
 * the two nlparse helpers survive; their bodies are cut off, and the lines
 * from L213 onward belong to a separate netlink route-dump routine.
 * "staticint" has lost whitespace in extraction.
 */ staticint mctp_route_nlparse_common(struct net *net, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct nlattr **tb, struct rtmsg **rtm, struct mctp_dev **mdev, struct mctp_fq_addr *gatewayp,
mctp_eid_t *daddr_start)
{ struct mctp_fq_addr *gateway = NULL; unsignedint ifindex = 0; struct net_device *dev; int rc;
/* Full route parse for RTM_NEWROUTE: populate @rt. On success, * MCTP_ROUTE_DIRECT routes (ie, those with a direct dev) will hold a reference * to that dev.
 */ staticint mctp_route_nlparse_populate(struct net *net, struct nlmsghdr *nlh, struct netlink_ext_ack *extack, struct mctp_route *rt)
{ struct nlattr *tbx[RTAX_MAX + 1]; struct nlattr *tb[RTA_MAX + 1]; unsignedint daddr_extent; struct mctp_fq_addr gw;
mctp_eid_t daddr_start; struct mctp_dev *dev; struct rtmsg *rtm;
u32 mtu = 0; int rc;
/* NOTE(review): netlink fill fragment — 'hdr', 'skb' and 'metrics' are
 * declared in the enclosing (not visible) dump function.
 *
 * we use the _len fields as a number of EIDs, rather than
 * a number of bits in the address
 */
hdr->rtm_dst_len = rt->max - rt->min;
hdr->rtm_src_len = 0;
hdr->rtm_tos = 0;
hdr->rtm_table = RT_TABLE_DEFAULT;
hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */
hdr->rtm_type = rt->type;
if (nla_put_u8(skb, RTA_DST, rt->min)) goto cancel;
metrics = nla_nest_start_noflag(skb, RTA_METRICS); if (!metrics) goto cancel;
if (rt->mtu) { if (nla_put_u32(skb, RTAX_MTU, rt->mtu)) goto cancel;
}
/* NOTE(review): the German text below is a website disclaimer that was
 * appended by a faulty extraction and is not part of this source file; it
 * is fenced in a comment so the file stays compilable. Rough translation:
 * "The information on this website was compiled carefully and to the best
 * of our knowledge. However, neither completeness, correctness, nor quality
 * of the provided information is guaranteed. Note: the syntax highlighting
 * and the measurement are still experimental."
 *
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereit gestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */