// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	  Linux Consultancy and Custom Driver Development
 *
 *	Fixes:
 *	Michael Chastain	:	Incorrect size of copying.
 *	Alan Cox		:	Added the cache manager code
 *	Alan Cox		:	Fixed the clone/copy bug and device race.
 *	Mike McLagan		:	Routing by source
 *	Malcolm Beattie		:	Buffer handling fixes.
 *	Alexey Kuznetsov	:	Double buffer free and other fixes.
 *	SVR Anand		:	Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov	:	Status, optimisations and more.
 *	Brad Parker		:	Better behaviour on mrouted upcall
 *					overflow.
 *	Carlos Picoto		:	PIMv1 Support
 *	Pavlin Ivanov Radoslavov:	PIMv2 Registers must checksum only PIM header
 *					Relax this requirement to work with older peers.
 */
/* Special spinlock for queue of unresolved entries.
 * Taken with spin_lock_bh() around mfc_unres_queue manipulation.
 */
static DEFINE_SPINLOCK(mfc_unres_lock);
/* We return to original Alan's scheme. Hash table of resolved * entries is changed only in process context and protected * with weak lock mrt_lock. Queue of unresolved entries is protected * with strong spinlock mfc_unres_lock. * * In this case data path is free of exclusive locks at all.
*/
switch (rule->action) { case FR_ACT_TO_TBL: break; case FR_ACT_UNREACHABLE: return -ENETUNREACH; case FR_ACT_PROHIBIT: return -EACCES; case FR_ACT_BLACKHOLE: default: return -EINVAL;
}
/* called with rcu_read_lock() */ staticint __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, unsignedint pimlen)
{ struct net_device *reg_dev = NULL; struct iphdr *encap; int vif_num;
encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* Check that: * a. packet is really sent to a multicast group * b. packet is not a NULL-REGISTER * c. packet is not truncated
*/ if (!ipv4_is_multicast(encap->daddr) ||
encap->tot_len == 0 ||
ntohs(encap->tot_len) + pimlen > skb->len) return 1;
/* Pairs with WRITE_ONCE() in vif_add()/vid_delete() */
vif_num = READ_ONCE(mrt->mroute_reg_vif_num); if (vif_num >= 0)
reg_dev = vif_dev_read(&mrt->vif_table[vif_num]); if (!reg_dev) return 1;
/** * vif_delete - Delete a VIF entry * @mrt: Table to delete from * @vifi: VIF identifier to delete * @notify: Set to 1, if the caller is a notifier_call * @head: if unregistering the VIF, place it on this queue
*/ staticint vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head)
{ struct net *net = read_pnet(&mrt->net); struct vif_device *v; struct net_device *dev; struct in_device *in_dev;
if (vifi < 0 || vifi >= mrt->maxvif) return -EADDRNOTAVAIL;
v = &mrt->vif_table[vifi];
dev = rtnl_dereference(v->dev); if (!dev) return -EADDRNOTAVAIL;
if (vifi == mrt->mroute_reg_vif_num) { /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */
WRITE_ONCE(mrt->mroute_reg_vif_num, -1);
} if (vifi + 1 == mrt->maxvif) { int tmp;
if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
out:
spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under locked mrt_lock. */ staticvoid ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, unsignedchar *ttls)
{ int vifi;
staticint vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock)
{ struct netdev_phys_item_id ppid = { }; int vifi = vifc->vifc_vifi; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; int err;
/* Is vif busy ? */ if (VIF_EXISTS(mrt, vifi)) return -EADDRINUSE;
switch (vifc->vifc_flags) { case VIFF_REGISTER: if (!ipmr_pimsm_enabled()) return -EINVAL; /* Special Purpose VIF in PIM * All the packets will be sent to the daemon
*/ if (mrt->mroute_reg_vif_num >= 0) return -EADDRINUSE;
dev = ipmr_reg_vif(net, mrt); if (!dev) return -ENOBUFS;
err = dev_set_allmulti(dev, 1); if (err) {
unregister_netdevice(dev);
dev_put(dev); return err;
} break; case VIFF_TUNNEL:
dev = ipmr_new_tunnel(net, vifc); if (IS_ERR(dev)) return PTR_ERR(dev); break; case VIFF_USE_IFINDEX: case 0: if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex); if (dev && !__in_dev_get_rtnl(dev)) {
dev_put(dev); return -EADDRNOTAVAIL;
}
} else {
dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
} if (!dev) return -EADDRNOTAVAIL;
err = dev_set_allmulti(dev, 1); if (err) {
dev_put(dev); return err;
} break; default: return -EINVAL;
}
/* A cache entry has gone into a resolved state from queued */ staticvoid ipmr_cache_resolve(struct net *net, struct mr_table *mrt, struct mfc_cache *uc, struct mfc_cache *c)
{ struct sk_buff *skb; struct nlmsgerr *e;
/* Play the pending entries through our router */ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { struct nlmsghdr *nlh = skb_pull(skb, sizeof(struct iphdr));
if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE) { /* Ugly, but we have no choice with this interface. * Duplicate old header, fix ihl, length etc. * And all this only to mangle msg->im_msgtype and * to set msg->im_mbz to "mbz" :-)
*/
skb_push(skb, sizeof(struct iphdr));
skb_reset_network_header(skb);
skb_reset_transport_header(skb);
msg = (struct igmpmsg *)skb_network_header(skb);
memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
msg->im_msgtype = assert;
msg->im_mbz = 0; if (assert == IGMPMSG_WRVIFWHOLE) {
msg->im_vif = vifi;
msg->im_vif_hi = vifi >> 8;
} else { /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ int vif_num = READ_ONCE(mrt->mroute_reg_vif_num);
ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
ipmr_rht_params); if (ret) {
pr_err("ipmr: rhtable insert error %d\n", ret);
ipmr_cache_free(c); return ret;
}
list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list); /* Check to see if we resolved a queued list. If so we * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
uc = (struct mfc_cache *)_uc; if (uc->mfc_origin == c->mfc_origin &&
uc->mfc_mcastgrp == c->mfc_mcastgrp) {
list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true; break;
}
} if (list_empty(&mrt->mfc_unres_queue))
timer_delete(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
/* Close the multicast socket, and clear the vif tables etc */ staticvoid mroute_clean_tables(struct mr_table *mrt, int flags)
{ struct net *net = read_pnet(&mrt->net); struct mr_mfc *c, *tmp; struct mfc_cache *cache;
LIST_HEAD(list); int i;
/* Shut down all active vif entries */ if (flags & (MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC)) { for (i = 0; i < mrt->maxvif; i++) { if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
!(flags & MRT_FLUSH_VIFS_STATIC)) ||
(!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS))) continue;
vif_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
}
/* called from ip_ra_control(), before an RCU grace period, * we don't need to call synchronize_rcu() here
*/ staticvoid mrtsock_destruct(struct sock *sk)
{ struct net *net = sock_net(sk); struct mr_table *mrt;
/* Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
 *
 * Returns 0 on success or a negative errno. Callers are userspace via
 * setsockopt() on a raw IGMP socket; all options except MRT_INIT require
 * the caller to be the registered mroute socket or CAP_NET_ADMIN.
 */
int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval,
			 unsigned int optlen)
{
	struct net *net = sock_net(sk);
	int val, ret = 0, parent = 0;
	struct mr_table *mrt;
	struct vifctl vif;
	struct mfcctl mfc;
	bool do_wrvifwhole;
	u32 uval;

	/* There's one exception to the lock - MRT_DONE which needs to unlock */
	rtnl_lock();
	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP) {
		ret = -EOPNOTSUPP;
		goto out_unlock;
	}

	mrt = __ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt) {
		ret = -ENOENT;
		goto out_unlock;
	}
	if (optname != MRT_INIT) {
		/* Only the registered mroute daemon socket (or a privileged
		 * caller) may change multicast routing state.
		 */
		if (sk != rcu_access_pointer(mrt->mroute_sk) &&
		    !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
			ret = -EACCES;
			goto out_unlock;
		}
	}

	switch (optname) {
	case MRT_INIT:
		if (optlen != sizeof(int)) {
			ret = -EINVAL;
			break;
		}
		if (rtnl_dereference(mrt->mroute_sk)) {
			ret = -EADDRINUSE;
			break;
		}

		ret = ip_ra_control(sk, 1, mrtsock_destruct);
		if (ret == 0) {
			rcu_assign_pointer(mrt->mroute_sk, sk);
			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
			inet_netconf_notify_devconf(net, RTM_NEWNETCONF,
						    NETCONFA_MC_FORWARDING,
						    NETCONFA_IFINDEX_ALL,
						    net->ipv4.devconf_all);
		}
		break;
	case MRT_DONE:
		if (sk != rcu_access_pointer(mrt->mroute_sk)) {
			ret = -EACCES;
		} else {
			/* We need to unlock here because mrtsock_destruct takes
			 * care of rtnl itself and we can't change that due to
			 * the IP_ROUTER_ALERT setsockopt which runs without it.
			 */
			rtnl_unlock();
			ret = ip_ra_control(sk, 0, NULL);
			goto out;
		}
		break;
	case MRT_ADD_VIF:
	case MRT_DEL_VIF:
		if (optlen != sizeof(vif)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&vif, optval, sizeof(vif))) {
			ret = -EFAULT;
			break;
		}
		if (vif.vifc_vifi >= MAXVIFS) {
			ret = -ENFILE;
			break;
		}
		if (optname == MRT_ADD_VIF) {
			ret = vif_add(net, mrt, &vif,
				      sk == rtnl_dereference(mrt->mroute_sk));
		} else {
			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
		}
		break;
	/* Manipulate the forwarding caches. These live
	 * in a sort of kernel/user symbiosis.
	 */
	case MRT_ADD_MFC:
	case MRT_DEL_MFC:
		parent = -1;
		fallthrough;
	case MRT_ADD_MFC_PROXY:
	case MRT_DEL_MFC_PROXY:
		if (optlen != sizeof(mfc)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&mfc, optval, sizeof(mfc))) {
			ret = -EFAULT;
			break;
		}
		/* parent stays -1 for the non-proxy variants (set above);
		 * the proxy variants take it from the request.
		 */
		if (parent == 0)
			parent = mfc.mfcc_parent;
		if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY)
			ret = ipmr_mfc_delete(mrt, &mfc, parent);
		else
			ret = ipmr_mfc_add(net, mrt, &mfc,
					   sk == rtnl_dereference(mrt->mroute_sk),
					   parent);
		break;
	case MRT_FLUSH:
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}
		mroute_clean_tables(mrt, val);
		break;
	/* Control PIM assert. */
	case MRT_ASSERT:
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}
		mrt->mroute_do_assert = val;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled()) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(val)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&val, optval, sizeof(val))) {
			ret = -EFAULT;
			break;
		}

		/* Remember the raw request before normalising to 0/1:
		 * IGMPMSG_WRVIFWHOLE additionally enables whole-packet
		 * wrong-vif upcalls.
		 */
		do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE);
		val = !!val;
		if (val != mrt->mroute_do_pim) {
			mrt->mroute_do_pim = val;
			mrt->mroute_do_assert = val;
			mrt->mroute_do_wrvifwhole = do_wrvifwhole;
		}
		break;
	case MRT_TABLE:
		if (!IS_BUILTIN(CONFIG_IP_MROUTE_MULTIPLE_TABLES)) {
			ret = -ENOPROTOOPT;
			break;
		}
		if (optlen != sizeof(uval)) {
			ret = -EINVAL;
			break;
		}
		if (copy_from_sockptr(&uval, optval, sizeof(uval))) {
			ret = -EFAULT;
			break;
		}

		/* Cannot retarget a socket that is already the active
		 * mroute socket of a table.
		 */
		if (sk == rtnl_dereference(mrt->mroute_sk)) {
			ret = -EBUSY;
		} else {
			mrt = ipmr_new_table(net, uval);
			if (IS_ERR(mrt))
				ret = PTR_ERR(mrt);
			else
				raw_sk(sk)->ipmr_table = uval;
		}
		break;
	/* Spurious command, or MRT_VERSION which you cannot set. */
	default:
		ret = -ENOPROTOOPT;
	}
out_unlock:
	rtnl_unlock();
out:
	return ret;
}
/* Execute if this ioctl is a special mroute ioctl.
 *
 * Returns the ioctl result, or a value > 0 meaning the ioctl was not
 * one of ours and should be handled by the generic path.
 *
 * NOTE(review): upstream also dispatches SIOCGETSGCNT here — confirm
 * against the full file whether that case was lost in extraction.
 */
int ipmr_sk_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	switch (cmd) {
	/* These userspace buffers will be consumed by ipmr_ioctl() */
	case SIOCGETVIFCNT: {
		struct sioc_vif_req buffer;

		return sock_ioctl_inout(sk, cmd, arg, &buffer,
					sizeof(buffer));
	}
	}
	/* return code > 0 means that the ioctl was not executed */
	return 1;
}
/* Getsock opt support for the multicast routing system.
 *
 * Supports MRT_VERSION, MRT_PIM and MRT_ASSERT; everything else is
 * -ENOPROTOOPT. Copies at most sizeof(int) bytes back to userspace,
 * clamped to the caller-supplied length.
 */
int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval,
			 sockptr_t optlen)
{
	int olr;
	int val;
	struct net *net = sock_net(sk);
	struct mr_table *mrt;

	if (sk->sk_type != SOCK_RAW ||
	    inet_sk(sk)->inet_num != IPPROTO_IGMP)
		return -EOPNOTSUPP;

	/* Fix: mrt was dereferenced below without ever being assigned
	 * (uninitialized pointer). Look the table up the same way
	 * ip_mroute_setsockopt() does.
	 */
	mrt = __ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
	if (!mrt)
		return -ENOENT;

	switch (optname) {
	case MRT_VERSION:
		val = 0x0305;
		break;
	case MRT_PIM:
		if (!ipmr_pimsm_enabled())
			return -ENOPROTOOPT;
		val = mrt->mroute_do_pim;
		break;
	case MRT_ASSERT:
		val = mrt->mroute_do_assert;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (copy_from_sockptr(&olr, optlen, sizeof(int)))
		return -EFAULT;
	if (olr < 0)
		return -EINVAL;

	olr = min_t(unsigned int, olr, sizeof(int));

	if (copy_to_sockptr(optlen, &olr, sizeof(int)))
		return -EFAULT;
	if (copy_to_sockptr(optval, &val, olr))
		return -EFAULT;
	return 0;
}
/* The IP multicast ioctl support routines. */ int ipmr_ioctl(struct sock *sk, int cmd, void *arg)
{ struct vif_device *vif; struct mfc_cache *c; struct net *net = sock_net(sk); struct sioc_vif_req *vr; struct sioc_sg_req *sr; struct mr_table *mrt;
/* Encapsulate a packet by attaching a valid IPIP header to it. * This avoids tunnel drivers and other mess and gives us the speed so * important for multicast video.
*/ staticvoid ip_encap(struct net *net, struct sk_buff *skb,
__be32 saddr, __be32 daddr)
{ struct iphdr *iph; conststruct iphdr *old_iph = ip_hdr(skb);
if (skb->len+encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) { /* Do not fragment multicasts. Alas, IPv4 does not * allow to send ICMP, so that packets will disappear * to blackhole.
*/
IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS);
ip_rt_put(rt); return -1;
}
/* FIXME: forward and output firewalls used to be called here. * What do we do with netfilter? -- RR
*/ if (vif->flags & VIFF_TUNNEL) {
ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */
DEV_STATS_INC(vif_dev, tx_packets);
DEV_STATS_ADD(vif_dev, tx_bytes, skb->len);
}
return 0;
}
/* Forward one skb out of VIF @vifi, having arrived on VIF @in_vifi.
 * Consumes the skb on all paths (either handed to the netfilter FORWARD
 * hook or freed on error). Fixes extraction-fused "staticvoid" token.
 */
static void ipmr_queue_fwd_xmit(struct net *net, struct mr_table *mrt,
				int in_vifi, struct sk_buff *skb, int vifi)
{
	struct rtable *rt;

	/* Hardware may have forwarded this flow already */
	if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi))
		goto out_free;

	if (ipmr_prepare_xmit(net, mrt, skb, vifi))
		goto out_free;

	rt = skb_rtable(skb);

	IPCB(skb)->flags |= IPSKB_FORWARDED;

	/* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
		net, NULL, skb, skb->dev, dst_dev_rcu(&rt->dst),
		ipmr_forward_finish);
	return;

out_free:
	kfree_skb(skb);
}
/* Transmit one locally-originated multicast skb on VIF @vifi.
 * Consumes the skb on all paths. Fixes extraction-fused "staticvoid".
 */
static void ipmr_queue_output_xmit(struct net *net, struct mr_table *mrt,
				   struct sk_buff *skb, int vifi)
{
	if (ipmr_prepare_xmit(net, mrt, skb, vifi))
		goto out_free;

	ip_mc_output(net, NULL, skb);
	return;

out_free:
	kfree_skb(skb);
}
/* Called with mrt_lock or rcu_read_lock() */ staticint ipmr_find_vif(conststruct mr_table *mrt, struct net_device *dev)
{ int ct; /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */ for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break;
} return ct;
}
/* "local" means that we should preserve one skb (for local delivery) */ /* Called uner rcu_read_lock() */ staticvoid ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *c, int local)
{ int true_vifi = ipmr_find_vif(mrt, dev); int psend = -1; int vif, ct;
/* For an (*,G) entry, we only check that the incoming * interface is part of the static tree.
*/
cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy &&
cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) goto forward;
}
/* Wrong interface: drop packet and (maybe) send PIM assert. */ if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... * * The best workaround until routing daemons will be * fixed is not to redistribute packet, if it was * send through wrong interface. It means, that * multicast applications WILL NOT work for * (S,G), which have default multicast route pointing * to wrong oif. In any case, it is not a good * idea to use multicasting applications on router.
*/ goto dont_forward;
}
atomic_long_inc(&c->_c.mfc_un.res.wrong_if);
if (true_vifi >= 0 && mrt->mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, * so that we cannot check that packet arrived on an oif. * It is bad, but otherwise we would need to move pretty * large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
c->_c.mfc_un.res.last_assert +
MFC_ASSERT_THRESH)) {
c->_c.mfc_un.res.last_assert = jiffies;
ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); if (mrt->mroute_do_wrvifwhole)
ipmr_cache_report(mrt, skb, true_vifi,
IGMPMSG_WRVIFWHOLE);
} goto dont_forward;
}
/* Multicast packets for forwarding arrive here * Called with rcu_read_lock();
*/ int ip_mr_input(struct sk_buff *skb)
{ struct mfc_cache *cache; struct net *net = dev_net(skb->dev); int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL; struct mr_table *mrt; struct net_device *dev;
/* skb->dev passed in is the loX master dev for vrfs. * As there are no vifs associated with loopback devices, * get the proper interface that does have a vif associated with it.
*/
dev = skb->dev; if (netif_is_l3_master(skb->dev)) {
dev = dev_get_by_index_rcu(net, IPCB(skb)->iif); if (!dev) {
kfree_skb(skb); return -ENODEV;
}
}
/* Packet is looped back after forward, it should not be * forwarded second time, but still can be delivered locally.
*/ if (IPCB(skb)->flags & IPSKB_FORWARDED) goto dont_forward;
mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) {
kfree_skb(skb); return PTR_ERR(mrt);
} if (!local) { if (IPCB(skb)->opt.router_alert) { if (ip_call_ra_chain(skb)) return 0;
} elseif (ip_hdr(skb)->protocol == IPPROTO_IGMP) { /* IGMPv1 (and broken IGMPv2 implementations sort of * Cisco IOS <= 11.2(8)) do not put router alert * option to IGMP packets destined to routable * groups. It is very bad, because it means * that we can forward NO IGMP messages.
*/ struct sock *mroute_sk;
/* Forward the frame */ if (c->mfc_origin == htonl(INADDR_ANY) &&
c->mfc_mcastgrp == htonl(INADDR_ANY)) { if (ip_hdr(skb)->ttl >
c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) { /* It's an (*,*) entry and the packet is not coming from * the upstream: forward the packet to the upstream * only.
*/
psend = c->_c.mfc_parent; goto last_xmit;
} goto dont_xmit;
}
for (ct = c->_c.mfc_un.res.maxvif - 1;
ct >= c->_c.mfc_un.res.minvif; ct--) { if (ip_hdr(skb)->ttl > c->_c.mfc_un.res.ttls[ct]) { if (psend != -1) { struct sk_buff *skb2;
/*
 * NOTE(review): the following text is extraction residue from a web page
 * (a German disclaimer), not kernel code. Kept for provenance, translated:
 * "The information on this website has been carefully compiled to the best
 * of our knowledge. However, no guarantee is given as to the completeness,
 * correctness, or quality of the information provided.
 * Note: the colored syntax highlighting and the measurement are still
 * experimental."
 */