// SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * IPv4 Forwarding Information Base: FIB frontend. * * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
/* * Find address type as if only "dev" was present in the system. If * dev is NULL then all interfaces are taken into consideration.
*/ staticinlineunsignedint __inet_dev_addr_type(struct net *net, conststruct net_device *dev,
__be32 addr, u32 tb_id)
{ struct flowi4 fl4 = { .daddr = addr }; struct fib_result res; unsignedint ret = RTN_BROADCAST; struct fib_table *table;
if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) return RTN_BROADCAST; if (ipv4_is_multicast(addr)) return RTN_MULTICAST;
rcu_read_lock();
table = fib_get_table(net, tb_id); if (table) {
ret = RTN_UNICAST; if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) { struct fib_nh_common *nhc = fib_info_nhc(res.fi, 0);
if (!dev || dev == nhc->nhc_dev)
ret = res.type;
}
}
/* inet_addr_type with dev == NULL but using the table from a dev * if one is associated
*/ unsignedint inet_addr_type_dev_table(struct net *net, conststruct net_device *dev,
__be32 addr)
{
u32 rt_table = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
dev_match = fib_info_nh_uses_dev(res.fi, dev); /* This is not common, loopback packets retain skb_dst so normally they * would not even hit this slow path.
*/
dev_match = dev_match || (res.type == RTN_LOCAL &&
dev == net->loopback_dev); if (dev_match) {
ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; return ret;
} if (no_addr) goto last_resort; if (rpf == 1) goto e_rpf;
fl4.flowi4_oif = dev->ifindex;
ret = 0; if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST)
ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST;
} return ret;
last_resort: if (rpf) goto e_rpf;
*itag = 0; return 0;
/* Ignore rp_filter for packets protected by IPsec. */ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
dscp_t dscp, int oif, struct net_device *dev, struct in_device *idev, u32 *itag)
{ int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); struct net *net = dev_net(dev);
if (!r && !fib_num_tclassid_users(net) &&
(dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { if (IN_DEV_ACCEPT_LOCAL(idev)) goto ok; /* with custom local routes in place, checking local addresses * only will be too optimistic, with custom rules, checking * local addresses only can be too strict, e.g. due to vrf
*/ if (net->ipv4.fib_has_custom_local_routes ||
fib4_has_custom_rules(net)) goto full_check; /* Within the same container, it is regarded as a martian source, * and the same host but different containers are not.
*/ if (inet_lookup_ifaddr_rcu(net, src)) return -SKB_DROP_REASON_IP_LOCAL_SOURCE;
if (rt->rt_dst.sa_family != AF_INET) return -EAFNOSUPPORT;
/* * Check mask for validity: * a) it must be contiguous. * b) destination must have all host bits clear. * c) if application forgot to set correct family (AF_INET), * reject request unless it is absolutely clear i.e. * both family and mask are zero.
*/
plen = 32;
addr = sk_extract_addr(&rt->rt_dst); if (!(rt->rt_flags & RTF_HOST)) {
__be32 mask = sk_extract_addr(&rt->rt_genmask);
if (rt->rt_genmask.sa_family != AF_INET) { if (mask || rt->rt_genmask.sa_family) return -EAFNOSUPPORT;
}
if (!cfg->fc_table)
cfg->fc_table = RT_TABLE_MAIN;
if (cmd == SIOCDELRT) return 0;
if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw_family) return -EINVAL;
if (cfg->fc_scope == RT_SCOPE_NOWHERE)
cfg->fc_scope = RT_SCOPE_LINK;
if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { struct nlattr *mx; int len = 0;
mx = kcalloc(3, nla_total_size(4), GFP_KERNEL); if (!mx) return -ENOMEM;
if (rt->rt_flags & RTF_MTU)
len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40);
if (rt->rt_flags & RTF_WINDOW)
len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window);
if (rt->rt_flags & RTF_IRTT)
len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3);
cfg->fc_mx = mx;
cfg->fc_mx_len = len;
}
return 0;
}
/* * Handle IP routing ioctl calls. * These are used to manipulate the routing tables
*/ int ip_rt_ioctl(struct net *net, unsignedint cmd, struct rtentry *rt)
{ struct fib_config cfg; int err;
switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM;
err = 0; for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) {
e = 0;
head = &net->ipv4.fib_table_hash[h];
hlist_for_each_entry_rcu(tb, head, tb_hlist) { if (e < s_e) goto next; if (dumped)
memset(&cb->args[2], 0, sizeof(cb->args) -
2 * sizeof(cb->args[0]));
err = fib_table_dump(tb, skb, cb, &filter); if (err < 0) goto out;
dumped = 1;
next:
e++;
}
}
out:
cb->args[1] = e;
cb->args[0] = h;
unlock:
rcu_read_unlock(); return err;
}
/* Prepare and feed an intra-kernel routing request. * Really, it should be a netlink message, but :-( netlink * may not be configured, so we feed it directly * to the fib engine. This is legal because all events occur * only when netlink is already locked.
*/ staticvoid fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa, u32 rt_priority)
{ struct net *net = dev_net(ifa->ifa_dev->dev);
u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev); struct fib_table *tb; struct fib_config cfg = {
.fc_protocol = RTPROT_KERNEL,
.fc_type = type,
.fc_dst = dst,
.fc_dst_len = dst_len,
.fc_priority = rt_priority,
.fc_prefsrc = ifa->ifa_local,
.fc_oif = ifa->ifa_dev->dev->ifindex,
.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
.fc_nlinfo = {
.nl_net = net,
},
};
/* add the new */
fib_magic(RTM_NEWROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
prefix, ifa->ifa_prefixlen, ifa, new_metric);
/* delete the old */
fib_magic(RTM_DELROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
prefix, ifa->ifa_prefixlen, ifa, ifa->ifa_rt_priority);
}
/* Delete primary or secondary address. * Optionally, on secondary address promotion consider the addresses * from subnet iprim as deleted, even if they are in device list. * In this case the secondary ifa can be in device list.
*/ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
{ struct in_device *in_dev = ifa->ifa_dev; struct net_device *dev = in_dev->dev; struct in_ifaddr *ifa1; struct in_ifaddr *prim = ifa, *prim1 = NULL;
__be32 brd = ifa->ifa_address | ~ifa->ifa_mask;
__be32 any = ifa->ifa_address & ifa->ifa_mask; #define LOCAL_OK 1 #define BRD_OK 2 #define BRD0_OK 4 #define BRD1_OK 8 unsignedint ok = 0; int subnet = 0; /* Primary network */ int gone = 1; /* Address is missing */ int same_prefsrc = 0; /* Another primary with same IP */
if (ifa->ifa_flags & IFA_F_SECONDARY) {
prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); if (!prim) { /* if the device has been deleted, we don't perform * address promotion
*/ if (!in_dev->dead)
pr_warn("%s: bug: prim == NULL\n", __func__); return;
} if (iprim && iprim != prim) {
pr_warn("%s: bug: iprim != prim\n", __func__); return;
}
} elseif (!ipv4_is_zeronet(any) &&
(any != ifa->ifa_local || ifa->ifa_prefixlen < 32)) { if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_DELROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
any, ifa->ifa_prefixlen, prim, 0);
subnet = 1;
}
if (in_dev->dead) goto no_promotions;
/* Deletion is more complicated than add. * We should take care of not to delete too much :-) * * Scan address list to be sure that addresses are really gone.
*/
rcu_read_lock();
in_dev_for_each_ifa_rcu(ifa1, in_dev) { if (ifa1 == ifa) { /* promotion, keep the IP */
gone = 0; continue;
} /* Ignore IFAs from our subnet */ if (iprim && ifa1->ifa_mask == iprim->ifa_mask &&
inet_ifa_match(ifa1->ifa_address, iprim)) continue;
/* Ignore ifa1 if it uses different primary IP (prefsrc) */ if (ifa1->ifa_flags & IFA_F_SECONDARY) { /* Another address from our subnet? */ if (ifa1->ifa_mask == prim->ifa_mask &&
inet_ifa_match(ifa1->ifa_address, prim))
prim1 = prim; else { /* We reached the secondaries, so * same_prefsrc should be determined.
*/ if (!same_prefsrc) continue; /* Search new prim1 if ifa1 is not * using the current prim1
*/ if (!prim1 ||
ifa1->ifa_mask != prim1->ifa_mask ||
!inet_ifa_match(ifa1->ifa_address, prim1))
prim1 = inet_ifa_byprefix(in_dev,
ifa1->ifa_address,
ifa1->ifa_mask); if (!prim1) continue; if (prim1->ifa_local != prim->ifa_local) continue;
}
} else { if (prim->ifa_local != ifa1->ifa_local) continue;
prim1 = ifa1; if (prim != prim1)
same_prefsrc = 1;
} if (ifa->ifa_local == ifa1->ifa_local)
ok |= LOCAL_OK; if (ifa->ifa_broadcast == ifa1->ifa_broadcast)
ok |= BRD_OK; if (brd == ifa1->ifa_broadcast)
ok |= BRD1_OK; if (any == ifa1->ifa_broadcast)
ok |= BRD0_OK; /* primary has network specific broadcasts */ if (prim1 == ifa1 && ifa1->ifa_prefixlen < 31) {
__be32 brd1 = ifa1->ifa_address | ~ifa1->ifa_mask;
__be32 any1 = ifa1->ifa_address & ifa1->ifa_mask;
if (!ipv4_is_zeronet(any1)) { if (ifa->ifa_broadcast == brd1 ||
ifa->ifa_broadcast == any1)
ok |= BRD_OK; if (brd == brd1 || brd == any1)
ok |= BRD1_OK; if (any == brd1 || any == any1)
ok |= BRD0_OK;
}
}
}
rcu_read_unlock();
no_promotions: if (!(ok & BRD_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
prim, 0); if (subnet && ifa->ifa_prefixlen < 31) { if (!(ok & BRD1_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32,
prim, 0); if (!(ok & BRD0_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32,
prim, 0);
} if (!(ok & LOCAL_OK)) { unsignedint addr_type;
/* Check, that this local address finally disappeared. */
addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
ifa->ifa_local); if (gone && addr_type != RTN_LOCAL) { /* And the last, but not the least thing. * We must flush stray FIB entries. * * First of all, we scan fib_info list searching * for stray nexthop entries, then ignite fib_flush.
*/ if (fib_sync_down_addr(dev, ifa->ifa_local))
fib_flush(dev_net(dev));
}
} #undef LOCAL_OK #undef BRD_OK #undef BRD0_OK #undef BRD1_OK
}
staticvoid nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
{
staticvoid ip_fib_net_exit(struct net *net)
{ int i;
ASSERT_RTNL_NET(net); #ifdef CONFIG_IP_MULTIPLE_TABLES
RCU_INIT_POINTER(net->ipv4.fib_main, NULL);
RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif /* Destroy the tables in reverse order to guarantee that the * local table, ID 255, is destroyed before the main table, ID * 254. This is necessary as the local table may contain * references to data contained in the main table.
*/ for (i = FIB_TABLE_HASHSZ - 1; i >= 0; i--) { struct hlist_head *head = &net->ipv4.fib_table_hash[i]; struct hlist_node *tmp; struct fib_table *tb;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.