// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IPv4 Forwarding Information Base: semantics.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */
/* for_nexthops and change_nexthops are only used when a nexthop object
 * is not set in a fib_info. The logic within can reference fib_nh.
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH
staticstruct hlist_head *fib_info_hash_alloc(unsignedint hash_bits)
{ /* The second half is used for prefsrc */ return kvcalloc((1 << hash_bits) * 2, sizeof(struct hlist_head),
GFP_KERNEL);
}
/* Check, that the gateway is already configured. * Used only by redirect accept routine, under rcu_read_lock();
*/ int ip_fib_check_default(__be32 gw, struct net_device *dev)
{ struct hlist_head *head; struct fib_nh *nh;
/* only called when fib_nh is integrated into fib_info */ staticint fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, int remaining, struct fib_config *cfg, struct netlink_ext_ack *extack)
{ struct net *net = fi->fib_net; struct fib_config fib_cfg; struct fib_nh *nh; int ret;
change_nexthops(fi) { int attrlen;
memset(&fib_cfg, 0, sizeof(fib_cfg));
if (!rtnh_ok(rtnh, remaining)) {
NL_SET_ERR_MSG(extack, "Invalid nexthop configuration - extra data after nexthop"); return -EINVAL;
}
if (rtnh->rtnh_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
NL_SET_ERR_MSG(extack, "Invalid flags for nexthop - can not contain DEAD or LINKDOWN"); return -EINVAL;
}
nla = nla_find(attrs, attrlen, RTA_GATEWAY);
nlav = nla_find(attrs, attrlen, RTA_VIA); if (nla && nlav) {
NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); return -EINVAL;
} if (nla) {
ret = fib_gw_from_attr(&fib_cfg.fc_gw4, nla,
extack); if (ret) goto errout;
if (fib_cfg.fc_gw4)
fib_cfg.fc_gw_family = AF_INET;
} elseif (nlav) {
ret = fib_gw_from_via(&fib_cfg, nlav, extack); if (ret) goto errout;
}
ret = -EINVAL;
nh = fib_info_nh(fi, 0); if (cfg->fc_oif && nh->fib_nh_oif != cfg->fc_oif) {
NL_SET_ERR_MSG(extack, "Nexthop device index does not match RTA_OIF"); goto errout;
} if (cfg->fc_gw_family) { if (cfg->fc_gw_family != nh->fib_nh_gw_family ||
(cfg->fc_gw_family == AF_INET &&
nh->fib_nh_gw4 != cfg->fc_gw4) ||
(cfg->fc_gw_family == AF_INET6 &&
ipv6_addr_cmp(&nh->fib_nh_gw6, &cfg->fc_gw6))) {
NL_SET_ERR_MSG(extack, "Nexthop gateway does not match RTA_GATEWAY or RTA_VIA"); goto errout;
}
} #ifdef CONFIG_IP_ROUTE_CLASSID if (cfg->fc_flow && nh->nh_tclassid != cfg->fc_flow) {
NL_SET_ERR_MSG(extack, "Nexthop class id does not match RTA_FLOW"); goto errout;
} #endif
ret = 0;
errout: return ret;
}
/* only called when fib_nh is integrated into fib_info */ staticvoid fib_rebalance(struct fib_info *fi)
{ int total; int w;
if (fib_info_num_path(fi) < 2) return;
total = 0;
for_nexthops(fi) { if (nh->fib_nh_flags & RTNH_F_DEAD) continue;
if (ip_ignore_linkdown(nh->fib_nh_dev) &&
nh->fib_nh_flags & RTNH_F_LINKDOWN) continue;
total += nh->fib_nh_weight;
} endfor_nexthops(fi);
/*
 * Picture
 * -------
 *
 * The semantics of a nexthop are very messy for historical reasons.
 * We have to take into account that:
 * a) the gateway can actually be a local interface address,
 *    so that a gatewayed route is direct.
 * b) the gateway must be an on-link address, possibly
 *    described not by an ifaddr, but also by a direct route.
 * c) If both gateway and interface are specified, they should not
 *    contradict.
 * d) If we use tunnel routes, the gateway could be not on-link.
 *
 * An attempt to reconcile all of these (alas, self-contradictory) conditions
 * results in pretty ugly and hairy code with obscure logic.
 *
 * I chose to generalize it instead, so that the size
 * of the code does not increase practically, but it becomes
 * much more general.
 * Every prefix is assigned a "scope" value: "host" is a local address,
 * "link" is a direct route,
 * [ ... "site" ... "interior" ... ]
 * and "universe" is a true gateway route with global meaning.
 *
 * Every prefix refers to a set of "nexthop"s (gw, oif),
 * where gw must have a narrower scope. This recursion stops
 * when gw has LOCAL scope or if the "nexthop" is declared ONLINK,
 * which means that gw is forced to be on link.
 *
 * The code is still hairy, but now it is apparently logically
 * consistent and very flexible. E.g. as a by-product it allows
 * independent exterior and interior routing processes
 * to co-exist in peace.
 *
 * Normally it looks as follows.
 *
 * {universe prefix}  -> (gw, oif) [scope link]
 *		  |
 *		  |-> {link prefix} -> (gw, oif) [scope local]
 *					|
 *					|-> {local prefix} (terminal node)
 */

/* Validate an IPv4 gateway nexthop that carries RTNH_F_ONLINK.
 *
 * The route scope must be below RT_SCOPE_LINK, the device named by
 * nh->fib_nh_oif must exist and be IFF_UP, and the gateway address must
 * be of type RTN_UNICAST on that device. On success the device is stored
 * in the nexthop, netdev_hold() is called on it, the nexthop scope is
 * set to RT_SCOPE_LINK and RTNH_F_LINKDOWN is set if the carrier is off.
 *
 * Returns 0 on success, otherwise -EINVAL, -ENODEV or -ENETDOWN with a
 * message recorded through @extack.
 */
static int fib_check_nh_v4_gw_onlink(struct net *net, struct fib_nh *nh,
				     u8 scope, struct netlink_ext_ack *extack)
{
	struct net_device *oif_dev;
	unsigned int gw_type;

	if (scope >= RT_SCOPE_LINK) {
		NL_SET_ERR_MSG(extack, "Nexthop has invalid scope");
		return -EINVAL;
	}

	oif_dev = __dev_get_by_index(net, nh->fib_nh_oif);
	if (!oif_dev) {
		NL_SET_ERR_MSG(extack, "Nexthop device required for onlink");
		return -ENODEV;
	}

	if (!(oif_dev->flags & IFF_UP)) {
		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
		return -ENETDOWN;
	}

	gw_type = inet_addr_type_dev_table(net, oif_dev, nh->fib_nh_gw4);
	if (gw_type != RTN_UNICAST) {
		NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
		return -EINVAL;
	}

	if (!netif_carrier_ok(oif_dev))
		nh->fib_nh_flags |= RTNH_F_LINKDOWN;

	nh->fib_nh_dev = oif_dev;
	netdev_hold(oif_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
	nh->fib_nh_scope = RT_SCOPE_LINK;

	return 0;
}

/* Validate the IPv4 gateway of a nexthop and bind the nexthop to a device.
 *
 * @net:    namespace used for device and FIB lookups
 * @nh:     nexthop being checked; scope, oif, dev and flags are updated
 * @table:  table id to try first (skipped when 0 or RT_TABLE_MAIN)
 * @scope:  scope of the route that owns the nexthop
 * @extack: receives a message for every rejected configuration
 *
 * ONLINK nexthops are handled without a route lookup. Otherwise the
 * gateway is looked up, under rcu_read_lock(), with a scope of at least
 * RT_SCOPE_LINK; the result must be RTN_UNICAST or RTN_LOCAL and must
 * have an egress device.
 *
 * Returns 0 on success or a negative errno. Note that on the lookup path
 * the device is already stored in @nh and netdev_hold() has already been
 * called when -ENETDOWN is returned for a device that is not IFF_UP.
 */
static int fib_check_nh_v4_gw(struct net *net, struct fib_nh *nh, u32 table,
			      u8 scope, struct netlink_ext_ack *extack)
{
	struct net_device *egress;
	struct fib_result res;
	int err = 0;

	if (nh->fib_nh_flags & RTNH_F_ONLINK)
		return fib_check_nh_v4_gw_onlink(net, nh, scope, extack);

	rcu_read_lock();
	{
		struct fib_table *tbl = NULL;
		struct flowi4 fl4 = {
			.daddr = nh->fib_nh_gw4,
			.flowi4_scope = scope + 1,
			.flowi4_oif = nh->fib_nh_oif,
			.flowi4_iif = LOOPBACK_IFINDEX,
		};

		/* It is not necessary, but requires a bit of thinking */
		if (fl4.flowi4_scope < RT_SCOPE_LINK)
			fl4.flowi4_scope = RT_SCOPE_LINK;

		if (table && table != RT_TABLE_MAIN)
			tbl = fib_get_table(net, table);

		if (tbl) {
			err = fib_table_lookup(tbl, &fl4, &res,
					       FIB_LOOKUP_IGNORE_LINKSTATE |
					       FIB_LOOKUP_NOREF);
		}

		/* On error, or if no table was given, do a full lookup.
		 * This is needed for example when nexthops are in the local
		 * table rather than the given table.
		 */
		if (!tbl || err)
			err = fib_lookup(net, &fl4, &res,
					 FIB_LOOKUP_IGNORE_LINKSTATE);

		if (err) {
			NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
			goto out;
		}
	}

	/* Every failure from here up to the IFF_UP check is -EINVAL. */
	err = -EINVAL;
	if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) {
		NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
		goto out;
	}

	nh->fib_nh_scope = res.scope;
	nh->fib_nh_oif = FIB_RES_OIF(res);

	egress = FIB_RES_DEV(res);
	nh->fib_nh_dev = egress;
	if (!egress) {
		NL_SET_ERR_MSG(extack, "No egress device for nexthop gateway");
		goto out;
	}

	netdev_hold(egress, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
	if (!netif_carrier_ok(egress))
		nh->fib_nh_flags |= RTNH_F_LINKDOWN;

	if (egress->flags & IFF_UP)
		err = 0;
	else
		err = -ENETDOWN;

out:
	rcu_read_unlock();
	return err;
}
staticint fib_check_nh_nongw(struct net *net, struct fib_nh *nh, struct netlink_ext_ack *extack)
{ struct in_device *in_dev; int err;
if (nh->fib_nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) {
NL_SET_ERR_MSG(extack, "Invalid flags for nexthop - PERVASIVE and ONLINK can not be set"); return -EINVAL;
}
rcu_read_lock();
err = -ENODEV;
in_dev = inetdev_by_index(net, nh->fib_nh_oif); if (!in_dev) goto out;
err = -ENETDOWN; if (!(in_dev->dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack, "Device for nexthop is not up"); goto out;
}
ASSERT_RTNL(); if (cfg->fc_type > RTN_MAX) goto err_inval;
/* Fast check to catch the most weird cases */ if (fib_props[cfg->fc_type].scope > cfg->fc_scope) {
NL_SET_ERR_MSG(extack, "Invalid scope"); goto err_inval;
}
if (cfg->fc_flags & (RTNH_F_DEAD | RTNH_F_LINKDOWN)) {
NL_SET_ERR_MSG(extack, "Invalid rtm_flags - can not contain DEAD or LINKDOWN"); goto err_inval;
}
if (cfg->fc_nh_id) { if (!cfg->fc_mx) {
fi = fib_find_info_nh(net, cfg); if (fi) {
refcount_inc(&fi->fib_treeref); return fi;
}
}
nh = nexthop_find_by_id(net, cfg->fc_nh_id); if (!nh) {
NL_SET_ERR_MSG(extack, "Nexthop id does not exist"); goto err_inval;
}
nhs = 0;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH if (cfg->fc_mp) {
nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack); if (nhs == 0) goto err_inval;
} #endif
fib_info_hash_grow(net);
fi = kzalloc(struct_size(fi, fib_nh, nhs), GFP_KERNEL); if (!fi) {
err = -ENOBUFS; goto failure;
}
if (fib_props[cfg->fc_type].error) { if (cfg->fc_gw_family || cfg->fc_oif || cfg->fc_mp) {
NL_SET_ERR_MSG(extack, "Gateway, device and multipath can not be specified for this route type"); goto err_inval;
} goto link_it;
} else { switch (cfg->fc_type) { case RTN_UNICAST: case RTN_LOCAL: case RTN_BROADCAST: case RTN_ANYCAST: case RTN_MULTICAST: break; default:
NL_SET_ERR_MSG(extack, "Invalid route type"); goto err_inval;
}
}
/* Local address is added. */ if (nhs != 1) {
NL_SET_ERR_MSG(extack, "Route with host scope can not have multiple nexthops"); goto err_inval;
} if (nh->fib_nh_gw_family) {
NL_SET_ERR_MSG(extack, "Route with host scope can not have a gateway"); goto err_inval;
}
nh->fib_nh_scope = RT_SCOPE_NOWHERE;
nh->fib_nh_dev = dev_get_by_index(net, nh->fib_nh_oif);
err = -ENODEV; if (!nh->fib_nh_dev) goto failure;
netdev_tracker_alloc(nh->fib_nh_dev, &nh->fib_nh_dev_tracker,
GFP_KERNEL);
} else { int linkdown = 0;
change_nexthops(fi) {
err = fib_check_nh(cfg->fc_nlinfo.nl_net, nexthop_nh,
cfg->fc_table, cfg->fc_scope,
extack); if (err != 0) goto failure; if (nexthop_nh->fib_nh_flags & RTNH_F_LINKDOWN)
linkdown++;
} endfor_nexthops(fi) if (linkdown == fi->fib_nhs)
fi->fib_flags |= RTNH_F_LINKDOWN;
}
if (nhc->nhc_flags & RTNH_F_LINKDOWN) {
*flags |= RTNH_F_LINKDOWN;
rcu_read_lock(); switch (nhc->nhc_family) { case AF_INET: if (ip_ignore_linkdown(nhc->nhc_dev))
*flags |= RTNH_F_DEAD; break; case AF_INET6: if (ip6_ignore_linkdown(nhc->nhc_dev))
*flags |= RTNH_F_DEAD; break;
}
rcu_read_unlock();
}
switch (nhc->nhc_gw_family) { case AF_INET: if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4)) goto nla_put_failure; break; case AF_INET6: /* if gateway family does not match nexthop family * gateway is encoded as RTA_VIA
*/ if (rt_family != nhc->nhc_gw_family) { int alen = sizeof(struct in6_addr); struct nlattr *nla; struct rtvia *via;
if (rtm->rtm_dst_len &&
nla_put_in_addr(skb, RTA_DST, fri->dst)) goto nla_put_failure; if (fi->fib_priority &&
nla_put_u32(skb, RTA_PRIORITY, fi->fib_priority)) goto nla_put_failure; if (rtnetlink_put_metrics(skb, fi->fib_metrics->metrics) < 0) goto nla_put_failure;
if (fi->fib_prefsrc &&
nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure;
if (fi->nh) { if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id)) goto nla_put_failure; if (nexthop_is_blackhole(fi->nh))
rtm->rtm_type = RTN_BLACKHOLE; if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode)) goto offload;
}
/* * Update FIB if: * - local address disappeared -> we must delete all the entries * referring to it. * - device went down -> we must shutdown all nexthops going via it.
*/ int fib_sync_down_addr(struct net_device *dev, __be32 local)
{ int tb_id = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN; struct net *net = dev_net(dev); struct hlist_head *head; struct fib_info *fi; int ret = 0;
switch (event_type) { case FIB_EVENT_NH_ADD: if (nh->fib_nh_flags & RTNH_F_DEAD) break; if (ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) break; return call_fib4_notifiers(dev_net(nh->fib_nh_dev), event_type,
&info.info); case FIB_EVENT_NH_DEL: if ((ignore_link_down && nh->fib_nh_flags & RTNH_F_LINKDOWN) ||
(nh->fib_nh_flags & RTNH_F_DEAD)) return call_fib4_notifiers(dev_net(nh->fib_nh_dev),
event_type, &info.info); break; default: break;
}
return NOTIFY_DONE;
}
/* Propagate a first-hop MTU change to the nexthop's exception entries.
 *
 * @nhc:  nexthop whose exception hash is walked
 * @new:  new MTU of the first hop
 * @orig: previous MTU of the first hop
 *
 * The PMTU of an exception is updated when:
 * - the new MTU of the first hop becomes smaller than the PMTU
 * - the old MTU was the same as the PMTU, and it limited discovery of
 *   larger MTUs on the path. With that limit raised, we can now
 *   discover larger MTUs
 *
 * Locked exceptions, for which the PMTU is smaller than the minimal
 * accepted PMTU, are a special case:
 * - if the new MTU is greater than the PMTU, don't make any change
 * - otherwise, unlock and set PMTU
 *
 * Nothing is done when the nexthop has no exception hash.
 */
void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	int slot;

	hash = rcu_dereference_protected(nhc->nhc_exceptions, 1);
	if (!hash)
		return;

	for (slot = 0; slot < FNHE_HASH_SIZE; slot++) {
		fnhe = rcu_dereference_protected(hash[slot].chain, 1);

		while (fnhe) {
			if (fnhe->fnhe_mtu_locked) {
				/* Locked: only a new MTU at or below the
				 * PMTU unlocks the entry.
				 */
				if (new <= fnhe->fnhe_pmtu) {
					fnhe->fnhe_pmtu = new;
					fnhe->fnhe_mtu_locked = false;
				}
			} else if (new < fnhe->fnhe_pmtu ||
				   orig == fnhe->fnhe_pmtu) {
				fnhe->fnhe_pmtu = new;
			}

			fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1);
		}
	}
}
/* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host * NETDEV_DOWN 1 LINKDOWN|DEAD Last address removed * NETDEV_UNREGISTER 1 LINKDOWN|DEAD Device removed * * only used when fib_nh is built into fib_info
*/ int fib_sync_down_dev(struct net_device *dev, unsignedlong event, bool force)
{ struct hlist_head *head = fib_nh_head(dev); struct fib_info *prev_fi = NULL; int scope = RT_SCOPE_NOWHERE; struct fib_nh *nh; int ret = 0;
/* * Dead device goes up. We wake up dead nexthops. * It takes sense only on multipath routes. * * only used when fib_nh is built into fib_info
*/ int fib_sync_up(struct net_device *dev, unsignedchar nh_flags)
{ struct fib_info *prev_fi; struct hlist_head *head; struct fib_nh *nh; int ret;
if (!(dev->flags & IFF_UP)) return 0;
if (nh_flags & RTNH_F_DEAD) { unsignedint flags = netif_get_flags(dev);
if (flags & (IFF_RUNNING | IFF_LOWER_UP))
nh_flags |= RTNH_F_LINKDOWN;
}
BUG_ON(!fi->fib_nhs);
DEBUG_NET_WARN_ON_ONCE(nh->fib_nh_dev != dev); if (fi == prev_fi) continue;
prev_fi = fi;
alive = 0;
change_nexthops(fi) { if (!(nexthop_nh->fib_nh_flags & nh_flags)) {
alive++; continue;
} if (!nexthop_nh->fib_nh_dev ||
!(nexthop_nh->fib_nh_dev->flags & IFF_UP)) continue; if (nexthop_nh->fib_nh_dev != dev ||
!__in_dev_get_rtnl(dev)) continue;
alive++;
nexthop_nh->fib_nh_flags &= ~nh_flags;
call_fib_nh_notifiers(nexthop_nh, FIB_EVENT_NH_ADD);
} endfor_nexthops(fi)
if (alive > 0) {
fi->fib_flags &= ~nh_flags;
ret++;
}
fib_rebalance(fi);
}
return ret;
}
#ifdef CONFIG_IP_ROUTE_MULTIPATH
/* Tell whether a nexthop is usable judging by its neighbour entry.
 *
 * Only nexthops with RT_SCOPE_LINK are checked against the neighbour
 * table: the gateway is looked up (IPv4 or IPv6, depending on the
 * gateway family) under rcu_read_lock() and, if an entry exists, its
 * nud_state is used. In every other case the state defaults to
 * NUD_REACHABLE.
 *
 * Returns true when the resulting state has any NUD_VALID bit set.
 */
static bool fib_good_nh(const struct fib_nh *nh)
{
	struct neighbour *neigh = NULL;
	int nud = NUD_REACHABLE;

	if (nh->fib_nh_scope != RT_SCOPE_LINK)
		return !!(nud & NUD_VALID);

	rcu_read_lock();

	if (likely(nh->fib_nh_gw_family == AF_INET)) {
		neigh = __ipv4_neigh_lookup_noref(nh->fib_nh_dev,
						  (__force u32)nh->fib_nh_gw4);
	} else if (nh->fib_nh_gw_family == AF_INET6) {
		neigh = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev,
						       &nh->fib_nh_gw6);
	}

	if (neigh)
		nud = READ_ONCE(neigh->nud_state);

	rcu_read_unlock();

	return !!(nud & NUD_VALID);
}
void fib_select_multipath(struct fib_result *res, int hash, conststruct flowi4 *fl4)
{ struct fib_info *fi = res->fi; struct net *net = fi->fib_net; bool found = false; bool use_neigh;
__be32 saddr;
if (unlikely(res->fi->nh)) {
nexthop_path_fib_result(res, hash); return;
}
/* Nexthops without a carrier are assigned an upper bound of * minus one when "ignore_routes_with_linkdown" is set.
*/
nh_upper_bound = atomic_read(&nexthop_nh->fib_nh_upper_bound); if (nh_upper_bound == -1 ||
(use_neigh && !fib_good_nh(nexthop_nh))) continue;
if (!found) {
res->nh_sel = nhsel;
res->nhc = &nexthop_nh->nh_common;
found = !saddr || nexthop_nh->nh_saddr == saddr;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.