// SPDX-License-Identifier: GPL-1.0+ /* * originally based on the dummy device. * * Copyright 1999, Thomas Davis, tadavis@lbl.gov. * Based on dummy.c, and eql.c devices. * * bonding.c: an Ethernet Bonding driver * * This is useful to talk to a Cisco EtherChannel compatible equipment: * Cisco 5500 * Sun Trunking (Solaris) * Alteon AceDirector Trunks * Linux Bonding * and probably many L2 switches ... * * How it works: * ifconfig bond0 ipaddress netmask up * will setup a network device, with an ip address. No mac address * will be assigned at this time. The hw mac address will come from * the first slave bonded to the channel. All slaves will then use * this hw mac address. * * ifconfig bond0 down * will release all slaves, marking them as down. * * ifenslave bond0 eth0 * will attach eth0 to bond0 as a slave. eth0 hw mac address will either * a: be used as initial mac address * b: if a hw mac address already is there, eth0's hw mac address * will then be set from bond0. *
*/
/* Module parameters of the bonding driver.
 *
 * Every parameter is declared with module_param*() and immediately paired
 * with a MODULE_PARM_DESC() giving its user-visible description.  All of
 * them pass 0 as the last argument except the two peer-notification
 * aliases below, which pass 0644.
 */
module_param(max_bonds, int, 0);
MODULE_PARM_DESC(max_bonds, "Max number of bonded devices");
module_param(tx_queues, int, 0);
MODULE_PARM_DESC(tx_queues, "Max number of transmit queues (default = 16)");
/* num_grat_arp and num_unsol_na are two names bound to the same variable,
 * num_peer_notif; setting either one changes both.
 */
module_param_named(num_grat_arp, num_peer_notif, int, 0644);
MODULE_PARM_DESC(num_grat_arp, "Number of peer notifications to send on " "failover event (alias of num_unsol_na)");
module_param_named(num_unsol_na, num_peer_notif, int, 0644);
MODULE_PARM_DESC(num_unsol_na, "Number of peer notifications to send on " "failover event (alias of num_grat_arp)");
/* Link monitoring (miimon) knobs; intervals and delays are in milliseconds */
module_param(miimon, int, 0);
MODULE_PARM_DESC(miimon, "Link check interval in milliseconds");
module_param(updelay, int, 0);
MODULE_PARM_DESC(updelay, "Delay before considering link up, in milliseconds");
module_param(downdelay, int, 0);
MODULE_PARM_DESC(downdelay, "Delay before considering link down, " "in milliseconds");
module_param(use_carrier, int, 0);
MODULE_PARM_DESC(use_carrier, "Use netif_carrier_ok (vs MII ioctls) in miimon; " "0 for off, 1 for on (default)");
/* Mode and slave selection.  These are passed as strings (charp) even though
 * the descriptions list numeric values.
 */
module_param(mode, charp, 0);
MODULE_PARM_DESC(mode, "Mode of operation; 0 for balance-rr, " "1 for active-backup, 2 for balance-xor, " "3 for broadcast, 4 for 802.3ad, 5 for balance-tlb, " "6 for balance-alb");
module_param(primary, charp, 0);
MODULE_PARM_DESC(primary, "Primary network device to use");
module_param(primary_reselect, charp, 0);
MODULE_PARM_DESC(primary_reselect, "Reselect primary slave " "once it comes up; " "0 for always (default), " "1 for only if speed of primary is " "better, " "2 for only on active slave " "failure");
/* 802.3ad specific settings */
module_param(lacp_rate, charp, 0);
MODULE_PARM_DESC(lacp_rate, "LACPDU tx rate to request from 802.3ad partner; " "0 for slow, 1 for fast");
module_param(ad_select, charp, 0);
MODULE_PARM_DESC(ad_select, "802.3ad aggregation selection logic; " "0 for stable (default), 1 for bandwidth, " "2 for count");
module_param(min_links, int, 0);
MODULE_PARM_DESC(min_links, "Minimum number of available links before turning on carrier");
module_param(xmit_hash_policy, charp, 0);
MODULE_PARM_DESC(xmit_hash_policy, "balance-alb, balance-tlb, balance-xor, 802.3ad hashing method; " "0 for layer 2 (default), 1 for layer 3+4, " "2 for layer 2+3, 3 for encap layer 2+3, " "4 for encap layer 3+4, 5 for vlan+srcmac");
/* ARP monitoring.  arp_ip_target is the only array parameter; NULL is passed
 * where module_param_array() takes its count pointer.
 */
module_param(arp_interval, int, 0);
MODULE_PARM_DESC(arp_interval, "arp interval in milliseconds");
module_param_array(arp_ip_target, charp, NULL, 0);
MODULE_PARM_DESC(arp_ip_target, "arp targets in n.n.n.n form");
module_param(arp_validate, charp, 0);
MODULE_PARM_DESC(arp_validate, "validate src/dst of ARP probes; " "0 for none (default), 1 for active, " "2 for backup, 3 for all");
module_param(arp_all_targets, charp, 0);
MODULE_PARM_DESC(arp_all_targets, "fail on any/all arp targets timeout; 0 for any (default), 1 for all");
/* Failover and traffic-handling behaviour */
module_param(fail_over_mac, charp, 0);
MODULE_PARM_DESC(fail_over_mac, "For active-backup, do not set all slaves to " "the same MAC; 0 for none (default), " "1 for active, 2 for follow");
module_param(all_slaves_active, int, 0);
MODULE_PARM_DESC(all_slaves_active, "Keep all frames received on an interface " "by setting active flag for all slaves; " "0 for never (default), 1 for always.");
module_param(resend_igmp, int, 0);
MODULE_PARM_DESC(resend_igmp, "Number of IGMP membership reports to send on " "link failure");
module_param(packets_per_slave, int, 0);
MODULE_PARM_DESC(packets_per_slave, "Packets to send per slave in balance-rr " "mode; 0 for a random slave, 1 packet per " "slave (default), >1 packets per slave.");
/* lp_interval is the only parameter declared unsigned (uint) */
module_param(lp_interval, uint, 0);
MODULE_PARM_DESC(lp_interval, "The number of seconds between instances where " "the bonding driver sends learning packets to " "each slaves peer switch. The default is 1.");
/*----------------------------- Global variables ----------------------------*/
if (unlikely(netpoll_tx_running(bond->dev))) return bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);
return dev_queue_xmit(skb);
}
staticbool bond_sk_check(struct bonding *bond)
{ switch (BOND_MODE(bond)) { case BOND_MODE_8023AD: case BOND_MODE_XOR: if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34) returntrue;
fallthrough; default: returnfalse;
}
}
bool bond_xdp_check(struct bonding *bond, int mode)
{ switch (mode) { case BOND_MODE_ROUNDROBIN: case BOND_MODE_ACTIVEBACKUP: returntrue; case BOND_MODE_8023AD: case BOND_MODE_XOR: /* vlan+srcmac is not supported with XDP as in most cases the 802.1q * payload is not in the packet due to hardware offload.
*/ if (bond->params.xmit_policy != BOND_XMIT_POLICY_VLAN_SRCMAC) returntrue;
fallthrough; default: returnfalse;
}
}
/* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid, * We don't protect the slave list iteration with a lock because: * a. This operation is performed in IOCTL context, * b. The operation is protected by the RTNL semaphore in the 8021q code, * c. Holding a lock with BH disabled while directly calling a base driver * entry point is generally a BAD idea. * * The design of synchronization/protection for this operation in the 8021q * module is good for one or more VLAN devices over a single physical device * and cannot be extended for a teaming solution like bonding, so there is a * potential race condition here where a net device from the vlan group might * be referenced (either by a base driver or the 8021q code) while it is being * removed from the system. However, it turns out we're not making matters * worse, and if it works for regular VLAN usage it will work here too.
*/
/**
 * bond_vlan_rx_add_vid - Propagates adding an id to slaves
 * @bond_dev: bonding net device that got called
 * @proto: network protocol ID
 * @vid: vlan id being added
 *
 * Return: 0 when vlan_vid_add() succeeded on every slave; otherwise the
 * error of the first failing slave, after the vid has been removed again
 * from all slaves that precede it in the slave list.
 */
static int bond_vlan_rx_add_vid(struct net_device *bond_dev,
				__be16 proto, u16 vid)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *slave, *rollback_slave;
	struct list_head *iter;
	int res;

	bond_for_each_slave(bond, slave, iter) {
		res = vlan_vid_add(slave->dev, proto, vid);
		if (res)
			goto unwind;
	}

	return 0;

unwind:
	/* unwind to the slave that failed: @slave still points at it, so
	 * stop the second walk as soon as it is reached
	 */
	bond_for_each_slave(bond, rollback_slave, iter) {
		if (rollback_slave == slave)
			break;

		vlan_vid_del(rollback_slave->dev, proto, vid);
	}

	return res;
}
/**
 * bond_vlan_rx_kill_vid - Propagates deleting an id to slaves
 * @bond_dev: bonding net device that got called
 * @proto: network protocol ID
 * @vid: vlan id being removed
 *
 * Return: always 0; nothing on the removal path reports a failure.
 */
static int bond_vlan_rx_kill_vid(struct net_device *bond_dev,
				 __be16 proto, u16 vid)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct list_head *iter;
	struct slave *slave;

	bond_for_each_slave(bond, slave, iter)
		vlan_vid_del(slave->dev, proto, vid);

	/* load-balancing modes additionally clear their per-vlan state */
	if (bond_is_lb(bond))
		bond_alb_clear_vlan(bond, vid);

	return 0;
}
#ifdef CONFIG_XFRM_OFFLOAD
/**
 * bond_ipsec_dev - Get active device for IPsec offload
 * @xs: pointer to transformer state struct
 *
 * Context: caller must hold rcu_read_lock.
 *
 * Return: the net_device of the bond's current active slave, or NULL when
 * the bond is not in active-backup mode, has no active slave, or @xs has no
 * real_dev recorded.
 **/
static struct net_device *bond_ipsec_dev(struct xfrm_state *xs)
{
	struct net_device *bond_dev = xs->xso.dev;
	struct bonding *bond;
	struct slave *slave;

	bond = netdev_priv(bond_dev);
	if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)
		return NULL;

	slave = rcu_dereference(bond->curr_active_slave);
	if (!slave)
		return NULL;

	if (!xs->xso.real_dev)
		return NULL;

	/* A mismatch is only reported; the active slave's device is still
	 * what gets returned.
	 */
	if (xs->xso.real_dev != slave->dev)
		pr_warn_ratelimited("%s: (slave %s): not same with IPsec offload real dev %s\n",
				    bond_dev->name, slave->dev->name, xs->xso.real_dev->name);

	return slave->dev;
}
/* NOTE(review): the body below does not look like one coherent function.
 * The first check reports through @extack and sets err, while everything
 * from mutex_lock() onwards walks bond->ipsec_list and never touches @xs.
 * Presumably two routines were merged when this text was extracted; confirm
 * against the upstream file before relying on any of it.
 * `staticint` on the signature line also looks like a fused `static int`.
 */
/** * bond_ipsec_add_sa - program device with a security association * @bond_dev: pointer to the bond net device * @xs: pointer to transformer state struct * @extack: extack point to fill failure reason
**/ staticint bond_ipsec_add_sa(struct net_device *bond_dev, struct xfrm_state *xs, struct netlink_ext_ack *extack)
{ struct net_device *real_dev;
netdevice_tracker tracker; struct bond_ipsec *ipsec; struct bonding *bond; struct slave *slave; int err;
/* NOTE(review): real_dev is dereferenced here, and bond further down, but
 * nothing visible in this function assigns either of them.  tracker and
 * slave are declared and never used.
 */
if (!real_dev->xfrmdev_ops ||
!real_dev->xfrmdev_ops->xdo_dev_state_add ||
netif_is_bond_master(real_dev)) {
NL_SET_ERR_MSG_MOD(extack, "Slave does not support ipsec offload");
err = -EINVAL; goto out;
}
/* NOTE(review): the `goto out` above jumps to mutex_unlock() although the
 * lock is only taken on the next line.  The condition that follows repeats
 * the check just made.
 */
mutex_lock(&bond->ipsec_lock); if (!real_dev->xfrmdev_ops ||
!real_dev->xfrmdev_ops->xdo_dev_state_add ||
netif_is_bond_master(real_dev)) { if (!list_empty(&bond->ipsec_list))
slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_add\n",
__func__); goto out;
}
/* Offer every state on the bond's ipsec_list to real_dev, skipping entries
 * already bound to it; a failed add is logged and the walk continues.
 */
list_for_each_entry(ipsec, &bond->ipsec_list, list) { /* If new state is added before ipsec_lock acquired */ if (ipsec->xs->xso.real_dev == real_dev) continue;
if (real_dev->xfrmdev_ops->xdo_dev_state_add(real_dev,
ipsec->xs, NULL)) {
slave_warn(bond_dev, real_dev, "%s: failed to add SA\n", __func__); continue;
}
spin_lock_bh(&ipsec->xs->lock); /* xs might have been killed by the user during the migration * to the new dev, but bond_ipsec_del_sa() should have done * nothing, as xso.real_dev is NULL. * Delete it from the device we just added it to. The pending * bond_ipsec_free_sa() call will do the rest of the cleanup.
*/ if (ipsec->xs->km.state == XFRM_STATE_DEAD &&
real_dev->xfrmdev_ops->xdo_dev_state_delete)
real_dev->xfrmdev_ops->xdo_dev_state_delete(real_dev,
ipsec->xs);
ipsec->xs->xso.real_dev = real_dev;
spin_unlock_bh(&ipsec->xs->lock);
}
/* NOTE(review): the signature promises an int, yet no return statement is
 * visible; err is set on one path and never returned.
 */
out:
mutex_unlock(&bond->ipsec_lock);
}
/** * bond_ipsec_del_sa - clear out this specific SA * @bond_dev: pointer to the bond net device * @xs: pointer to transformer state struct
**/ staticvoid bond_ipsec_del_sa(struct net_device *bond_dev, struct xfrm_state *xs)
{ struct net_device *real_dev;
if (!bond_dev || !xs->xso.real_dev) return;
real_dev = xs->xso.real_dev;
if (!real_dev->xfrmdev_ops ||
!real_dev->xfrmdev_ops->xdo_dev_state_delete ||
netif_is_bond_master(real_dev)) {
slave_warn(bond_dev, real_dev, "%s: no slave xdo_dev_state_delete\n", __func__); return;
}
/** * bond_ipsec_offload_ok - can this packet use the xfrm hw offload * @skb: current data packet * @xs: pointer to transformer state struct
**/ staticbool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs)
{ struct net_device *real_dev;
/*------------------------------- Link status -------------------------------*/
/* Set the carrier state for the master according to the state of its
 * slaves.  If any slaves are up, the master is up.  In 802.3ad mode,
 * do special 802.3ad magic.
 *
 * Returns zero if carrier state does not change, nonzero if it does.
 */
int bond_set_carrier(struct bonding *bond)
{
	struct list_head *iter;
	struct slave *slave;

	if (!bond_has_slaves(bond))
		goto down;

	/* 802.3ad decides carrier on its own terms */
	if (BOND_MODE(bond) == BOND_MODE_8023AD)
		return bond_3ad_set_carrier(bond);

	/* The first slave found with link up decides the outcome: turn the
	 * master's carrier on if it was off, otherwise nothing changes.
	 */
	bond_for_each_slave(bond, slave, iter) {
		if (slave->link == BOND_LINK_UP) {
			if (!netif_carrier_ok(bond->dev)) {
				netif_carrier_on(bond->dev);
				return 1;
			}
			return 0;
		}
	}

down:
	/* No slaves at all, or none with link up: the master must be down.
	 * Report a change only if the carrier was on before.
	 */
	if (netif_carrier_ok(bond->dev)) {
		netif_carrier_off(bond->dev);
		return 1;
	}
	return 0;
}
/* Get link speed and duplex from the slave's base driver * using ethtool. If for some reason the call fails or the * values are invalid, set speed and duplex to -1, * and return. Return 1 if speed or duplex settings are * UNKNOWN; 0 otherwise.
*/ staticint bond_update_speed_duplex(struct slave *slave)
{ struct net_device *slave_dev = slave->dev; struct ethtool_link_ksettings ecmd; int res;
/* Translate a BOND_LINK_* state into a short human-readable label.
 *
 * @link: one of BOND_LINK_UP, BOND_LINK_FAIL, BOND_LINK_DOWN, BOND_LINK_BACK
 *
 * Returns a pointer to a string literal; any value outside the four known
 * states maps to "unknown".
 */
const char *bond_slave_link_status(s8 link)
{
	switch (link) {
	case BOND_LINK_UP:
		return "up";
	case BOND_LINK_FAIL:
		return "going down";
	case BOND_LINK_DOWN:
		return "down";
	case BOND_LINK_BACK:
		return "going back";
	default:
		return "unknown";
	}
}
/* if <dev> supports MII link status reporting, check its link status. * * We either do MII/ETHTOOL ioctls, or check netif_carrier_ok(), * depending upon the setting of the use_carrier parameter. * * Return either BMSR_LSTATUS, meaning that the link is up (or we * can't tell and just pretend it is), or 0, meaning that the link is * down. * * If reporting is non-zero, instead of faking link up, return -1 if * both ETHTOOL and MII ioctls fail (meaning the device does not * support them). If use_carrier is set, return whatever it says. * It'd be nice if there was a good way to tell if a driver supports * netif_carrier, but there really isn't.
*/ staticint bond_check_dev_link(struct bonding *bond, struct net_device *slave_dev, int reporting)
{ conststruct net_device_ops *slave_ops = slave_dev->netdev_ops; struct mii_ioctl_data *mii; struct ifreq ifr; int ret;
if (!reporting && !netif_running(slave_dev)) return 0;
if (bond->params.use_carrier) return netif_carrier_ok(slave_dev) ? BMSR_LSTATUS : 0;
/* Try to get link status using Ethtool first. */ if (slave_dev->ethtool_ops->get_link) {
netdev_lock_ops(slave_dev);
ret = slave_dev->ethtool_ops->get_link(slave_dev);
netdev_unlock_ops(slave_dev);
return ret ? BMSR_LSTATUS : 0;
}
/* Ethtool can't be used, fallback to MII ioctls. */ if (slave_ops->ndo_eth_ioctl) { /* TODO: set pointer to correct ioctl on a per team member * bases to make this more efficient. that is, once * we determine the correct ioctl, we will always * call it and not the others for that team * member.
*/
/* We cannot assume that SIOCGMIIPHY will also read a * register; not all network drivers (e.g., e100) * support that.
*/
/* Yes, the mii is overlaid on the ifreq.ifr_ifru */
strscpy_pad(ifr.ifr_name, slave_dev->name, IFNAMSIZ);
mii = if_mii(&ifr);
/* If reporting, report that either there's no ndo_eth_ioctl, * or both SIOCGMIIREG and get_link failed (meaning that we * cannot report link status). If not reporting, pretend * we're ok.
*/ return reporting ? -1 : BMSR_LSTATUS;
}
/*----------------------------- Multicast list ------------------------------*/
/* Push the promiscuity flag down to appropriate slaves */ staticint bond_set_promiscuity(struct bonding *bond, int inc)
{ struct list_head *iter; int err = 0;
if (bond_uses_primary(bond)) { struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);
/* NOTE(review): the tail of this function does not match its head.
 * slave_dev is not declared anywhere in it, and the dev_mc_del() call has
 * nothing to do with resending IGMP joins.  No rtnl_unlock() is visible
 * after the successful rtnl_trylock() either.  Presumably the end of
 * another routine was merged in when this text was extracted; confirm
 * against the upstream file.
 * `staticvoid` on the signature line also looks like a fused `static void`.
 */
/* Retrieve the list of registered multicast addresses for the bonding * device and retransmit an IGMP JOIN request to the current active * slave.
*/ staticvoid bond_resend_igmp_join_requests_delayed(struct work_struct *work)
{ struct bonding *bond = container_of(work, struct bonding,
mcast_work.work);
/* RTNL is only tried, not waited for: on contention the work item is
 * re-queued with a delay of 1 and this invocation gives up.
 */
if (!rtnl_trylock()) {
queue_delayed_work(bond->wq, &bond->mcast_work, 1); return;
}
/* Raise NETDEV_RESEND_IGMP on the bond device */
call_netdevice_notifiers(NETDEV_RESEND_IGMP, bond->dev);
if (BOND_MODE(bond) == BOND_MODE_8023AD)
dev_mc_del(slave_dev, lacpdu_mcast_addr);
}
/*--------------------------- Active slave change ---------------------------*/
/* Update the hardware address list and promisc/allmulti for the new and * old active slaves (if any). Modes that are not using primary keep all * slaves up to date at all times; only the modes that use primary need to call * this function to swap these settings during a failover.
*/ staticvoid bond_hw_addr_swap(struct bonding *bond, struct slave *new_active, struct slave *old_active)
{ if (old_active) { if (bond->dev->flags & IFF_PROMISC)
dev_set_promiscuity(old_active->dev, -1);
if (bond->dev->flags & IFF_ALLMULTI)
dev_set_allmulti(old_active->dev, -1);
if (bond->dev->flags & IFF_UP)
bond_hw_addr_flush(bond->dev, old_active->dev);
bond_slave_ns_maddrs_add(bond, old_active);
}
if (new_active) { /* FIXME: Signal errors upstream. */ if (bond->dev->flags & IFF_PROMISC)
dev_set_promiscuity(new_active->dev, 1);
if (bond->dev->flags & IFF_ALLMULTI)
dev_set_allmulti(new_active->dev, 1);
/* NOTE(review): the body below does not belong to this signature.
 * bond, slave, iter and new_active are neither parameters nor locals, err
 * is never used, bond_dev and slave_dev are never read, and the function is
 * declared int yet returns a slave pointer or NULL.  The loop looks like a
 * lookup helper, presumably the bond_get_old_active() that
 * bond_do_fail_over_mac() calls; confirm against the upstream file.
 * `staticint` on the signature line also looks like a fused `static int`.
 */
/** * bond_set_dev_addr - clone slave's address to bond * @bond_dev: bond net device * @slave_dev: slave net device * * Should be called with RTNL held.
*/ staticint bond_set_dev_addr(struct net_device *bond_dev, struct net_device *slave_dev)
{ int err;
/* Walk the slaves, ignoring new_active, and hand back the first one whose
 * MAC address equals the bond device's MAC address.
 */
bond_for_each_slave(bond, slave, iter) { if (slave == new_active) continue;
if (ether_addr_equal(bond->dev->dev_addr, slave->dev->dev_addr)) return slave;
}
/* no other slave carries the bond's MAC */
return NULL;
}
/* bond_do_fail_over_mac
 *
 * Perform special MAC address swapping for fail_over_mac settings
 *
 * @bond: bonding instance whose fail_over_mac policy is applied
 * @new_active: slave becoming active, may be NULL
 * @old_active: slave that was active, may be NULL
 *
 * BOND_FOM_ACTIVE: the bond takes over the MAC of @new_active.
 * BOND_FOM_FOLLOW: @new_active receives the MAC the old active slave was
 * using (or the bond's MAC when there is no old active slave), and the old
 * active slave receives the MAC @new_active had before.
 *
 * Failures are logged and otherwise ignored; nothing is returned.
 *
 * Called with RTNL
 */
static void bond_do_fail_over_mac(struct bonding *bond,
				  struct slave *new_active,
				  struct slave *old_active)
{
	u8 tmp_mac[MAX_ADDR_LEN];
	struct sockaddr_storage ss;
	int rv;

	switch (bond->params.fail_over_mac) {
	case BOND_FOM_ACTIVE:
		if (new_active) {
			rv = bond_set_dev_addr(bond->dev, new_active->dev);
			if (rv)
				slave_err(bond->dev, new_active->dev, "Error %d setting bond MAC from slave\n",
					  -rv);
		}
		break;
	case BOND_FOM_FOLLOW:
		/* if new_active && old_active, swap them
		 * if just old_active, do nothing (going to no active slave)
		 * if just new_active, set new_active to bond's MAC
		 */
		if (!new_active)
			return;

		if (!old_active)
			old_active = bond_get_old_active(bond, new_active);

		if (old_active) {
			/* remember new_active's MAC so it can be handed to
			 * old_active once new_active has been re-addressed
			 */
			bond_hw_addr_copy(tmp_mac, new_active->dev->dev_addr,
					  new_active->dev->addr_len);
			bond_hw_addr_copy(ss.__data,
					  old_active->dev->dev_addr,
					  old_active->dev->addr_len);
			ss.ss_family = new_active->dev->type;
		} else {
			bond_hw_addr_copy(ss.__data, bond->dev->dev_addr,
					  bond->dev->addr_len);
			ss.ss_family = bond->dev->type;
		}

		rv = dev_set_mac_address(new_active->dev, &ss, NULL);
		if (rv) {
			slave_err(bond->dev, new_active->dev, "Error %d setting MAC of new active slave\n",
				  -rv);
			goto out;
		}

		/* nothing to swap back when there was no old active slave */
		if (!old_active)
			goto out;

		bond_hw_addr_copy(ss.__data, tmp_mac,
				  new_active->dev->addr_len);
		ss.ss_family = old_active->dev->type;

		rv = dev_set_mac_address(old_active->dev, &ss, NULL);
		if (rv)
			slave_err(bond->dev, old_active->dev, "Error %d setting MAC of old active slave\n",
				  -rv);
out:
		break;
	default:
		netdev_err(bond->dev, "bond_do_fail_over_mac impossible: bad policy %d\n",
			   bond->params.fail_over_mac);
		break;
	}
}
/** * bond_choose_primary_or_current - select the primary or high priority slave * @bond: our bonding struct * * - Check if there is a primary link. If the primary link was set and is up, * go on and do link reselection. * * - If primary link is not set or down, find the highest priority link. * If the highest priority link is not current slave, set it as primary * link and do link reselection.
*/ staticstruct slave *bond_choose_primary_or_current(struct bonding *bond)
{ struct slave *prim = rtnl_dereference(bond->primary_slave); struct slave *curr = rtnl_dereference(bond->curr_active_slave); struct slave *slave, *hprio = NULL; struct list_head *iter;
if (!prim || prim->link != BOND_LINK_UP) {
bond_for_each_slave(bond, slave, iter) { if (slave->link == BOND_LINK_UP) {
hprio = hprio ?: slave; if (slave->prio > hprio->prio)
hprio = slave;
}
}
if (hprio && hprio != curr) {
prim = hprio; goto link_reselect;
}
if (bond->force_primary) {
bond->force_primary = false; return prim;
}
link_reselect: if (!curr || curr->link != BOND_LINK_UP) return prim;
/* At this point, prim and curr are both up */ switch (bond->params.primary_reselect) { case BOND_PRI_RESELECT_ALWAYS: return prim; case BOND_PRI_RESELECT_BETTER: if (prim->speed < curr->speed) return curr; if (prim->speed == curr->speed && prim->duplex <= curr->duplex) return curr; return prim; case BOND_PRI_RESELECT_FAILURE: return curr; default:
netdev_err(bond->dev, "impossible primary_reselect %d\n",
bond->params.primary_reselect); return curr;
}
}
/** * bond_find_best_slave - select the best available slave to be the active one * @bond: our bonding struct
*/ staticstruct slave *bond_find_best_slave(struct bonding *bond)
{ struct slave *slave, *bestslave = NULL; struct list_head *iter; int mintime = bond->params.updelay;
slave = bond_choose_primary_or_current(bond); if (slave) return slave;
/* must be called in RCU critical section or with RTNL held */ staticbool bond_should_notify_peers(struct bonding *bond)
{ struct bond_up_slave *usable; struct slave *slave = NULL;
/* The send_peer_notif is set by active-backup or 8023ad * mode, and cleared in bond_close() when changing mode. * It is safe to only check bond mode here.
*/ if (BOND_MODE(bond) == BOND_MODE_8023AD) {
usable = rcu_dereference_rtnl(bond->usable_slaves); if (!usable || !READ_ONCE(usable->count)) returnfalse;
} else {
slave = rcu_dereference_rtnl(bond->curr_active_slave); if (!slave || test_bit(__LINK_STATE_LINKWATCH_PENDING,
&slave->dev->state)) returnfalse;
}
/** * bond_change_active_slave - change the active slave into the specified one * @bond: our bonding struct * @new_active: the new slave to make the active one * * Set the new slave to the bond's settings and unset them on the old * curr_active_slave. * Setting include flags, mc-list, promiscuity, allmulti, etc. * * If @new's link state is %BOND_LINK_BACK we'll set it to %BOND_LINK_UP, * because it is apparently the best available slave we have, even though its * updelay hasn't timed out yet. * * Caller must hold RTNL.
*/ void bond_change_active_slave(struct bonding *bond, struct slave *new_active)
{ struct slave *old_active;
if (new_active) {
new_active->last_link_up = jiffies;
if (new_active->link == BOND_LINK_BACK) { if (bond_uses_primary(bond)) {
slave_info(bond->dev, new_active->dev, "making interface the new active one %d ms earlier\n",
(bond->params.updelay - new_active->delay) * bond->params.miimon);
}
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_handle_link_change(new_active, BOND_LINK_UP);
if (bond_is_lb(bond))
bond_alb_handle_link_change(bond, new_active, BOND_LINK_UP);
} else { if (bond_uses_primary(bond))
slave_info(bond->dev, new_active->dev, "making interface the new active one\n");
}
}
if (bond_uses_primary(bond))
bond_hw_addr_swap(bond, new_active, old_active);
if (bond_is_lb(bond)) {
bond_alb_handle_active_change(bond, new_active); if (old_active)
bond_set_slave_inactive_flags(old_active,
BOND_SLAVE_NOTIFY_NOW); if (new_active)
bond_set_slave_active_flags(new_active,
BOND_SLAVE_NOTIFY_NOW);
} else {
rcu_assign_pointer(bond->curr_active_slave, new_active);
}
if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) { if (old_active)
bond_set_slave_inactive_flags(old_active,
BOND_SLAVE_NOTIFY_NOW);
if (new_active) { bool should_notify_peers = false;
/* resend IGMP joins since active slave has changed or * all were sent on curr_active_slave. * resend only if bond is brought up with the affected * bonding modes and the retransmission is enabled
*/ if (netif_running(bond->dev) && (bond->params.resend_igmp > 0) &&
((bond_uses_primary(bond) && new_active) ||
BOND_MODE(bond) == BOND_MODE_ROUNDROBIN)) {
bond->igmp_retrans = bond->params.resend_igmp;
queue_delayed_work(bond->wq, &bond->mcast_work, 1);
}
}
/**
 * bond_select_active_slave - select a new active slave, if needed
 * @bond: our bonding struct
 *
 * This function should be called when one of the following occurs:
 * - The old curr_active_slave has been released or lost its link.
 * - The primary_slave has got its link back.
 * - A slave has got its link back and there's no old curr_active_slave.
 *
 * When the best available slave differs from the current active one, the
 * switch is performed and the bond's carrier is re-evaluated; a message is
 * logged only if that re-evaluation changed the carrier state.
 *
 * Caller must hold RTNL.
 */
void bond_select_active_slave(struct bonding *bond)
{
	struct slave *candidate;
	int carrier_changed;

	ASSERT_RTNL();

	candidate = bond_find_best_slave(bond);

	/* already running on the best slave: nothing to do */
	if (candidate == rtnl_dereference(bond->curr_active_slave))
		return;

	bond_change_active_slave(bond, candidate);

	/* bond_set_carrier() yields nonzero only when the state flipped */
	carrier_changed = bond_set_carrier(bond);
	if (!carrier_changed)
		return;

	if (netif_carrier_ok(bond->dev)) {
		netdev_info(bond->dev, "active interface up!\n");
		return;
	}

	netdev_info(bond->dev, "now running without any active interface!\n");
}
skb = skb_share_check(skb, GFP_ATOMIC); if (unlikely(!skb)) return RX_HANDLER_CONSUMED;
*pskb = skb;
slave = bond_slave_get_rcu(skb->dev);
bond = slave->bond;
recv_probe = READ_ONCE(bond->recv_probe); if (recv_probe) {
ret = recv_probe(skb, bond, slave); if (ret == RX_HANDLER_CONSUMED) {
consume_skb(skb); return ret;
}
}
/* * For packets determined by bond_should_deliver_exact_match() call to * be suppressed we want to make an exception for link-local packets. * This is necessary for e.g. LLDP daemons to be able to monitor * inactive slave links without being forced to bind to them * explicitly. * * At the same time, packets that are passed to the bonding master * (including link-local ones) can have their originating interface * determined via PACKET_ORIGDEV socket option.
*/ if (bond_should_deliver_exact_match(skb, slave, bond)) { if (is_link_local_ether_addr(eth_hdr(skb)->h_dest)) return RX_HANDLER_PASS; return RX_HANDLER_EXACT;
}
/* Report @errmsg for @bond_dev: through the netlink extended ack when the
 * caller supplied one (@extack non-NULL), otherwise via netdev_err() on the
 * bond device.  Wrapped in do { } while (0) so it behaves as one statement.
 */
#define BOND_NL_ERR(bond_dev, extack, errmsg) do {		\
	if (extack)						\
		NL_SET_ERR_MSG(extack, errmsg);			\
	else							\
		netdev_err(bond_dev, "Error: %s\n", errmsg);	\
} while (0)
/* Report @errmsg about @slave_dev: through the netlink extended ack when the
 * caller supplied one (@extack non-NULL), otherwise via slave_err() with
 * both the bond and the slave device.  Wrapped in do { } while (0) so it
 * behaves as one statement.
 */
#define SLAVE_NL_ERR(bond_dev, slave_dev, extack, errmsg) do {		\
	if (extack)							\
		NL_SET_ERR_MSG(extack, errmsg);				\
	else								\
		slave_err(bond_dev, slave_dev, "Error: %s\n", errmsg);	\
} while (0)
/* NOTE(review): the body below does not implement what the header comment
 * describes.  flags is computed and never used, no ether_setup() call or
 * IFF_MASTER restore is visible, and bond, slave, iter and val are not
 * declared in this function.  Everything after the first line deals with
 * XDP feature flags, so presumably the end of a separate XDP helper was
 * merged in when this text was extracted; confirm against the upstream
 * file.
 * `staticvoid` and `unsignedint` also look like fused keyword pairs.
 */
/* The bonding driver uses ether_setup() to convert a master bond device * to ARPHRD_ETHER, that resets the target netdevice's flags so we always * have to restore the IFF_MASTER flag, and only restore IFF_SLAVE and IFF_UP * if they were set
*/ staticvoid bond_ether_setup(struct net_device *bond_dev)
{ unsignedint flags = bond_dev->flags & (IFF_SLAVE | IFF_UP);
/* Without XDP support for the current mode, or without any slave, the
 * bond advertises no XDP features at all.
 */
if (!bond_xdp_check(bond, BOND_MODE(bond)) || !bond_has_slaves(bond)) {
xdp_clear_features_flag(bond_dev); return;
}
/* Keep only the feature bits that every slave has in common */
bond_for_each_slave(bond, slave, iter)
val &= slave->dev->xdp_features;
/* XSK zero-copy is always masked out, whatever the slaves report */
val &= ~NETDEV_XDP_ACT_XSK_ZEROCOPY;
xdp_set_features_flag(bond_dev, val);
}
/* enslave device <slave> to bond device <master> */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, struct netlink_ext_ack *extack)
{ struct bonding *bond = netdev_priv(bond_dev); conststruct net_device_ops *slave_ops = slave_dev->netdev_ops; struct slave *new_slave = NULL, *prev_slave; struct sockaddr_storage ss; int link_reporting; int res = 0, i;
if (slave_dev->flags & IFF_MASTER &&
!netif_is_bond_master(slave_dev)) {
BOND_NL_ERR(bond_dev, extack, "Device type (master device) cannot be enslaved"); return -EPERM;
}
if (!bond->params.use_carrier &&
slave_dev->ethtool_ops->get_link == NULL &&
slave_ops->ndo_eth_ioctl == NULL) {
slave_warn(bond_dev, slave_dev, "no link monitoring support\n");
}
/* already in-use? */ if (netdev_is_rx_handler_busy(slave_dev)) {
SLAVE_NL_ERR(bond_dev, slave_dev, extack, "Device is in use and cannot be enslaved"); return -EBUSY;
}
if (bond_dev == slave_dev) {
BOND_NL_ERR(bond_dev, extack, "Cannot enslave bond to itself."); return -EPERM;
}
/* vlan challenged mutual exclusion */ /* no need to lock since we're protected by rtnl_lock */ if (slave_dev->features & NETIF_F_VLAN_CHALLENGED) {
slave_dbg(bond_dev, slave_dev, "is NETIF_F_VLAN_CHALLENGED\n"); if (vlan_uses_dev(bond_dev)) {
SLAVE_NL_ERR(bond_dev, slave_dev, extack, "Can not enslave VLAN challenged device to VLAN enabled bond"); return -EPERM;
} else {
slave_warn(bond_dev, slave_dev, "enslaved VLAN challenged slave. Adding VLANs will be blocked as long as it is part of bond.\n");
}
} else {
slave_dbg(bond_dev, slave_dev, "is !NETIF_F_VLAN_CHALLENGED\n");
}
if (slave_dev->features & NETIF_F_HW_ESP)
slave_dbg(bond_dev, slave_dev, "is esp-hw-offload capable\n");
/* Old ifenslave binaries are no longer supported. These can * be identified with moderate accuracy by the state of the slave: * the current ifenslave will set the interface down prior to * enslaving it; the old ifenslave will not.
*/ if (slave_dev->flags & IFF_UP) {
SLAVE_NL_ERR(bond_dev, slave_dev, extack, "Device can not be enslaved while up"); return -EPERM;
}
/* set bonding device ether type by slave - bonding netdevices are * created with ether_setup, so when the slave type is not ARPHRD_ETHER * there is a need to override some of the type dependent attribs/funcs. * * bond ether type mutual exclusion - don't allow slaves of dissimilar * ether type (eg ARPHRD_ETHER and ARPHRD_INFINIBAND) share the same bond
*/ if (!bond_has_slaves(bond)) { if (bond_dev->type != slave_dev->type) {
slave_dbg(bond_dev, slave_dev, "change device type from %d to %d\n",
bond_dev->type, slave_dev->type);
res = call_netdevice_notifiers(NETDEV_PRE_TYPE_CHANGE,
bond_dev);
res = notifier_to_errno(res); if (res) {
slave_err(bond_dev, slave_dev, "refused to change device type\n"); return -EBUSY;
}
/* Flush unicast and multicast addresses */
dev_uc_flush(bond_dev);
dev_mc_flush(bond_dev);
if (slave_dev->type != ARPHRD_ETHER)
bond_setup_by_slave(bond_dev, slave_dev); else
bond_ether_setup(bond_dev);
call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE,
bond_dev);
}
} elseif (bond_dev->type != slave_dev->type) {
SLAVE_NL_ERR(bond_dev, slave_dev, extack, "Device type is different from other slaves"); return -EINVAL;
}
if (slave_dev->type == ARPHRD_INFINIBAND &&
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) {
SLAVE_NL_ERR(bond_dev, slave_dev, extack, "Only active-backup mode is supported for infiniband slaves");
res = -EOPNOTSUPP; goto err_undo_flags;
}
if (!slave_ops->ndo_set_mac_address ||
slave_dev->type == ARPHRD_INFINIBAND) {
slave_warn(bond_dev, slave_dev, "The slave device specified does not support setting the MAC address\n"); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
bond->params.fail_over_mac != BOND_FOM_ACTIVE) { if (!bond_has_slaves(bond)) {
bond->params.fail_over_mac = BOND_FOM_ACTIVE;
slave_warn(bond_dev, slave_dev, "Setting fail_over_mac to active for active-backup mode\n");
} else {
SLAVE_NL_ERR(bond_dev, slave_dev, extack, "Slave device does not support setting the MAC address, but fail_over_mac is not set to active");
res = -EOPNOTSUPP; goto err_undo_flags;
}
}
}
call_netdevice_notifiers(NETDEV_JOIN, slave_dev);
/* If this is the first slave, then we need to set the master's hardware * address to be the same as the slave's.
*/ if (!bond_has_slaves(bond) &&
bond->dev->addr_assign_type == NET_ADDR_RANDOM) {
res = bond_set_dev_addr(bond->dev, slave_dev); if (res) goto err_undo_flags;
}
new_slave = bond_alloc_slave(bond, slave_dev); if (!new_slave) {
res = -ENOMEM; goto err_undo_flags;
}
/* Set the new_slave's queue_id to be zero. Queue ID mapping * is set via sysfs or module option if desired.
*/
new_slave->queue_id = 0;
/* Save slave's original mtu and then set it to match the bond */
new_slave->original_mtu = slave_dev->mtu;
res = dev_set_mtu(slave_dev, bond->dev->mtu); if (res) {
slave_err(bond_dev, slave_dev, "Error %d calling dev_set_mtu\n", res); goto err_free;
}
/* Save slave's original ("permanent") mac address for modes * that need it, and for restoring it upon release, and then * set it to the master's address
*/
bond_hw_addr_copy(new_slave->perm_hwaddr, slave_dev->dev_addr,
slave_dev->addr_len);
if (!bond->params.fail_over_mac ||
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* Set slave to master's mac address. The application already * set the master's mac address to that of the first slave
*/
memcpy(ss.__data, bond_dev->dev_addr, bond_dev->addr_len);
} elseif (bond->params.fail_over_mac == BOND_FOM_FOLLOW &&
BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP &&
bond_has_slaves(bond) &&
memcmp(slave_dev->dev_addr, bond_dev->dev_addr, bond_dev->addr_len) == 0) { /* Set slave to random address to avoid duplicate mac * address in later fail over.
*/
eth_random_addr(ss.__data);
} else { goto skip_mac_set;
}
/* set no_addrconf flag before open to prevent IPv6 addrconf */
slave_dev->priv_flags |= IFF_NO_ADDRCONF;
/* open the slave since the application closed it */
res = dev_open(slave_dev, extack); if (res) {
slave_err(bond_dev, slave_dev, "Opening slave failed\n"); goto err_restore_mac;
}
if (bond_is_lb(bond)) { /* bond_alb_init_slave() must be called before all other stages since * it might fail and we do not want to have to undo everything
*/
res = bond_alb_init_slave(bond, new_slave); if (res) goto err_close;
}
res = vlan_vids_add_by_dev(slave_dev, bond_dev); if (res) {
slave_err(bond_dev, slave_dev, "Couldn't add bond vlan ids\n"); goto err_close;
}
if (bond_update_speed_duplex(new_slave) &&
bond_needs_speed_duplex(bond))
new_slave->link = BOND_LINK_DOWN;
new_slave->last_rx = jiffies -
(msecs_to_jiffies(bond->params.arp_interval) + 1); for (i = 0; i < BOND_MAX_ARP_TARGETS; i++)
new_slave->target_last_arp_rx[i] = new_slave->last_rx;
new_slave->last_tx = new_slave->last_rx;
if (bond->params.miimon && !bond->params.use_carrier) {
link_reporting = bond_check_dev_link(bond, slave_dev, 1);
if ((link_reporting == -1) && !bond->params.arp_interval) { /* miimon is set but a bonded network driver * does not support ETHTOOL/MII and * arp_interval is not set. Note: if * use_carrier is enabled, we will never go * here (because netif_carrier is always * supported); thus, we don't need to change * the messages for netif_carrier.
*/
slave_warn(bond_dev, slave_dev, "MII and ETHTOOL support not available for slave, and arp_interval/arp_ip_target module parameters not specified, thus bonding will not detect link failures! see bonding.txt for details\n");
} elseif (link_reporting == -1) { /* unable get link status using mii/ethtool */
slave_warn(bond_dev, slave_dev, "can't get link status from slave; the network driver associated with this interface does not support MII or ETHTOOL link status reporting, thus miimon has no effect on this interface\n");
}
}
if (new_slave->link != BOND_LINK_DOWN)
new_slave->last_link_up = jiffies;
slave_dbg(bond_dev, slave_dev, "Initial state of slave is BOND_LINK_%s\n",
new_slave->link == BOND_LINK_DOWN ? "DOWN" :
(new_slave->link == BOND_LINK_UP ? "UP" : "BACK"));
if (bond_uses_primary(bond) && bond->params.primary[0]) { /* if there is a primary slave, remember it */ if (strcmp(bond->params.primary, new_slave->dev->name) == 0) {
rcu_assign_pointer(bond->primary_slave, new_slave);
bond->force_primary = true;
}
}
switch (BOND_MODE(bond)) { case BOND_MODE_ACTIVEBACKUP:
bond_set_slave_inactive_flags(new_slave,
BOND_SLAVE_NOTIFY_NOW); break; case BOND_MODE_8023AD: /* in 802.3ad mode, the internal mechanism * will activate the slaves in the selected * aggregator
*/
bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); /* if this is the first slave */ if (!prev_slave) {
SLAVE_AD_INFO(new_slave)->id = 1; /* Initialize AD with the number of times that the AD timer is called in 1 second * can be called only after the mac address of the bond is set
*/
bond_3ad_initialize(bond);
} else {
SLAVE_AD_INFO(new_slave)->id =
SLAVE_AD_INFO(prev_slave)->id + 1;
}
bond_3ad_bind_slave(new_slave); break; case BOND_MODE_TLB: case BOND_MODE_ALB:
bond_set_active_slave(new_slave);
bond_set_slave_inactive_flags(new_slave, BOND_SLAVE_NOTIFY_NOW); break; default:
slave_dbg(bond_dev, slave_dev, "This slave is always active in trunk mode\n");
/* always active in trunk mode */
bond_set_active_slave(new_slave);
/* In trunking mode there is little meaning to curr_active_slave * anyway (it holds no special properties of the bond device), * so we can change it without calling change_active_interface()
*/ if (!rcu_access_pointer(bond->curr_active_slave) &&
new_slave->link == BOND_LINK_UP)
rcu_assign_pointer(bond->curr_active_slave, new_slave);
break;
} /* switch(bond_mode) */
#ifdef CONFIG_NET_POLL_CONTROLLER if (bond->dev->npinfo) { if (slave_enable_netpoll(new_slave)) {
slave_info(bond_dev, slave_dev, "master_dev is using netpoll, but new slave device does not support netpoll\n");
res = -EBUSY; goto err_detach;
}
} #endif
if (!(bond_dev->features & NETIF_F_LRO))
dev_disable_lro(slave_dev);
res = netdev_rx_handler_register(slave_dev, bond_handle_frame,
new_slave); if (res) {
slave_dbg(bond_dev, slave_dev, "Error %d calling netdev_rx_handler_register\n", res); goto err_detach;
}
res = bond_master_upper_dev_link(bond, new_slave, extack); if (res) {
slave_dbg(bond_dev, slave_dev, "Error %d calling bond_master_upper_dev_link\n", res); goto err_unregister;
}
bond_lower_state_changed(new_slave);
res = bond_sysfs_slave_add(new_slave); if (res) {
slave_dbg(bond_dev, slave_dev, "Error %d calling bond_sysfs_slave_add\n", res); goto err_upper_unlink;
}
/* If the mode uses primary, then the following is handled by * bond_change_active_slave().
*/ if (!bond_uses_primary(bond)) { /* set promiscuity level to new slave */ if (bond_dev->flags & IFF_PROMISC) {
res = dev_set_promiscuity(slave_dev, 1); if (res) goto err_sysfs_del;
}
/* set allmulti level to new slave */ if (bond_dev->flags & IFF_ALLMULTI) {
res = dev_set_allmulti(slave_dev, 1); if (res) { if (bond_dev->flags & IFF_PROMISC)
dev_set_promiscuity(slave_dev, -1); goto err_sysfs_del;
}
}
/* Needs to be called before bond_select_active_slave(), which will * remove the maddrs if the slave is selected as active slave.
*/
bond_slave_ns_maddrs_add(bond, new_slave);
if (bond_uses_primary(bond)) {
block_netpoll_tx();
bond_select_active_slave(bond);
unblock_netpoll_tx();
}
/* broadcast mode uses the all_slaves to loop through slaves. */ if (bond_mode_can_use_xmit_hash(bond) ||
BOND_MODE(bond) == BOND_MODE_BROADCAST)
bond_update_slave_arr(bond, NULL);
if (!slave_dev->netdev_ops->ndo_bpf ||
!slave_dev->netdev_ops->ndo_xdp_xmit) { if (bond->xdp_prog) {
SLAVE_NL_ERR(bond_dev, slave_dev, extack, "Slave does not support XDP");
res = -EOPNOTSUPP; goto err_sysfs_del;
}
} elseif (bond->xdp_prog) { struct netdev_bpf xdp = {
.command = XDP_SETUP_PROG,
.flags = 0,
.prog = bond->xdp_prog,
.extack = extack,
};
if (dev_xdp_prog_count(slave_dev) > 0) {
SLAVE_NL_ERR(bond_dev, slave_dev, extack, "Slave has XDP program loaded, please unload before enslaving");
res = -EOPNOTSUPP; goto err_sysfs_del;
}
res = dev_xdp_propagate(slave_dev, &xdp); if (res < 0) { /* ndo_bpf() sets extack error message */
slave_dbg(bond_dev, slave_dev, "Error %d calling ndo_bpf\n", res); goto err_sysfs_del;
} if (bond->xdp_prog)
bpf_prog_inc(bond->xdp_prog);
}
bond_xdp_set_features(bond_dev);
slave_info(bond_dev, slave_dev, "Enslaving as %s interface with %s link\n",
bond_is_active_slave(new_slave) ? "an active" : "a backup",
new_slave->link != BOND_LINK_DOWN ? "an up" : "a down");
/* enslave is successful */
bond_queue_slave_event(new_slave); return 0;
/* Undo stages on error */
err_sysfs_del:
bond_sysfs_slave_del(new_slave);
err_detach:
vlan_vids_del_by_dev(slave_dev, bond_dev); if (rcu_access_pointer(bond->primary_slave) == new_slave)
RCU_INIT_POINTER(bond->primary_slave, NULL); if (rcu_access_pointer(bond->curr_active_slave) == new_slave) {
block_netpoll_tx();
bond_change_active_slave(bond, NULL);
bond_select_active_slave(bond);
unblock_netpoll_tx();
} /* either primary_slave or curr_active_slave might've changed */
synchronize_rcu();
slave_disable_netpoll(new_slave);
err_close: if (!netif_is_bond_master(slave_dev))
slave_dev->priv_flags &= ~IFF_BONDING;
dev_close(slave_dev);
err_restore_mac:
slave_dev->priv_flags &= ~IFF_NO_ADDRCONF; if (!bond->params.fail_over_mac ||
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* XXX TODO - fom follow mode needs to change master's * MAC if this slave's MAC is in use by the bond, or at * least print a warning.
*/
bond_hw_addr_copy(ss.__data, new_slave->perm_hwaddr,
new_slave->dev->addr_len);
ss.ss_family = slave_dev->type;
dev_set_mac_address(slave_dev, &ss, NULL);
}
err_undo_flags: /* Enslave of first slave has failed and we need to fix master's mac */ if (!bond_has_slaves(bond)) { if (ether_addr_equal_64bits(bond_dev->dev_addr,
slave_dev->dev_addr))
eth_hw_addr_random(bond_dev); if (bond_dev->type != ARPHRD_ETHER) {
dev_close(bond_dev);
bond_ether_setup(bond_dev);
}
}
return res;
}
/* Try to release the slave device <slave> from the bond device <master> * It is legal to access curr_active_slave without a lock because the whole function * is RTNL-locked. If "all" is true it means that the function is being called * while destroying a bond interface and all slaves are being released. * * The rules for slave state should be: * for Active/Backup: * Active stays on, all backups go down * for Bonded connections: * The first up interface should be left on and all others downed.
*/ staticint __bond_release_one(struct net_device *bond_dev, struct net_device *slave_dev, bool all, bool unregister)
{ struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *oldcurrent; struct sockaddr_storage ss; int old_flags = bond_dev->flags;
netdev_features_t old_features = bond_dev->features;
/* slave is not a slave or master is not master of this slave */ if (!(slave_dev->flags & IFF_SLAVE) ||
!netdev_has_upper_dev(slave_dev, bond_dev)) {
slave_dbg(bond_dev, slave_dev, "cannot release slave\n"); return -EINVAL;
}
block_netpoll_tx();
slave = bond_get_slave_by_dev(bond, slave_dev); if (!slave) { /* not a slave of this bond */
slave_info(bond_dev, slave_dev, "interface not enslaved\n");
unblock_netpoll_tx(); return -EINVAL;
}
if (!all && (bond->params.fail_over_mac != BOND_FOM_ACTIVE ||
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP)) { if (ether_addr_equal_64bits(bond_dev->dev_addr, slave->perm_hwaddr) &&
bond_has_slaves(bond))
slave_warn(bond_dev, slave_dev, "the permanent HWaddr of slave - %pM - is still in use by bond - set the HWaddr of slave to a different address to avoid conflicts\n",
slave->perm_hwaddr);
}
if (rtnl_dereference(bond->primary_slave) == slave)
RCU_INIT_POINTER(bond->primary_slave, NULL);
if (oldcurrent == slave)
bond_change_active_slave(bond, NULL);
/* Must be called after bond_change_active_slave () as the slave * might change from an active slave to a backup slave. Then it is * necessary to clear the maddrs on the backup slave.
*/
bond_slave_ns_maddrs_del(bond, slave);
if (bond_is_lb(bond)) { /* Must be called only after the slave has been * detached from the list and the curr_active_slave * has been cleared (if our_slave == old_current), * but before a new active slave is selected.
*/
bond_alb_deinit_slave(bond, slave);
}
if (all) {
RCU_INIT_POINTER(bond->curr_active_slave, NULL);
} elseif (oldcurrent == slave) { /* Note that we hold RTNL over this sequence, so there * is no concern that another slave add/remove event * will interfere.
*/
bond_select_active_slave(bond);
}
bond_set_carrier(bond); if (!bond_has_slaves(bond))
eth_hw_addr_random(bond_dev);
if (!bond_has_slaves(bond)) {
call_netdevice_notifiers(NETDEV_CHANGEADDR, bond->dev);
call_netdevice_notifiers(NETDEV_RELEASE, bond->dev);
}
bond_compute_features(bond); if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) &&
(old_features & NETIF_F_VLAN_CHALLENGED))
slave_info(bond_dev, slave_dev, "last VLAN challenged slave left bond - VLAN blocking is removed\n");
vlan_vids_del_by_dev(slave_dev, bond_dev);
/* If the mode uses primary, then this case was handled above by * bond_change_active_slave(..., NULL)
*/ if (!bond_uses_primary(bond)) { /* unset promiscuity level from slave * NOTE: The NETDEV_CHANGEADDR call above may change the value * of the IFF_PROMISC flag in the bond_dev, but we need the * value of that flag before that change, as that was the value * when this slave was attached, so we cache at the start of the * function and use it here. Same goes for ALLMULTI below
*/ if (old_flags & IFF_PROMISC)
dev_set_promiscuity(slave_dev, -1);
/* unset allmulti level from slave */ if (old_flags & IFF_ALLMULTI)
dev_set_allmulti(slave_dev, -1);
if (old_flags & IFF_UP)
bond_hw_addr_flush(bond_dev, slave_dev);
}
slave_disable_netpoll(slave);
/* close slave before restoring its mac address */
dev_close(slave_dev);
slave_dev->priv_flags &= ~IFF_NO_ADDRCONF;
if (bond->params.fail_over_mac != BOND_FOM_ACTIVE ||
BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* restore original ("permanent") mac address */
bond_hw_addr_copy(ss.__data, slave->perm_hwaddr,
slave->dev->addr_len);
ss.ss_family = slave_dev->type;
dev_set_mac_address(slave_dev, &ss, NULL);
}
/* A wrapper used because of ndo_del_link */ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
{ return __bond_release_one(bond_dev, slave_dev, false, false);
}
/* First release a slave and then destroy the bond if no more slaves are left. * Must be under rtnl_lock when this function is called.
*/ staticint bond_release_and_destroy(struct net_device *bond_dev, struct net_device *slave_dev)
{ struct bonding *bond = netdev_priv(bond_dev); int ret;
if (slave->delay && net_ratelimit()) {
slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
ignore_updelay ? 0 :
bond->params.updelay *
bond->params.miimon);
}
fallthrough; case BOND_LINK_BACK: if (!link_state) {
bond_propose_link_state(slave, BOND_LINK_DOWN); if (net_ratelimit())
slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
(bond->params.updelay - slave->delay) *
bond->params.miimon);
commit++; continue;
}
bond_for_each_slave(bond, slave, iter) { switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: /* For 802.3ad mode, check current slave speed and * duplex again in case its port was disabled after * invalid speed/duplex reporting but recovered before * link monitoring could make a decision on the actual * link status
*/ if (BOND_MODE(bond) == BOND_MODE_8023AD &&
slave->link == BOND_LINK_UP)
bond_3ad_adapter_speed_duplex_changed(slave); continue;
case BOND_LINK_UP: if (bond_update_speed_duplex(slave) &&
bond_needs_speed_duplex(bond)) {
slave->link = BOND_LINK_DOWN; if (net_ratelimit())
slave_warn(bond->dev, slave->dev, "failed to get link speed/duplex\n"); continue;
}
bond_set_slave_link_state(slave, BOND_LINK_UP,
BOND_SLAVE_NOTIFY_NOW);
slave->last_link_up = jiffies;
primary = rtnl_dereference(bond->primary_slave); if (BOND_MODE(bond) == BOND_MODE_8023AD) { /* prevent it from being the active one */
bond_set_backup_slave(slave);
} elseif (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { /* make it immediately active */
bond_set_active_slave(slave);
}
if (slave == rcu_access_pointer(bond->curr_active_slave))
do_failover = true;
continue;
default:
slave_err(bond->dev, slave->dev, "invalid new link %d on slave\n",
slave->link_new_state);
bond_propose_link_state(slave, BOND_LINK_NOCHANGE);
continue;
}
}
if (do_failover) {
block_netpoll_tx();
bond_select_active_slave(bond);
unblock_netpoll_tx();
}
bond_set_carrier(bond);
}
/* bond_mii_monitor * * Really a wrapper that splits the mii monitor into two phases: an * inspection, then (if inspection indicates something needs to be done) * an acquisition of appropriate locks followed by a commit phase to * implement whatever link state changes are indicated.
*/ staticvoid bond_mii_monitor(struct work_struct *work)
{ struct bonding *bond = container_of(work, struct bonding,
mii_work.work); bool should_notify_peers; bool commit; unsignedlong delay; struct slave *slave; struct list_head *iter;
if (!tags || tags->vlan_proto == BOND_VLAN_PROTO_NONE) returntrue;
tags++;
/* Go through all the tags backwards and add them to the packet */ while (tags->vlan_proto != BOND_VLAN_PROTO_NONE) { if (!tags->vlan_id) {
tags++; continue;
}
slave_dbg(bond_dev, slave_dev, "inner tag: proto %X vid %X\n",
ntohs(outer_tag->vlan_proto), tags->vlan_id);
skb = vlan_insert_tag_set_proto(skb, tags->vlan_proto,
tags->vlan_id); if (!skb) {
net_err_ratelimited("failed to insert inner VLAN tag\n"); returnfalse;
}
tags++;
} /* Set the outer tag */ if (outer_tag->vlan_id) {
slave_dbg(bond_dev, slave_dev, "outer tag: proto %X vid %X\n",
ntohs(outer_tag->vlan_proto), outer_tag->vlan_id);
__vlan_hwaccel_put_tag(skb, outer_tag->vlan_proto,
outer_tag->vlan_id);
}
returntrue;
}
/* We go to the (large) trouble of VLAN tagging ARP frames because
 * switches in VLAN mode (especially if ports are configured as
 * "native" to a VLAN) might not pass non-tagged frames.
 *
 * @slave:   slave whose device the ARP frame is built for and sent on
 * @arp_op:  ARP opcode placed in the frame (callers pass ARPOP_REQUEST)
 * @dest_ip: target protocol address
 * @src_ip:  sender protocol address (callers pass 0 when none is known)
 * @tags:    VLAN tag list handed to bond_handle_vlan(); may be NULL
 *
 * Nothing is returned: an allocation failure is reported through a
 * rate-limited error message, and the frame is only transmitted when
 * bond_handle_vlan() reports success.
 */
static void bond_arp_send(struct slave *slave, int arp_op, __be32 dest_ip,
			  __be32 src_ip, struct bond_vlan_tag *tags)
{
	struct net_device *bond_dev = slave->bond->dev;
	struct net_device *slave_dev = slave->dev;
	struct sk_buff *skb;

	slave_dbg(bond_dev, slave_dev, "arp %d on slave: dst %pI4 src %pI4\n",
		  arp_op, &dest_ip, &src_ip);

	/* The packet must exist before it can be checked or tagged: build it
	 * on the slave device, using the slave's current hardware address as
	 * the source and leaving destination/target hardware addresses to
	 * arp_create()'s defaults.
	 */
	skb = arp_create(arp_op, ETH_P_ARP, dest_ip, slave_dev, src_ip,
			 NULL, slave_dev->dev_addr, NULL);
	if (!skb) {
		net_err_ratelimited("ARP packet allocation failed\n");
		return;
	}

	/* Record the transmit time and send only when VLAN tagging succeeded;
	 * on failure bond_handle_vlan() has already reported the error.
	 */
	if (bond_handle_vlan(slave, tags, skb)) {
		slave_update_last_tx(slave);
		arp_xmit(skb);
	}
}
/* Validate the device path between the @start_dev and the @end_dev. * The path is valid if the @end_dev is reachable through device * stacking. * When the path is validated, collect any vlan information in the * path.
*/ struct bond_vlan_tag *bond_verify_device_path(struct net_device *start_dev, struct net_device *end_dev, int level)
{ struct bond_vlan_tag *tags; struct net_device *upper; struct list_head *iter;
for (i = 0; i < BOND_MAX_ARP_TARGETS && targets[i]; i++) {
slave_dbg(bond->dev, slave->dev, "%s: target %pI4\n",
__func__, &targets[i]);
tags = NULL;
/* Find out through which dev should the packet go */
rt = ip_route_output(dev_net(bond->dev), targets[i], 0, 0, 0,
RT_SCOPE_LINK); if (IS_ERR(rt)) { /* there's no route to target - try to send arp * probe to generate any traffic (arp_validate=0)
*/ if (bond->params.arp_validate)
pr_warn_once("%s: no route to arp_ip_target %pI4 and arp_validate is set\n",
bond->dev->name,
&targets[i]);
bond_arp_send(slave, ARPOP_REQUEST, targets[i],
0, tags); continue;
}
/* bond device itself */ if (rt->dst.dev == bond->dev) goto found;
if (!sip || !bond_has_this_ip(bond, tip)) {
slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 tip %pI4 not found\n",
__func__, &sip, &tip); return;
}
i = bond_get_targets_ip(bond->params.arp_targets, sip); if (i == -1) {
slave_dbg(bond->dev, slave->dev, "%s: sip %pI4 not found in targets\n",
__func__, &sip); return;
}
slave->last_rx = jiffies;
slave->target_last_arp_rx[i] = jiffies;
}
/* We 'trust' the received ARP enough to validate it if: * * (a) the slave receiving the ARP is active (which includes the * current ARP slave, if any), or * * (b) the receiving slave isn't active, but there is a currently * active slave and it received valid arp reply(s) after it became * the currently active slave, or * * (c) there is an ARP slave that sent an ARP during the prior ARP * interval, and we receive an ARP reply on any slave. We accept * these because switch FDB update delays may deliver the ARP * reply to a slave other than the sender of the ARP request. * * Note: for (b), backup slaves are receiving the broadcast ARP * request, not a reply. This request passes from the sending * slave through the L2 switch(es) to the receiving slave. Since * this is checking the request, sip/tip are swapped for * validation. * * This is done to avoid endless looping when we can't reach the * arp_ip_target and fool ourselves with our own arp requests.
*/ if (bond_is_active_slave(slave))
bond_validate_arp(bond, slave, sip, tip); elseif (curr_active_slave &&
time_after(slave_last_rx(bond, curr_active_slave),
curr_active_slave->last_link_up))
bond_validate_arp(bond, slave, tip, sip); elseif (curr_arp_slave && (arp->ar_op == htons(ARPOP_REPLY)) &&
bond_time_in_interval(bond, slave_last_tx(curr_arp_slave), 1))
bond_validate_arp(bond, slave, sip, tip);
for (i = 0; i < BOND_MAX_NS_TARGETS && !ipv6_addr_any(&targets[i]); i++) {
slave_dbg(bond->dev, slave->dev, "%s: target %pI6c\n",
__func__, &targets[i]);
tags = NULL;
/* Find out through which dev should the packet go */
memset(&fl6, 0, sizeof(struct flowi6));
fl6.daddr = targets[i];
dst = ip6_route_output(dev_net(bond->dev), NULL, &fl6); if (dst->error) {
dst_release(dst); /* there's no route to target - try to send arp * probe to generate any traffic (arp_validate=0)
*/ if (bond->params.arp_validate)
pr_warn_once("%s: no route to ns_ip6_target %pI6c and arp_validate is set\n",
bond->dev->name,
&targets[i]);
bond_ns_send(slave, &targets[i], &in6addr_any, tags); continue;
}
/* bond device itself */ if (dst->dev == bond->dev) goto found;
/* Ignore NAs that: * 1. Source address is unspecified address. * 2. Dest address is neither all-nodes multicast address nor * exist on bond interface.
*/ if (ipv6_addr_any(saddr) ||
(!ipv6_addr_equal(daddr, &in6addr_linklocal_allnodes) &&
!bond_has_this_ip6(bond, daddr))) {
slave_dbg(bond->dev, slave->dev, "%s: sip %pI6c tip %pI6c not found\n",
__func__, saddr, daddr); return;
}
i = bond_get_targets_ip6(bond->params.ns_targets, saddr); if (i == -1) {
slave_dbg(bond->dev, slave->dev, "%s: sip %pI6c not found in targets\n",
__func__, saddr); return;
}
slave->last_rx = jiffies;
slave->target_last_arp_rx[i] = jiffies;
}
/* function to verify if we're in the arp_interval timeslice, returns true if * (last_act - arp_interval) <= jiffies <= (last_act + mod * arp_interval + * arp_interval/2) . the arp_interval/2 is needed for really fast networks.
*/ staticbool bond_time_in_interval(struct bonding *bond, unsignedlong last_act, int mod)
{ int delta_in_ticks = msecs_to_jiffies(bond->params.arp_interval);
/* This function is called regularly to monitor each slave's link * ensuring that traffic is being sent and received when arp monitoring * is used in load-balancing mode. if the adapter has been dormant, then an * arp is transmitted to generate traffic. see activebackup_arp_monitor for * arp monitoring in active backup mode.
*/ staticvoid bond_loadbalance_arp_mon(struct bonding *bond)
{ struct slave *slave, *oldcurrent; struct list_head *iter; int do_failover = 0, slave_state_changed = 0;
if (!bond_has_slaves(bond)) goto re_arm;
rcu_read_lock();
oldcurrent = rcu_dereference(bond->curr_active_slave); /* see if any of the previous devices are up now (i.e. they have * xmt and rcv traffic). the curr_active_slave does not come into * the picture unless it is null. also, slave->last_link_up is not * needed here because we send an arp on each slave and give a slave * as long as it needs to get the tx/rx within the delta. * TODO: what about up/down delay in arp mode? it wasn't here before * so it can wait
*/
bond_for_each_slave_rcu(bond, slave, iter) { unsignedlong last_tx = slave_last_tx(slave);
/* primary_slave has no meaning in round-robin * mode. the window of a slave being up and * curr_active_slave being null after enslaving * is closed.
*/ if (!oldcurrent) {
slave_info(bond->dev, slave->dev, "link status definitely up\n");
do_failover = 1;
} else {
slave_info(bond->dev, slave->dev, "interface is now up\n");
}
}
} else { /* slave->link == BOND_LINK_UP */
/* not all switches will respond to an arp request * when the source ip is 0, so don't take the link down * if we don't know our ip yet
*/ if (!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
!bond_time_in_interval(bond, slave->last_rx, bond->params.missed_max)) {
if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
slave_info(bond->dev, slave->dev, "interface is now down\n");
if (slave == oldcurrent)
do_failover = 1;
}
}
/* note: if switch is in round-robin mode, all links * must tx arp to ensure all links rx an arp - otherwise * links may oscillate or not come up at all; if switch is * in something like xor mode, there is nothing we can * do - all replies will be rx'ed on same link causing slaves * to be unstable during low/no traffic periods
*/ if (bond_slave_is_up(slave))
bond_send_validate(bond, slave);
}
rcu_read_unlock();
if (do_failover || slave_state_changed) { if (!rtnl_trylock()) goto re_arm;
if (slave_state_changed) {
bond_slave_state_change(bond); if (BOND_MODE(bond) == BOND_MODE_XOR)
bond_update_slave_arr(bond, NULL);
} if (do_failover) {
block_netpoll_tx();
bond_select_active_slave(bond);
unblock_netpoll_tx();
}
rtnl_unlock();
}
re_arm: if (bond->params.arp_interval)
queue_delayed_work(bond->wq, &bond->arp_work,
msecs_to_jiffies(bond->params.arp_interval));
}
/* Called to inspect slaves for active-backup mode ARP monitor link state * changes. Sets proposed link state in slaves to specify what action * should take place for the slave. Returns 0 if no changes are found, >0 * if changes to link states must be committed. * * Called with rcu_read_lock held.
*/ staticint bond_ab_arp_inspect(struct bonding *bond)
{ unsignedlong last_tx, last_rx; struct list_head *iter; struct slave *slave; int commit = 0;
/* Give slaves 2*delta after being enslaved or made * active. This avoids bouncing, as the last receive * times need a full ARP monitor cycle to be updated.
*/ if (bond_time_in_interval(bond, slave->last_link_up, 2)) continue;
/* Backup slave is down if: * - No current_arp_slave AND * - more than (missed_max+1)*delta since last receive AND * - the bond has an IP address * * Note: a non-null current_arp_slave indicates * the curr_active_slave went down and we are * searching for a new one; under this condition * we only take the curr_active_slave down - this * gives each slave a chance to tx/rx traffic * before being taken out
*/ if (!bond_is_active_slave(slave) &&
!rcu_access_pointer(bond->current_arp_slave) &&
!bond_time_in_interval(bond, last_rx, bond->params.missed_max + 1)) {
bond_propose_link_state(slave, BOND_LINK_DOWN);
commit++;
}
/* Active slave is down if: * - more than missed_max*delta since transmitting OR * - (more than missed_max*delta since receive AND * the bond has an IP address)
*/
last_tx = slave_last_tx(slave); if (bond_is_active_slave(slave) &&
(!bond_time_in_interval(bond, last_tx, bond->params.missed_max) ||
!bond_time_in_interval(bond, last_rx, bond->params.missed_max))) {
bond_propose_link_state(slave, BOND_LINK_DOWN);
commit++;
}
}
return commit;
}
/* Called to commit link state changes noted by inspection step of * active-backup mode ARP monitor. * * Called with RTNL held.
*/ staticvoid bond_ab_arp_commit(struct bonding *bond)
{ bool do_failover = false; struct list_head *iter; unsignedlong last_tx; struct slave *slave;
bond_for_each_slave(bond, slave, iter) { switch (slave->link_new_state) { case BOND_LINK_NOCHANGE: continue;
case BOND_LINK_UP:
last_tx = slave_last_tx(slave); if (rtnl_dereference(bond->curr_active_slave) != slave ||
(!rtnl_dereference(bond->curr_active_slave) &&
bond_time_in_interval(bond, last_tx, 1))) { struct slave *current_arp_slave;
case BOND_LINK_FAIL:
bond_set_slave_link_state(slave, BOND_LINK_FAIL,
BOND_SLAVE_NOTIFY_NOW);
bond_set_slave_inactive_flags(slave,
BOND_SLAVE_NOTIFY_NOW);
/* A slave has just been enslaved and has become * the current active slave.
*/ if (rtnl_dereference(bond->curr_active_slave))
RCU_INIT_POINTER(bond->current_arp_slave, NULL); continue;
if (curr_arp_slave && curr_active_slave)
netdev_info(bond->dev, "PROBE: c_arp %s && cas %s BAD\n",
curr_arp_slave->dev->name,
curr_active_slave->dev->name);
if (curr_active_slave) {
bond_send_validate(bond, curr_active_slave); return should_notify_rtnl;
}
/* if we don't have a curr_active_slave, search for the next available * backup slave from the current_arp_slave and make it the candidate * for becoming the curr_active_slave
*/
if (!curr_arp_slave) {
curr_arp_slave = bond_first_slave_rcu(bond); if (!curr_arp_slave) return should_notify_rtnl;
}
bond_for_each_slave_rcu(bond, slave, iter) { if (!found && !before && bond_slave_is_up(slave))
before = slave;
if (found && !new_slave && bond_slave_is_up(slave))
new_slave = slave; /* if the link state is up at this point, we * mark it down - this can happen if we have * simultaneous link failures and * reselect_active_interface doesn't make this * one the current slave so it is still marked * up when it is actually down
*/ if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) {
bond_set_slave_link_state(slave, BOND_LINK_DOWN,
BOND_SLAVE_NOTIFY_LATER); if (slave->link_failure_count < UINT_MAX)
slave->link_failure_count++;
/* A netdev event can be generated while enslaving a device * before netdev_rx_handler_register is called in which case * slave will be NULL
*/ if (!slave) {
netdev_dbg(slave_dev, "%s called on NULL slave\n", __func__); return NOTIFY_DONE;
}
bond_dev = slave->bond->dev;
bond = slave->bond;
primary = rtnl_dereference(bond->primary_slave);
switch (event) { case NETDEV_UNREGISTER: if (bond_dev->type != ARPHRD_ETHER)
bond_release_and_destroy(bond_dev, slave_dev); else
__bond_release_one(bond_dev, slave_dev, false, true); break; case NETDEV_UP: case NETDEV_CHANGE: /* For 802.3ad mode only: * Getting invalid Speed/Duplex values here will put slave * in weird state. Mark it as link-fail if the link was * previously up or link-down if it hasn't yet come up, and * let link-monitoring (miimon) set it right when correct * speeds/duplex are available.
*/ if (bond_update_speed_duplex(slave) &&
BOND_MODE(bond) == BOND_MODE_8023AD) { if (slave->last_link_up)
slave->link = BOND_LINK_FAIL; else
slave->link = BOND_LINK_DOWN;
}
if (BOND_MODE(bond) == BOND_MODE_8023AD)
bond_3ad_adapter_speed_duplex_changed(slave);
fallthrough; case NETDEV_DOWN: /* Refresh slave-array if applicable! * If the setup does not use miimon or arpmon (mode-specific!), * then these events will not cause the slave-array to be * refreshed. This will cause xmit to use a slave that is not * usable. Avoid such situation by refeshing the array at these * events. If these (miimon/arpmon) parameters are configured * then array gets refreshed twice and that should be fine!
*/ if (bond_mode_can_use_xmit_hash(bond))
bond_update_slave_arr(bond, NULL); break; case NETDEV_CHANGEMTU: /* TODO: Should slaves be allowed to * independently alter their MTU? For * an active-backup bond, slaves need * not be the same type of device, so * MTUs may vary. For other modes, * slaves arguably should have the * same MTUs. To do this, we'd need to * take over the slave's change_mtu * function for the duration of their * servitude.
*/ break; case NETDEV_CHANGENAME: /* we don't care if we don't have primary set */ if (!bond_uses_primary(bond) ||
!bond->params.primary[0]) break;
if (slave == primary) { /* slave's name changed - he's no longer primary */
RCU_INIT_POINTER(bond->primary_slave, NULL);
} elseif (!strcmp(slave_dev->name, bond->params.primary)) { /* we have a new primary slave */
rcu_assign_pointer(bond->primary_slave, slave);
} else { /* we didn't change primary - exit */ break;
}
netdev_info(bond->dev, "Primary slave changed to %s, reselecting active slave\n",
primary ? slave_dev->name : "none");
block_netpoll_tx();
bond_select_active_slave(bond);
unblock_netpoll_tx(); break; case NETDEV_FEAT_CHANGE: if (!bond->notifier_ctx) {
bond->notifier_ctx = true;
bond_compute_features(bond);
bond->notifier_ctx = false;
} break; case NETDEV_RESEND_IGMP: /* Propagate to master device */
call_netdevice_notifiers(event, slave->bond->dev); break; case NETDEV_XDP_FEAT_CHANGE:
bond_xdp_set_features(bond_dev); break; default: break;
}
return NOTIFY_DONE;
}
/* bond_netdev_event: handle netdev notifier chain events. * * This function receives events for the netdev chain. The caller (an * ioctl handler calling blocking_notifier_call_chain) holds the necessary * locks for us to safely manipulate the slave devices (RTNL lock, * dev_probe_lock).
*/ staticint bond_netdev_event(struct notifier_block *this, unsignedlong event, void *ptr)
{ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
netdev_dbg(event_dev, "%s received %s\n",
__func__, netdev_cmd_to_name(event));
if (!(event_dev->priv_flags & IFF_BONDING)) return NOTIFY_DONE;
if (event_dev->flags & IFF_MASTER) { int ret;
ret = bond_master_netdev_event(event, event_dev); if (ret != NOTIFY_DONE) return ret;
}
if (event_dev->flags & IFF_SLAVE) return bond_slave_netdev_event(event, event_dev);
/* Helper to access data in a packet, with or without a backing skb. * If skb is given the data is linearized if necessary via pskb_may_pull.
*/ staticinlineconstvoid *bond_pull_data(struct sk_buff *skb, constvoid *data, int hlen, int n)
{ if (likely(n <= hlen)) return data; elseif (skb && likely(pskb_may_pull(skb, n))) return skb->data;
return NULL;
}
/* L2 hash helper */ staticinline u32 bond_eth_hash(struct sk_buff *skb, constvoid *data, int mhoff, int hlen)
{ struct ethhdr *ep;
data = bond_pull_data(skb, data, hlen, mhoff + sizeof(struct ethhdr)); if (!data) return 0;
/* ICMP error packets contains at least 8 bytes of the header * of the packet which generated the error. Use this information * to correlate ICMP error packets within the same flow which * generated the error.
*/ if (ip_proto == IPPROTO_ICMP || ip_proto == IPPROTO_ICMPV6) {
skb_flow_get_icmp_tci(skb, &fk->icmp, data, nhoff, hlen); if (ip_proto == IPPROTO_ICMP) { if (!icmp_is_err(fk->icmp.type)) returntrue;
/* discard lowest hash bit to deal with the common even ports pattern */ if (xmit_policy == BOND_XMIT_POLICY_LAYER34 ||
xmit_policy == BOND_XMIT_POLICY_ENCAP34) return hash >> 1;
return hash;
}
/* Generate hash based on xmit policy. If @skb is given it is used to linearize * the data as required, but this function can be used without it if the data is * known to be linear (e.g. with xdp_buff).
*/ static u32 __bond_xmit_hash(struct bonding *bond, struct sk_buff *skb, constvoid *data,
__be16 l2_proto, int mhoff, int nhoff, int hlen)
{ struct flow_keys flow;
u32 hash;
if (bond->params.xmit_policy == BOND_XMIT_POLICY_VLAN_SRCMAC) return bond_vlan_srcmac_hash(skb, data, mhoff, hlen);
/** * bond_xmit_hash - generate a hash value based on the xmit policy * @bond: bonding device * @skb: buffer to use for headers * * This function will extract the necessary headers from the skb buffer and use * them to generate a hash based on the xmit_policy set in the bonding device
*/
u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
{ if (bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP34 &&
skb->l4_hash) return skb->hash;
/** * bond_xmit_hash_xdp - generate a hash value based on the xmit policy * @bond: bonding device * @xdp: buffer to use for headers * * The XDP variant of bond_xmit_hash.
*/ static u32 bond_xmit_hash_xdp(struct bonding *bond, struct xdp_buff *xdp)
{ struct ethhdr *eth;
if (xdp->data + sizeof(struct ethhdr) > xdp->data_end) return 0;
if (bond_is_lb(bond)) { /* bond_alb_initialize must be called before the timer * is started.
*/ if (bond_alb_initialize(bond, (BOND_MODE(bond) == BOND_MODE_ALB))) return -ENOMEM; if (bond->params.tlb_dynamic_lb || BOND_MODE(bond) == BOND_MODE_ALB)
queue_delayed_work(bond->wq, &bond->alb_work, 0);
}
if (bond->params.miimon) /* link check interval, in milliseconds. */
queue_delayed_work(bond->wq, &bond->mii_work, 0);
if (bond->params.arp_interval) { /* arp interval, in milliseconds. */
queue_delayed_work(bond->wq, &bond->arp_work, 0);
bond->recv_probe = bond_rcv_validate;
}
if (BOND_MODE(bond) == BOND_MODE_8023AD) {
queue_delayed_work(bond->wq, &bond->ad_work, 0); /* register to receive LACPDUs */
bond->recv_probe = bond_3ad_lacpdu_recv;
bond_3ad_initiate_agg_selection(bond, 1);
switch (cmd) { case SIOCGMIIPHY:
mii = if_mii(ifr); if (!mii) return -EINVAL;
mii->phy_id = 0;
fallthrough; case SIOCGMIIREG: /* We do this again just in case we were called by SIOCGMIIREG * instead of SIOCGMIIPHY.
*/
mii = if_mii(ifr); if (!mii) return -EINVAL;
if (mii->reg_num == 1) {
mii->val_out = 0; if (netif_carrier_ok(bond->dev))
mii->val_out = BMSR_LSTATUS;
}
switch (cmd) { case SIOCBONDENSLAVE:
res = bond_enslave(bond_dev, slave_dev, NULL); break; case SIOCBONDRELEASE:
res = bond_release(bond_dev, slave_dev); break; case SIOCBONDSETHWADDR:
res = bond_set_dev_addr(bond_dev, slave_dev); break; case SIOCBONDCHANGEACTIVE:
bond_opt_initstr(&newval, slave_dev->name);
res = __bond_opt_set_notify(bond, BOND_OPT_ACTIVE_SLAVE,
&newval); break; default:
res = -EOPNOTSUPP;
}
rcu_read_lock();
slave = bond_first_slave_rcu(bond); if (!slave) goto out;
slave_ops = slave->dev->netdev_ops; if (!slave_ops->ndo_neigh_setup) goto out;
/* TODO: find another way [1] to implement this. * Passing a zeroed structure is fragile, * but at least we do not pass garbage. * * [1] One way would be that ndo_neigh_setup() never touch * struct neigh_parms, but propagate the new neigh_setup() * back to ___neigh_create() / neigh_parms_alloc()
*/
memset(&parms, 0, sizeof(parms));
ret = slave_ops->ndo_neigh_setup(slave->dev, &parms);
if (ret) goto out;
if (parms.neigh_setup)
ret = parms.neigh_setup(n);
out:
rcu_read_unlock(); return ret;
}
/* The bonding ndo_neigh_setup is called at init time before any
 * slave exists. So we must declare a proxy setup function which will
 * be used at run time to resolve the actual slave neigh param setup.
 *
 * It's also called by master devices (such as vlans) to setup their
 * underlying devices. In that case - do nothing, we're already set up from
 * our init.
 *
 * Always returns 0.
 */
static int bond_neigh_setup(struct net_device *dev,
			    struct neigh_parms *parms)
{
	/* modify only our neigh_parms */
	if (parms->dev == dev)
		parms->neigh_setup = bond_neigh_init;

	return 0;
}
/* Change the MTU of all of a master's slaves to match the master */ staticint bond_change_mtu(struct net_device *bond_dev, int new_mtu)
{ struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *rollback_slave; struct list_head *iter; int res = 0;
if (res) { /* If we failed to set the slave's mtu to the new value * we must abort the operation even in ACTIVE_BACKUP * mode, because if we allow the backup slaves to have * different mtu values than the active slave we'll * need to change their mtu when doing a failover. That * means changing their mtu from timer context, which * is probably not a good idea.
*/
slave_dbg(bond_dev, slave->dev, "err %d setting mtu to %d\n",
res, new_mtu); goto unwind;
}
}
WRITE_ONCE(bond_dev->mtu, new_mtu);
return 0;
unwind: /* unwind from head to the slave that failed */
bond_for_each_slave(bond, rollback_slave, iter) { int tmp_res;
/* Change HW address * * Note that many devices must be down to change the HW address, and * downing the master releases all slaves. We can make bonds full of * bonding devices to test this, however.
*/ staticint bond_set_mac_address(struct net_device *bond_dev, void *addr)
{ struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *rollback_slave; struct sockaddr_storage *ss = addr, tmp_ss; struct list_head *iter; int res = 0;
if (BOND_MODE(bond) == BOND_MODE_ALB) return bond_alb_set_mac_address(bond_dev, addr);
/* If fail_over_mac is enabled, do nothing and return success. * Returning an error causes ifenslave to fail.
*/ if (bond->params.fail_over_mac &&
BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP) return 0;
if (!is_valid_ether_addr(ss->__data)) return -EADDRNOTAVAIL;
bond_for_each_slave(bond, slave, iter) {
slave_dbg(bond_dev, slave->dev, "%s: slave=%p\n",
__func__, slave);
res = dev_set_mac_address(slave->dev, addr, NULL); if (res) { /* TODO: consider downing the slave * and retry ? * User should expect communications * breakage anyway until ARP finish * updating, so...
*/
slave_dbg(bond_dev, slave->dev, "%s: err %d\n",
__func__, res); goto unwind;
}
}
/** * bond_get_slave_by_id - get xmit slave with slave_id * @bond: bonding device that is transmitting * @slave_id: slave id up to slave_cnt-1 through which to transmit * * This function tries to get slave with slave_id but in case * it fails, it tries to find the first available slave for transmission.
*/ staticstruct slave *bond_get_slave_by_id(struct bonding *bond, int slave_id)
{ struct list_head *iter; struct slave *slave; int i = slave_id;
/* Here we start from the slave with slave_id */
bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) { if (bond_slave_can_tx(slave)) return slave;
}
}
/* Here we start from the first slave up to slave_id */
i = slave_id;
bond_for_each_slave_rcu(bond, slave, iter) { if (--i < 0) break; if (bond_slave_can_tx(slave)) return slave;
} /* no slave that can tx has been found */ return NULL;
}
/** * bond_rr_gen_slave_id - generate slave id based on packets_per_slave * @bond: bonding device to use * * Based on the value of the bonding device's packets_per_slave parameter * this function generates a slave id, which is usually used as the next * slave to transmit through.
*/ static u32 bond_rr_gen_slave_id(struct bonding *bond)
{
u32 slave_id; struct reciprocal_value reciprocal_packets_per_slave; int packets_per_slave = bond->params.packets_per_slave;
/* Start with the curr_active_slave that joined the bond as the * default for sending IGMP traffic. For failover purposes one * needs to maintain some consistency for the interface that will * send the join/membership reports. The curr_active_slave found * will send all of this type of traffic.
*/ if (skb->protocol == htons(ETH_P_IP)) { int noff = skb_network_offset(skb); struct iphdr *iph;
if (unlikely(!pskb_may_pull(skb, noff + sizeof(*iph)))) goto non_igmp;
iph = ip_hdr(skb); if (iph->protocol == IPPROTO_IGMP) {
slave = rcu_dereference(bond->curr_active_slave); if (slave) return slave; return bond_get_slave_by_id(bond, 0);
}
}
/* Transmit handler for active-backup mode. bond->curr_active_slave is
 * always valid whenever the bond has a usable interface, so the frame is
 * either queued on the slave returned by bond_xmit_activebackup_slave_get()
 * or, when that lookup yields nothing, dropped.
 */
static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
					  struct net_device *bond_dev)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct slave *active = bond_xmit_activebackup_slave_get(bond);

	/* No slave to send through: hand the skb to the drop path. */
	if (!active)
		return bond_tx_drop(bond_dev, skb);

	return bond_dev_queue_xmit(bond, skb, active->dev);
}
/* Use this to update slave_array when (a) it's not appropriate to update * slave_array right away (note that update_slave_array() may sleep) * and / or (b) RTNL is not held.
*/ void bond_slave_arr_work_rearm(struct bonding *bond, unsignedlong delay)
{
queue_delayed_work(bond->wq, &bond->slave_arr_work, delay);
}
/* Slave array work handler. Holds only RTNL.
 *
 * Rebuilds the slave array through bond_update_slave_arr(). When RTNL
 * cannot be taken with rtnl_trylock(), or when the rebuild returns an
 * error, the work is re-armed with a delay of 1 so it is tried again.
 */
static void bond_slave_arr_handler(struct work_struct *work)
{
	struct bonding *bond = container_of(work, struct bonding,
					    slave_arr_work.work);
	int ret;

	/* Never block on RTNL from the work item; retry later instead. */
	if (!rtnl_trylock())
		goto err;

	ret = bond_update_slave_arr(bond, NULL);
	rtnl_unlock();
	if (ret) {
		pr_warn_ratelimited("Failed to update slave array from WT\n");
		goto err;
	}
	return;

err:
	bond_slave_arr_work_rearm(bond, 1);
}
/* Drop @skipslave from the @slaves array, if it is present.
 *
 * A NULL @slaves is tolerated and treated as an empty array. Only the
 * first matching entry is removed; element order is not preserved.
 */
static void bond_skip_slave(struct bond_up_slave *slaves,
			    struct slave *skipslave)
{
	int idx;

	/* Rare situation where caller has asked to skip a specific
	 * slave but allocation failed (most likely!). BTW this is
	 * only possible when the call is initiated from
	 * __bond_release_one(). In this situation; overwrite the
	 * skipslave entry in the array with the last entry from the
	 * array to avoid a situation where the xmit path may choose
	 * this to-be-skipped slave to send a packet out.
	 */
	for (idx = 0; slaves && idx < slaves->count; idx++) {
		if (skipslave == slaves->arr[idx]) {
			slaves->arr[idx] =
				slaves->arr[slaves->count - 1];
			slaves->count--;
			break;
		}
	}
}
/* Build the usable slaves array in control path for modes that use xmit-hash * to determine the slave interface - * (a) BOND_MODE_8023AD * (b) BOND_MODE_XOR * (c) (BOND_MODE_TLB || BOND_MODE_ALB) && tlb_dynamic_lb == 0 * * The caller is expected to hold RTNL only and NO other lock!
*/ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
{ struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL; struct slave *slave; struct list_head *iter; int agg_id = 0; int ret = 0;
spin_lock_bh(&bond->mode_lock); if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
spin_unlock_bh(&bond->mode_lock);
pr_debug("bond_3ad_get_active_agg_info failed\n"); /* No active aggragator means it's not safe to use * the previous array.
*/
bond_reset_slave_arr(bond); goto out;
}
spin_unlock_bh(&bond->mode_lock);
agg_id = ad_info.aggregator_id;
}
bond_for_each_slave(bond, slave, iter) { if (skipslave == slave) continue;
/* Use this Xmit function for 3AD as well as XOR modes. The current * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out.
*/ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev)
{ struct bonding *bond = netdev_priv(dev); struct bond_up_slave *slaves; struct slave *slave;
/* in broadcast mode, we send everything to all or usable slave interfaces. * under rcu_read_lock when this function is called.
*/ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb, struct net_device *bond_dev, bool all_slaves)
{ struct bonding *bond = netdev_priv(bond_dev); struct bond_up_slave *slaves; bool xmit_suc = false; bool skb_used = false; int slaves_count, i;
if (all_slaves)
slaves = rcu_dereference(bond->all_slaves); else
slaves = rcu_dereference(bond->usable_slaves);
slaves_count = slaves ? READ_ONCE(slaves->count) : 0; for (i = 0; i < slaves_count; i++) { struct slave *slave = slaves->arr[i]; struct sk_buff *skb2;
if (!(bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)) continue;
/* Lookup the slave that corresponds to a qid.
 *
 * Returns 0 when the skb was handed to bond_dev_queue_xmit() on the slave
 * whose queue_id equals the skb's queue mapping, and 1 when the caller
 * must fall back to the default transmit policy (no rx queue recorded,
 * no slave with that queue_id, or the matching slave is not up).
 */
static inline int bond_slave_override(struct bonding *bond,
				      struct sk_buff *skb)
{
	struct slave *slave = NULL;
	struct list_head *iter;

	if (!skb_rx_queue_recorded(skb))
		return 1;

	/* Find out if any slaves have the same mapping as this skb. */
	bond_for_each_slave_rcu(bond, slave, iter) {
		if (READ_ONCE(slave->queue_id) == skb_get_queue_mapping(skb)) {
			if (bond_slave_is_up(slave) &&
			    slave->link == BOND_LINK_UP) {
				bond_dev_queue_xmit(bond, skb, slave->dev);
				return 0;
			}
			/* If the slave isn't UP, use default transmit policy. */
			break;
		}
	}

	return 1;
}
/* Queue selection helper for the bonding device.
 *
 * It exists so that dev_pick_tx gets the correct destination queue: using
 * a helper skips the call to skb_tx_hash and places skbs in the queue we
 * expect on their way down to the bonding driver.
 *
 * Returns the recorded rx queue of @skb (0 when none was recorded), folded
 * into the range below dev->real_num_tx_queues by repeated subtraction.
 */
static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
			     struct net_device *sb_dev)
{
	u16 txq = 0;

	if (skb_rx_queue_recorded(skb))
		txq = skb_get_rx_queue(skb);

	/* Save the original txq to restore before passing to the driver */
	qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb_get_queue_mapping(skb);

	/* Wrap the queue index by subtraction rather than by division. */
	while (unlikely(txq >= dev->real_num_tx_queues))
		txq -= dev->real_num_tx_queues;

	return txq;
}
/** * bond_sk_hash_l34 - generate a hash value based on the socket's L3 and L4 fields * @sk: socket to use for headers * * This function will extract the necessary field from the socket and use * them to generate a hash based on the LAYER34 xmit_policy. * Assumes that sk is a TCP or UDP socket.
*/ static u32 bond_sk_hash_l34(struct sock *sk)
{ struct flow_keys flow;
u32 hash;
/* tls_netdev might become NULL, even if tls_is_skb_tx_device_offloaded * was true, if tls_device_down is running in parallel, but it's OK, * because bond_get_slave_by_dev has a NULL check.
*/ if (likely(bond_get_slave_by_dev(bond, tls_netdev))) return bond_dev_queue_xmit(bond, skb, tls_netdev); return bond_tx_drop(dev, skb);
} #endif
/* If we risk deadlock from transmitting this in the * netpoll path, tell netpoll to queue the frame for later tx
*/ if (unlikely(is_netpoll_tx_blocked(dev))) return NETDEV_TX_BUSY;
rcu_read_lock(); if (bond_has_slaves(bond))
ret = __bond_start_xmit(skb, dev); else
ret = bond_tx_drop(dev, skb);
rcu_read_unlock();
/* If error happened on the first frame then we can pass the error up, otherwise * report the number of frames that were xmitted.
*/ if (err < 0) return (nxmit == 0 ? err : nxmit);
if (!slave_dev->netdev_ops->ndo_bpf ||
!slave_dev->netdev_ops->ndo_xdp_xmit) {
SLAVE_NL_ERR(dev, slave_dev, extack, "Slave device does not support XDP");
err = -EOPNOTSUPP; goto err;
}
if (dev_xdp_prog_count(slave_dev) > 0) {
SLAVE_NL_ERR(dev, slave_dev, extack, "Slave has XDP program loaded, please unload before enslaving");
err = -EOPNOTSUPP; goto err;
}
/* Set the BOND_PHC_INDEX flag to notify user space */ staticint bond_set_phc_index_flag(struct kernel_hwtstamp_config *kernel_cfg)
{ struct ifreq *ifr = kernel_cfg->ifr; struct hwtstamp_config cfg;
if (kernel_cfg->copied_to_user) { /* Lower device has a legacy implementation */ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) return -EFAULT;
/* Since bond_slave_can_tx returns false for all inactive or down slaves, we * do not need to check mode. Though link speed might not represent * the true receive or transmit bandwidth (not all modes are symmetric) * this is an accurate maximum.
*/
bond_for_each_slave(bond, slave, iter) { if (bond_slave_can_tx(slave)) {
bond_update_speed_duplex(slave); if (slave->speed != SPEED_UNKNOWN) { if (BOND_MODE(bond) == BOND_MODE_BROADCAST)
speed = bond_mode_bcast_speed(slave,
speed); else
speed += slave->speed;
} if (cmd->base.duplex == DUPLEX_UNKNOWN &&
slave->duplex != DUPLEX_UNKNOWN)
cmd->base.duplex = slave->duplex;
}
}
cmd->base.speed = speed ? : SPEED_UNKNOWN;
#ifdef CONFIG_XFRM_OFFLOAD /* set up xfrm device ops (only supported in active-backup right now) */
bond_dev->xfrmdev_ops = &bond_xfrmdev_ops;
INIT_LIST_HEAD(&bond->ipsec_list);
mutex_init(&bond->ipsec_lock); #endif/* CONFIG_XFRM_OFFLOAD */
/* don't acquire bond device's netif_tx_lock when transmitting */
bond_dev->lltx = true;
/* Don't allow bond devices to change network namespaces. */
bond_dev->netns_immutable = true;
/* By default, we declare the bond to be fully * VLAN hardware accelerated capable. Special * care is taken in the various xmit functions * when there are slaves that are not hw accel * capable
*/
bond_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
bond_dev->features |= bond_dev->hw_features;
bond_dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
bond_dev->features |= NETIF_F_GSO_PARTIAL; #ifdef CONFIG_XFRM_OFFLOAD
bond_dev->hw_features |= BOND_XFRM_FEATURES; /* Only enable XFRM features if this is an active-backup config */ if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP)
bond_dev->features |= BOND_XFRM_FEATURES; #endif/* CONFIG_XFRM_OFFLOAD */
}
/* Destroy a bonding device. * Must be under rtnl_lock when this function is called.
*/ staticvoid bond_uninit(struct net_device *bond_dev)
{ struct bonding *bond = netdev_priv(bond_dev); struct list_head *iter; struct slave *slave;
bond_netpoll_cleanup(bond_dev);
/* Release the bonded slaves */
bond_for_each_slave(bond, slave, iter)
__bond_release_one(bond_dev, slave->dev, true, true);
netdev_info(bond_dev, "Released all slaves\n");
if (xmit_hash_policy) { if (bond_mode == BOND_MODE_ROUNDROBIN ||
bond_mode == BOND_MODE_ACTIVEBACKUP ||
bond_mode == BOND_MODE_BROADCAST) {
pr_info("xmit_hash_policy param is irrelevant in mode %s\n",
bond_mode_name(bond_mode));
} else {
bond_opt_initstr(&newval, xmit_hash_policy);
valptr = bond_opt_parse(bond_opt_get(BOND_OPT_XMIT_HASH),
&newval); if (!valptr) {
pr_err("Error: Invalid xmit_hash_policy \"%s\"\n",
xmit_hash_policy); return -EINVAL;
}
xmit_hashtype = valptr->value;
}
}
if (lacp_rate) { if (bond_mode != BOND_MODE_8023AD) {
pr_info("lacp_rate param is irrelevant in mode %s\n",
bond_mode_name(bond_mode));
} else {
bond_opt_initstr(&newval, lacp_rate);
valptr = bond_opt_parse(bond_opt_get(BOND_OPT_LACP_RATE),
&newval); if (!valptr) {
pr_err("Error: Invalid lacp rate \"%s\"\n",
lacp_rate); return -EINVAL;
}
lacp_fast = valptr->value;
}
}
if (ad_select) {
bond_opt_initstr(&newval, ad_select);
valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_SELECT),
&newval); if (!valptr) {
pr_err("Error: Invalid ad_select \"%s\"\n", ad_select); return -EINVAL;
}
params->ad_select = valptr->value; if (bond_mode != BOND_MODE_8023AD)
pr_warn("ad_select param only affects 802.3ad mode\n");
} else {
params->ad_select = BOND_AD_STABLE;
}
if (max_bonds < 0) {
pr_warn("Warning: max_bonds (%d) not in range %d-%d, so it was reset to BOND_DEFAULT_MAX_BONDS (%d)\n",
max_bonds, 0, INT_MAX, BOND_DEFAULT_MAX_BONDS);
max_bonds = BOND_DEFAULT_MAX_BONDS;
}
if (miimon < 0) {
pr_warn("Warning: miimon module parameter (%d), not in range 0-%d, so it was reset to 0\n",
miimon, INT_MAX);
miimon = 0;
}
if (updelay < 0) {
pr_warn("Warning: updelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
updelay, INT_MAX);
updelay = 0;
}
if (downdelay < 0) {
pr_warn("Warning: downdelay module parameter (%d), not in range 0-%d, so it was reset to 0\n",
downdelay, INT_MAX);
downdelay = 0;
}
if ((use_carrier != 0) && (use_carrier != 1)) {
pr_warn("Warning: use_carrier module parameter (%d), not of valid value (0/1), so it was set to 1\n",
use_carrier);
use_carrier = 1;
}
if (num_peer_notif < 0 || num_peer_notif > 255) {
pr_warn("Warning: num_grat_arp/num_unsol_na (%d) not in range 0-255 so it was reset to 1\n",
num_peer_notif);
num_peer_notif = 1;
}
/* reset values for 802.3ad/TLB/ALB */ if (!bond_mode_uses_arp(bond_mode)) { if (!miimon) {
pr_warn("Warning: miimon must be specified, otherwise bonding will not detect link failure, speed and duplex which are essential for 802.3ad operation\n");
pr_warn("Forcing miimon to 100msec\n");
miimon = BOND_DEFAULT_MIIMON;
}
}
if (tx_queues < 1 || tx_queues > 255) {
pr_warn("Warning: tx_queues (%d) should be between 1 and 255, resetting to %d\n",
tx_queues, BOND_DEFAULT_TX_QUEUES);
tx_queues = BOND_DEFAULT_TX_QUEUES;
}
if ((all_slaves_active != 0) && (all_slaves_active != 1)) {
pr_warn("Warning: all_slaves_active module parameter (%d), not of valid value (0/1), so it was set to 0\n",
all_slaves_active);
all_slaves_active = 0;
}
if (resend_igmp < 0 || resend_igmp > 255) {
pr_warn("Warning: resend_igmp (%d) should be between 0 and 255, resetting to %d\n",
resend_igmp, BOND_DEFAULT_RESEND_IGMP);
resend_igmp = BOND_DEFAULT_RESEND_IGMP;
}
bond_opt_initval(&newval, packets_per_slave); if (!bond_opt_parse(bond_opt_get(BOND_OPT_PACKETS_PER_SLAVE), &newval)) {
pr_warn("Warning: packets_per_slave (%d) should be between 0 and %u resetting to 1\n",
packets_per_slave, USHRT_MAX);
packets_per_slave = 1;
}
if (bond_mode == BOND_MODE_ALB) {
pr_notice("In ALB mode you might experience client disconnections upon reconnection of a link if the bonding module updelay parameter (%d msec) is incompatible with the forwarding delay time of the switch\n",
updelay);
}
if (!miimon) { if (updelay || downdelay) { /* just warn the user the up/down delay will have * no effect since miimon is zero...
*/
pr_warn("Warning: miimon module parameter not set and updelay (%d) or downdelay (%d) module parameter is set; updelay and downdelay have no effect unless miimon is set\n",
updelay, downdelay);
}
} else { /* don't allow arp monitoring */ if (arp_interval) {
pr_warn("Warning: miimon (%d) and arp_interval (%d) can't be used simultaneously, disabling ARP monitoring\n",
miimon, arp_interval);
arp_interval = 0;
}
if ((updelay % miimon) != 0) {
pr_warn("Warning: updelay (%d) is not a multiple of miimon (%d), updelay rounded to %d ms\n",
updelay, miimon, (updelay / miimon) * miimon);
}
updelay /= miimon;
if ((downdelay % miimon) != 0) {
pr_warn("Warning: downdelay (%d) is not a multiple of miimon (%d), downdelay rounded to %d ms\n",
downdelay, miimon,
(downdelay / miimon) * miimon);
}
downdelay /= miimon;
}
if (arp_interval < 0) {
pr_warn("Warning: arp_interval module parameter (%d), not in range 0-%d, so it was reset to 0\n",
arp_interval, INT_MAX);
arp_interval = 0;
}
for (arp_ip_count = 0, i = 0;
(arp_ip_count < BOND_MAX_ARP_TARGETS) && arp_ip_target[i]; i++) {
__be32 ip;
/* not a complete check, but good enough to catch mistakes */ if (!in4_pton(arp_ip_target[i], -1, (u8 *)&ip, -1, NULL) ||
!bond_is_ip_target_ok(ip)) {
pr_warn("Warning: bad arp_ip_target module parameter (%s), ARP monitoring will not be performed\n",
arp_ip_target[i]);
arp_interval = 0;
} else { if (bond_get_targets_ip(arp_target, ip) == -1)
arp_target[arp_ip_count++] = ip; else
pr_warn("Warning: duplicate address %pI4 in arp_ip_target, skipping\n",
&ip);
}
}
if (arp_interval && !arp_ip_count) { /* don't allow arping if no arp_ip_target given... */
pr_warn("Warning: arp_interval module parameter (%d) specified without providing an arp_ip_target parameter, arp_interval was reset to 0\n",
arp_interval);
arp_interval = 0;
}
if (arp_validate) { if (!arp_interval) {
pr_err("arp_validate requires arp_interval\n"); return -EINVAL;
}
if (miimon) {
pr_info("MII link monitoring set to %d ms\n", miimon);
} elseif (arp_interval) {
valptr = bond_opt_get_val(BOND_OPT_ARP_VALIDATE,
arp_validate_value);
pr_info("ARP monitoring set to %d ms, validate %s, with %d target(s):",
arp_interval, valptr->string, arp_ip_count);
for (i = 0; i < arp_ip_count; i++)
pr_cont(" %s", arp_ip_target[i]);
pr_cont("\n");
} elseif (max_bonds) { /* miimon and arp_interval not set, we need one so things * work as expected, see bonding.txt for details
*/
pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details\n");
}
if (primary && !bond_mode_uses_primary(bond_mode)) { /* currently, using a primary only makes sense * in active backup, TLB or ALB modes
*/
pr_warn("Warning: %s primary device specified but has no effect in %s mode\n",
primary, bond_mode_name(bond_mode));
primary = NULL;
}
valptr = bond_opt_parse(bond_opt_get(BOND_OPT_AD_USER_PORT_KEY),
&newval); if (!valptr) {
pr_err("Error: No ad_user_port_key default value"); return -EINVAL;
}
ad_user_port_key = valptr->value;
bond_opt_initstr(&newval, "default");
valptr = bond_opt_parse(bond_opt_get(BOND_OPT_TLB_DYNAMIC_LB), &newval); if (!valptr) {
pr_err("Error: No tlb_dynamic_lb default value"); return -EINVAL;
}
tlb_dynamic_lb = valptr->value;
if (lp_interval == 0) {
pr_warn("Warning: ip_interval must be between 1 and %d, so it was reset to %d\n",
INT_MAX, BOND_ALB_DEFAULT_LP_INTERVAL);
lp_interval = BOND_ALB_DEFAULT_LP_INTERVAL;
}
/* Create a new bond based on the specified name and bonding parameters. * If name is NULL, obtain a suitable "bond%d" name for us. * Caller must NOT hold rtnl_lock; we need to release it here before we * set up our sysfs entries.
*/ int bond_create(struct net *net, constchar *name)
{ struct net_device *bond_dev; struct bonding *bond; int res = -ENOMEM;
rtnl_lock();
bond_dev = alloc_netdev_mq(sizeof(struct bonding),
name ? name : "bond%d", NET_NAME_UNKNOWN,
bond_setup, tx_queues); if (!bond_dev) goto out;
bond = netdev_priv(bond_dev);
dev_net_set(bond_dev, net);
bond_dev->rtnl_link_ops = &bond_link_ops;
res = register_netdevice(bond_dev); if (res < 0) {
free_netdev(bond_dev); goto out;
}
/* According to commit 69b0216ac255 ("bonding: fix bonding_masters * race condition in bond unloading") we need to remove sysfs files * before we remove our devices (done later in bond_net_exit_rtnl())
*/ staticvoid __net_exit bond_net_pre_exit(struct net *net)
{ struct bond_net *bn = net_generic(net, bond_net_id);
/* Kill off any bonds created after unregistering bond rtnl ops */
list_for_each_entry_safe(bond, tmp_bond, &bn->dev_list, bond_list)
unregister_netdevice_queue(bond->dev, dev_kill_list);
}
/* According to commit 23fa5c2caae0 ("bonding: destroy proc directory * only after all bonds are gone") bond_destroy_proc_dir() is called * after bond_net_exit_rtnl() has completed.
*/ staticvoid __net_exit bond_net_exit_batch(struct list_head *net_list)
{ struct bond_net *bn; struct net *net;
#ifdef CONFIG_NET_POLL_CONTROLLER /* Make sure we don't have an imbalance on our netpoll blocking */
WARN_ON(atomic_read(&netpoll_block_tx)); #endif
}
/* Module entry/exit hooks followed by the module's metadata. */
module_init(bonding_init);
module_exit(bonding_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION(DRV_DESCRIPTION);
MODULE_AUTHOR("Thomas Davis, tadavis@lbl.gov and many others");
MODULE_IMPORT_NS("NETDEV_INTERNAL");
Messung V0.5 in Prozent
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.145Bemerkung:
(vorverarbeitet am 2026-04-28)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.