rcu_read_lock(); /* do not keep any additional per socket state, just signal * the address list in order. * Note: removal from the local address list during the msk life-cycle * can lead to additional addresses not being announced.
*/
list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) continue;
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue;
/* Non-fullmesh endpoint, fill in the single entry * corresponding to the primary MPC subflow remote address
*/ if (!fullmesh) { if (deny_id0) return 0;
if (!mptcp_pm_addr_families_match(sk, local, &remote)) return 0;
/* check first for announce */ if (msk->pm.add_addr_signaled < add_addr_signal_max) { /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the * corresponding id will be marked as used. * Instead let the PM machinery reschedule us when the * current address announce will be completed.
*/ if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return;
if (!select_signal_address(pernet, msk, &local)) goto subflow;
/* If the alloc fails, we are on memory pressure, not worth * continuing, and trying to create subflows.
*/ if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) return;
if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)
signal_and_subflow = true;
}
subflow: /* No need to try establishing subflows to remote id0 if not allowed */ if (mptcp_pm_add_addr_c_flag_case(msk)) gotoexit;
/* check if should create a new subflow */ while (msk->pm.local_addr_used < local_addr_max &&
msk->pm.subflows < subflows_max) { struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX]; bool fullmesh; int i, nr;
if (signal_and_subflow)
signal_and_subflow = false; elseif (!select_local_address(pernet, msk, &local)) break;
/* Special case for ID0: set the correct ID */ if (local.addr.id == msk->mpc_endpoint_id)
local.addr.id = 0; else/* local_addr_used is not decr for ID 0 */
msk->pm.local_addr_used++;
nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); if (nr == 0) continue;
spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &local, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
}
if (c_flag_case &&
(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW)) {
__clear_bit(locals[i].addr.id,
msk->pm.id_avail_bitmap);
if (!is_id0)
msk->pm.local_addr_used++;
}
/* Special case for ID0: set the correct ID */ if (is_id0)
locals[i].addr.id = 0;
msk->pm.subflows++;
i++;
}
}
rcu_read_unlock();
/* Special case: peer sets the C flag, accept one ADD_ADDR if default * limits are used -- accepting no ADD_ADDR -- and use subflow endpoints
*/ if (!i && c_flag_case) { unsignedint local_addr_max = mptcp_pm_get_local_addr_max(msk);
/* If the array is empty, fill in the single * 'IPADDRANY' local address
*/ if (!i) {
memset(&locals[i], 0, sizeof(locals[i]));
locals[i].addr.family = #if IS_ENABLED(CONFIG_MPTCP_IPV6)
remote->family == AF_INET6 &&
ipv6_addr_v4mapped(&remote->addr6) ? AF_INET : #endif
remote->family;
if (!mptcp_pm_addr_families_match(sk, &locals[i].addr, remote)) return 0;
if (lookup_subflow_by_daddr(&msk->conn_list, &remote)) return;
/* pick id 0 port, if none is provided the remote address */ if (!remote.port)
remote.port = sk->sk_dport;
/* connect to the specified remote address, using whatever * local address the routing configuration will pick.
*/
nr = fill_local_addresses_vec(msk, &remote, locals); if (nr == 0) return;
spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) if (__mptcp_subflow_connect(sk, &locals[i], &remote) == 0)
sf_created = true;
spin_lock_bh(&msk->pm.lock);
if (sf_created) { /* add_addr_accepted is not decr for ID 0 */ if (remote.id)
msk->pm.add_addr_accepted++; if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
msk->pm.subflows >= subflows_max)
WRITE_ONCE(msk->pm.accept_addr, false);
}
}
/* Account the removal of a previously accepted ADD_ADDR with the given id.
 * For a non-zero id, decrement the add_addr_accepted counter and re-enable
 * accepting new ADD_ADDR announces if we dropped below the limit again.
 * id 0 addresses are not accounted in add_addr_accepted.
 */
void mptcp_pm_nl_rm_addr(struct mptcp_sock *msk, u8 rm_id)
{
	/* BUGFIX: the condition must be negated — WARN_ON_ONCE() returns the
	 * (warned) condition value, so without '!' the counter was decremented
	 * only when it was already 0, underflowing it.
	 */
	if (rm_id && !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) {
		/* Note: if the subflow has been closed before, this
		 * add_addr_accepted counter will not be decremented.
		 */
		if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk))
			WRITE_ONCE(msk->pm.accept_addr, true);
	}
}
/* Release an address entry and its listener socket, if any.
 * Caller must ensure the RCU grace period is already elapsed.
 */
static void __mptcp_pm_release_addr_entry(struct mptcp_pm_addr_entry *entry)
{
	/* BUGFIX: 'staticvoid' was fused into one token (compile error) */
	if (entry->lsk)
		sock_release(entry->lsk);
	kfree(entry);
}
spin_lock_bh(&pernet->lock); /* to keep the code simple, don't do IDR-like allocation for address ID, * just bail when we exceed limits
*/ if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
pernet->next_id = 1; if (pernet->addrs >= MPTCP_PM_ADDR_MAX) {
ret = -ERANGE; goto out;
} if (test_bit(entry->addr.id, pernet->id_bitmap)) {
ret = -EBUSY; goto out;
}
/* do not insert duplicate address, differentiate on port only * singled addresses
*/ if (!address_use_port(entry))
entry->addr.port = 0;
list_for_each_entry(cur, &pernet->local_addr_list, list) { if (mptcp_addresses_equal(&cur->addr, &entry->addr,
cur->addr.port || entry->addr.port)) { /* allow replacing the exiting endpoint only if such * endpoint is an implicit one and the user-space * did not provide an endpoint id
*/ if (!(cur->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)) {
ret = -EEXIST; goto out;
} if (entry->addr.id) goto out;
/* allow callers that only need to look up the local * addr's id to skip replacement. This allows them to * avoid calling synchronize_rcu in the packet recv * path.
*/ if (!replace) {
kfree(entry);
ret = cur->addr.id; goto out;
}
newsk = entry->lsk->sk; if (!newsk) return -EINVAL;
/* The subflow socket lock is acquired in a nested to the msk one * in several places, even by the TCP stack, and this msk is a kernel * socket: lockdep complains. Instead of propagating the _nested * modifiers in several places, re-init the lock class for the msk * socket to an mptcp specific one.
*/
sock_lock_init_class_and_name(newsk,
is_ipv6 ? "mlock-AF_INET6" : "mlock-AF_INET",
&mptcp_slock_keys[is_ipv6],
is_ipv6 ? "msk_lock-AF_INET6" : "msk_lock-AF_INET",
&mptcp_keys[is_ipv6]);
lock_sock(newsk);
ssk = __mptcp_nmpc_sk(mptcp_sk(newsk));
release_sock(newsk); if (IS_ERR(ssk)) return PTR_ERR(ssk);
/* We don't use mptcp_set_state() here because it needs to be called * under the msk socket lock. For the moment, that will not bring * anything more than only calling inet_sk_state_store(), because the * old status is known (TCP_CLOSE).
*/
inet_sk_state_store(newsk, TCP_LISTEN);
lock_sock(ssk);
WRITE_ONCE(mptcp_subflow_ctx(ssk)->pm_listener, true);
err = __inet_listen_sk(ssk, backlog); if (!err)
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CREATED);
release_sock(ssk); return err;
}
/* Look up the local address id matching @skc; when no matching endpoint
 * exists yet, allocate and append a new implicit local endpoint.
 * Returns the id (>= 0) on success or a negative errno.
 */
int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *skc)
{
	struct mptcp_pm_addr_entry *entry;
	struct pm_nl_pernet *pernet;
	int ret;

	pernet = pm_nl_get_pernet_from_msk(msk);

	rcu_read_lock();
	entry = __lookup_addr(pernet, &skc->addr);
	ret = entry ? entry->addr.id : -1;
	rcu_read_unlock();
	if (ret >= 0)
		return ret;

	/* address not found, add to local list */
	entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC);
	if (!entry)
		return -ENOMEM;

	/* implicit endpoints never carry a port */
	entry->addr.port = 0;
	ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false);
	if (ret < 0)
		kfree(entry);

	/* BUGFIX: the function was truncated — the final return statement and
	 * closing brace were missing, leaving the append result unreturned.
	 */
	return ret;
}
/* Add an MPTCP endpoint */ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info)
{ struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; struct nlattr *attr; int ret;
if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) return -EINVAL;
attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
ret = mptcp_pm_parse_entry(attr, info, true, &addr); if (ret < 0) return ret;
if (addr.addr.port && !address_use_port(&addr)) {
NL_SET_ERR_MSG_ATTR(info->extack, attr, "flags must have signal and not subflow when using port"); return -EINVAL;
}
if (addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
addr.flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
NL_SET_ERR_MSG_ATTR(info->extack, attr, "flags mustn't have both signal and fullmesh"); return -EINVAL;
}
ret = mptcp_remove_anno_list_by_saddr(msk, addr); if (ret || force) {
spin_lock_bh(&msk->pm.lock); if (ret) {
__set_bit(addr->id, msk->pm.id_avail_bitmap);
msk->pm.add_addr_signaled--;
}
mptcp_pm_remove_addr(msk, &list);
spin_unlock_bh(&msk->pm.lock);
} return ret;
}
/* Mark the subflow endpoint with the given id (id 0 maps to the MPC
 * endpoint id) as available again in the id_avail_bitmap; if it was in
 * use and is not the special id 0, decrement local_addr_used.
 */
static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id)
{
	/* BUGFIX: 'staticvoid' was fused into one token (compile error) */
	/* If it was marked as used, and not ID 0, decrement local_addr_used */
	if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) &&
	    id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0))
		msk->pm.local_addr_used--;
}
/* Remove an MPTCP endpoint */ int mptcp_pm_nl_del_addr_doit(struct sk_buff *skb, struct genl_info *info)
{ struct pm_nl_pernet *pernet = genl_info_pm_nl(info); struct mptcp_pm_addr_entry addr, *entry; unsignedint addr_max; struct nlattr *attr; int ret;
if (GENL_REQ_ATTR_CHECK(info, MPTCP_PM_ENDPOINT_ADDR)) return -EINVAL;
attr = info->attrs[MPTCP_PM_ENDPOINT_ADDR];
ret = mptcp_pm_parse_entry(attr, info, false, &addr); if (ret < 0) return ret;
/* the zero id address is special: the first address used by the msk * always gets such an id, so different subflows can have different zero * id addresses. Additionally zero id is not accounted for in id_bitmap. * Let's use an 'mptcp_rm_list' instead of the common remove code.
*/ if (addr.addr.id == 0) return mptcp_nl_remove_id_zero_address(sock_net(skb->sk), &addr.addr);
if (!mptcp_pm_is_userspace(msk)) {
lock_sock(sk);
mptcp_pm_flush_addrs_and_subflows(msk, rm_list);
release_sock(sk);
}
sock_put(sk);
cond_resched();
}
}
/* Remove and release every address entry on @list.
 * Caller must ensure the RCU grace period is already elapsed.
 */
static void __flush_addrs(struct list_head *list)
{
	/* BUGFIX: 'staticvoid' was fused into one token (compile error) */
	while (!list_empty(list)) {
		struct mptcp_pm_addr_entry *cur;

		cur = list_entry(list->next,
				 struct mptcp_pm_addr_entry, list);
		list_del_rcu(&cur->list);
		__mptcp_pm_release_addr_entry(cur);
	}
}
/* Copy the pernet address entry matching @id into @addr.
 * Returns 0 on success, -EINVAL when no entry carries that id.
 */
int mptcp_pm_nl_get_addr(u8 id, struct mptcp_pm_addr_entry *addr,
			 struct genl_info *info)
{
	struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
	struct mptcp_pm_addr_entry *match;
	int err = -EINVAL;

	rcu_read_lock();
	match = __lookup_addr_by_id(pernet, id);
	if (match) {
		*addr = *match;
		err = 0;
	}
	rcu_read_unlock();

	return err;
}
int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb)
{ struct net *net = sock_net(msg->sk); struct mptcp_pm_addr_entry *entry; struct pm_nl_pernet *pernet; int id = cb->args[0]; int i;
pernet = pm_nl_get_pernet(net);
rcu_read_lock(); for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) { if (test_bit(i, pernet->id_bitmap)) {
entry = __lookup_addr_by_id(pernet, i); if (!entry) break;
if (entry->addr.id <= id) continue;
if (mptcp_pm_genl_fill_addr(msg, cb, entry) < 0) break;
if (list_empty(&msk->conn_list) || mptcp_pm_is_userspace(msk)) goto next;
lock_sock(sk); if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
mptcp_pm_mp_prio_send_ack(msk, &local->addr, NULL, bkup); /* Subflows will only be recreated if the SUBFLOW flag is set */ if (is_subflow && (changed & MPTCP_PM_ADDR_FLAG_FULLMESH))
mptcp_pm_nl_fullmesh(msk, &local->addr);
release_sock(sk);
/* net is removed from namespace list, can't race with * other modifiers, also netns core already waited for a * RCU grace period.
*/
__flush_addrs(&pernet->local_addr_list);
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.