// SPDX-License-Identifier: GPL-2.0-only
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * AF_SMC protocol family socket handler keeping the AF_INET sock address type
 * applies to SOCK_STREAM sockets only
 * offers an alternative communication option for TCP-protocol sockets
 * applicable with RoCE-cards only
 *
 * Initial restrictions:
 *   - support for alternate links postponed
 *
 * Copyright IBM Corp. 2016, 2018
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 *             based on prototype from Frank Blaschka
 */
/* serialize link group creation on server */
static DEFINE_MUTEX(smc_server_lgr_pending);
/* serialize link group creation on client */
static DEFINE_MUTEX(smc_client_lgr_pending);
staticstruct workqueue_struct *smc_tcp_ls_wq; /* wq for tcp listen work */ struct workqueue_struct *smc_hs_wq; /* wq for handshake work */ struct workqueue_struct *smc_close_wq; /* wq for close work */
if (READ_ONCE(sk->sk_ack_backlog) + atomic_read(&smc->queued_smc_hs) >
sk->sk_max_ack_backlog) goto drop;
if (sk_acceptq_is_full(&smc->sk)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); goto drop;
}
/* passthrough to original syn recv sock fct */
child = smc->ori_af_ops->syn_recv_sock(sk, skb, req, dst, req_unhash,
own_req); /* child must not inherit smc or its ops */ if (child) {
rcu_assign_sk_user_data(child, NULL);
write_lock_bh(&h->lock); if (sk_del_node_init(sk))
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
write_unlock_bh(&h->lock);
}
/* This will be called before user really release sock_lock. So do the
 * work which we didn't do because of user hold the sock_lock in the
 * BH context
 */
void smc_release_cb(struct sock *sk)
{
	struct smc_sock *smc = smc_sk(sk);

	/* nothing deferred -> nothing to do */
	if (!smc->conn.tx_in_release_sock)
		return;

	/* flush the tx work that was postponed while the lock was held */
	smc_tx_pending(&smc->conn);
	smc->conn.tx_in_release_sock = false;
}
/* cleanup for a dangling non-blocking connect */ if (smc->connect_nonblock && old_state == SMC_INIT)
tcp_abort(smc->clcsock->sk, ECONNABORTED);
if (cancel_work_sync(&smc->connect_work))
sock_put(&smc->sk); /* sock_hold in smc_connect for passive closing */
if (sk->sk_state == SMC_LISTEN) /* smc_close_non_accepted() is called and acquires * sock lock for child sockets again
*/
lock_sock_nested(sk, SINGLE_DEPTH_NESTING); else
lock_sock(sk);
/* if set, use value set by setsockopt() - else use IPv4 or SMC sysctl value */ staticvoid smc_adjust_sock_bufsizes(struct sock *nsk, struct sock *osk, unsignedlong mask)
{
nsk->sk_userlocks = osk->sk_userlocks; if (osk->sk_userlocks & SOCK_SNDBUF_LOCK)
nsk->sk_sndbuf = osk->sk_sndbuf; if (osk->sk_userlocks & SOCK_RCVBUF_LOCK)
nsk->sk_rcvbuf = osk->sk_rcvbuf;
}
staticvoid smc_copy_sock_settings(struct sock *nsk, struct sock *osk, unsignedlong mask)
{ /* options we don't get control via setsockopt for */
nsk->sk_type = osk->sk_type;
nsk->sk_sndtimeo = READ_ONCE(osk->sk_sndtimeo);
nsk->sk_rcvtimeo = READ_ONCE(osk->sk_rcvtimeo);
nsk->sk_mark = READ_ONCE(osk->sk_mark);
nsk->sk_priority = READ_ONCE(osk->sk_priority);
nsk->sk_rcvlowat = osk->sk_rcvlowat;
nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
nsk->sk_err = osk->sk_err;
#define SK_FLAGS_CLC_TO_SMC ((1UL << SOCK_URGINLINE) | \
(1UL << SOCK_KEEPOPEN) | \
(1UL << SOCK_LINGER) | \
(1UL << SOCK_DBG)) /* copy only settings and flags relevant for smc from clc to smc socket */ staticvoid smc_copy_sock_settings_to_smc(struct smc_sock *smc)
{
smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
}
/* register the new vzalloced sndbuf on all links */
static int smcr_lgr_reg_sndbufs(struct smc_link *link,
				struct smc_buf_desc *snd_desc)
{
	struct smc_link_group *lgr = link->lgr;
	int i, rc = 0;

	/* only virtually allocated sndbufs need this registration */
	if (!snd_desc->is_vm)
		return -EINVAL;

	/* protect against parallel smcr_link_reg_buf() */
	down_write(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		rc = smcr_link_reg_buf(&lgr->lnk[i], snd_desc);
		if (rc)
			break;
	}
	up_write(&lgr->llc_conf_mutex);
	return rc;
}
/* register the new rmb on all links */
static int smcr_lgr_reg_rmbs(struct smc_link *link,
			     struct smc_buf_desc *rmb_desc)
{
	struct smc_link_group *lgr = link->lgr;
	bool do_slow = false;
	int i, rc = 0;

	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
	if (rc)
		return rc;

	/* fast path: all active links already have the MR registered */
	down_read(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		if (!rmb_desc->is_reg_mr[link->link_idx]) {
			up_read(&lgr->llc_conf_mutex);
			goto slow_path;
		}
	}
	/* mr register already */
	goto fast_path;
slow_path:
	do_slow = true;
	/* protect against parallel smc_llc_cli_rkey_exchange() and
	 * parallel smcr_link_reg_buf()
	 */
	down_write(&lgr->llc_conf_mutex);
	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
		if (!smc_link_active(&lgr->lnk[i]))
			continue;
		rc = smcr_link_reg_buf(&lgr->lnk[i], rmb_desc);
		if (rc)
			goto out;
	}
fast_path:
	/* exchange confirm_rkey msg with peer */
	rc = smc_llc_do_confirm_rkey(link, rmb_desc);
	if (rc) {
		rc = -EFAULT;
		goto out;
	}
	rmb_desc->is_conf_rkey = true;
out:
	/* release whichever mode of the conf mutex we took */
	do_slow ? up_write(&lgr->llc_conf_mutex) : up_read(&lgr->llc_conf_mutex);
	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
	return rc;
}
/* Receive CONFIRM LINK request from server over RoCE fabric. * Increasing the client's timeout by twice as much as the server's * timeout by default can temporarily avoid decline messages of * both sides crossing or colliding
*/
qentry = smc_llc_wait(link->lgr, NULL, 2 * SMC_LLC_WAIT_TIME,
SMC_LLC_CONFIRM_LINK); if (!qentry) { struct smc_clc_msg_decline dclc;
rc = smc_ib_modify_qp_rts(link); if (rc) return SMC_CLC_DECL_ERR_RDYLNK;
smc_wr_remember_qp_attr(link);
/* reg the sndbuf if it was vzalloced */ if (smc->conn.sndbuf_desc->is_vm) { if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc)) return SMC_CLC_DECL_ERR_REGBUF;
}
/* reg the rmb */ if (smcr_link_reg_buf(link, smc->conn.rmb_desc)) return SMC_CLC_DECL_ERR_REGBUF;
/* confirm_rkey is implicit on 1st contact */
smc->conn.rmb_desc->is_conf_rkey = true;
/* send CONFIRM LINK response over RoCE fabric */
rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP); if (rc < 0) return SMC_CLC_DECL_TIMEOUT_CL;
/* There might be some wait entries remaining * in smc sk->sk_wq and they should be woken up * as clcsock's wait queue is woken up.
*/
smc_fback_replace_callbacks(smc);
}
out:
mutex_unlock(&smc->clcsock_release_lock); return rc;
}
/* fall back during connect */ staticint smc_connect_fallback(struct smc_sock *smc, int reason_code)
{ struct net *net = sock_net(&smc->sk); int rc = 0;
/* decline and fall back during connect */
static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code,
					u8 version)
{
	struct net *net = sock_net(&smc->sk);
	int rc;

	if (reason_code < 0) { /* error, fallback is not possible */
		this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
		if (smc->sk.sk_state == SMC_INIT)
			sock_put(&smc->sk); /* passive closing */
		return reason_code;
	}
	if (reason_code != SMC_CLC_DECL_PEERDECL) {
		/* tell the peer why we decline before falling back */
		rc = smc_clc_send_decline(smc, reason_code, version);
		if (rc < 0) {
			this_cpu_inc(net->smc.smc_stats->clnt_hshake_err_cnt);
			if (smc->sk.sk_state == SMC_INIT)
				sock_put(&smc->sk); /* passive closing */
			return rc;
		}
	}
	return smc_connect_fallback(smc, reason_code);
}
smc_conn_free(conn); if (local_first && lgr_valid)
smc_lgr_cleanup_early(lgr);
}
/* check if there is a rdma device available for this connection. */
/* called for connect and listen */
static int smc_find_rdma_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* PNET table look up: search active ib_device and port
	 * within same PNETID that also contains the ethernet device
	 * used for the internal TCP socket
	 */
	smc_pnet_find_roce_resource(smc->clcsock->sk, ini);
	if (!ini->check_smcrv2 && !ini->ib_dev)
		return SMC_CLC_DECL_NOSMCRDEV;
	if (ini->check_smcrv2 && !ini->smcrv2.ib_dev_v2)
		return SMC_CLC_DECL_NOSMCRDEV;
	return 0;
}
/* check if there is an ISM device available for this connection. */
/* called for connect and listen */
static int smc_find_ism_device(struct smc_sock *smc, struct smc_init_info *ini)
{
	/* Find ISM device with same PNETID as connecting interface */
	smc_pnet_find_ism_resource(smc->clcsock->sk, ini);
	if (!ini->ism_dev[0])
		return SMC_CLC_DECL_NOSMCDDEV;
	else
		ini->ism_chid[0] = smc_ism_get_chid(ini->ism_dev[0]);
	return 0;
}
/* is chid unique for the ism devices that are already determined? */ staticbool smc_find_ism_v2_is_unique_chid(u16 chid, struct smc_init_info *ini, int cnt)
{ int i = (!ini->ism_dev[0]) ? 1 : 0;
for (; i < cnt; i++) if (ini->ism_chid[i] == chid) returnfalse; returntrue;
}
/* determine possible V2 ISM devices (either without PNETID or with PNETID plus
 * PNETID matching net_device)
 */
static int smc_find_ism_v2_device_clnt(struct smc_sock *smc,
				       struct smc_init_info *ini)
{
	int rc = SMC_CLC_DECL_NOSMCDDEV;
	struct smcd_dev *smcd;
	int i = 1, entry = 1;
	bool is_emulated;
	u16 chid;

	if (smcd_indicated(ini->smc_type_v1))
		rc = 0;		/* already initialized for V1 */

	mutex_lock(&smcd_dev_list.mutex);
	list_for_each_entry(smcd, &smcd_dev_list.list, list) {
		if (smcd->going_away || smcd == ini->ism_dev[0])
			continue;
		chid = smc_ism_get_chid(smcd);
		if (!smc_find_ism_v2_is_unique_chid(chid, ini, i))
			continue;
		is_emulated = __smc_ism_is_emulated(chid);
		if (!smc_pnet_is_pnetid_set(smcd->pnetid) ||
		    smc_pnet_is_ndev_pnetid(sock_net(&smc->sk), smcd->pnetid)) {
			if (is_emulated && entry == SMCD_CLC_MAX_V2_GID_ENTRIES)
				/* It's the last GID-CHID entry left in CLC
				 * Proposal SMC-Dv2 extension, but an Emulated-
				 * ISM device will take two entries. So give
				 * up it and try the next potential ISM device.
				 */
				continue;
			ini->ism_dev[i] = smcd;
			ini->ism_chid[i] = chid;
			ini->is_smcd = true;
			rc = 0;
			i++;
			entry = is_emulated ? entry + 2 : entry + 1;
			if (entry > SMCD_CLC_MAX_V2_GID_ENTRIES)
				break;
		}
	}
	mutex_unlock(&smcd_dev_list.mutex);
	ini->ism_offered_cnt = i - 1;
	if (!ini->ism_dev[0] && !ini->ism_dev[1])
		ini->smcd_version = 0;

	return rc;
}
/* Check for VLAN ID and register it on ISM device just for CLC handshake */
static int smc_connect_ism_vlan_setup(struct smc_sock *smc,
				      struct smc_init_info *ini)
{
	if (ini->vlan_id && smc_ism_get_vlan(ini->ism_dev[0], ini->vlan_id))
		return SMC_CLC_DECL_ISMVLANERR;
	return 0;
}
/* check if there is an ism device available */ if (!(ini->smcd_version & SMC_V1) ||
smc_find_ism_device(smc, ini) ||
smc_connect_ism_vlan_setup(smc, ini))
ini->smcd_version &= ~SMC_V1; /* else ISM V1 is supported for this connection */
/* check if there is an rdma device available */ if (!(ini->smcr_version & SMC_V1) ||
smc_find_rdma_device(smc, ini))
ini->smcr_version &= ~SMC_V1; /* else RDMA is supported for this connection */
/* check if there is an ism v2 device available */ if (!(ini->smcd_version & SMC_V2) ||
!smc_ism_is_v2_capable() ||
smc_find_ism_v2_device_clnt(smc, ini))
ini->smcd_version &= ~SMC_V2;
/* check if there is an rdma v2 device available */
ini->check_smcrv2 = true;
ini->smcrv2.saddr = smc->clcsock->sk->sk_rcv_saddr; if (!(ini->smcr_version & SMC_V2) || #if IS_ENABLED(CONFIG_IPV6)
(smc->clcsock->sk->sk_family == AF_INET6 &&
!ipv6_addr_v4mapped(&smc->clcsock->sk->sk_v6_rcv_saddr)) || #endif
!smc_clc_ueid_count() ||
smc_find_rdma_device(smc, ini))
ini->smcr_version &= ~SMC_V2;
ini->check_smcrv2 = false;
/* if neither ISM nor RDMA are supported, fallback */ if (ini->smc_type_v1 == SMC_TYPE_N && ini->smc_type_v2 == SMC_TYPE_N)
rc = SMC_CLC_DECL_NOSMCDEV;
return rc;
}
/* cleanup temporary VLAN ID registration used for CLC handshake. If ISM is
 * used, the VLAN ID will be registered again during the connection setup.
 */
static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc,
					struct smc_init_info *ini)
{
	if (!smcd_indicated(ini->smc_type_v1))
		return 0;
	if (ini->vlan_id && smc_ism_put_vlan(ini->ism_dev[0], ini->vlan_id))
		return SMC_CLC_DECL_CNFERR;
	return 0;
}
if (ini->first_contact_local) {
link = smc->conn.lnk;
} else { /* set link that was assigned by server */
link = NULL; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *l = &smc->conn.lgr->lnk[i];
/* The server has chosen one of the proposed ISM devices for the communication.
 * Determine from the CHID of the received CLC ACCEPT the ISM device chosen.
 */
static int
smc_v2_determine_accepted_chid(struct smc_clc_msg_accept_confirm *aclc,
			       struct smc_init_info *ini)
{
	int i;

	/* ism_offered_cnt + 1 because slot 0 may hold the V1 device */
	for (i = 0; i < ini->ism_offered_cnt + 1; i++) {
		if (ini->ism_chid[i] == ntohs(aclc->d1.chid)) {
			ini->ism_selected = i;
			return 0;
		}
	}

	return -EPROTO;
}
/* setup for ISM connection of client */ staticint smc_connect_ism(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *aclc, struct smc_init_info *ini)
{
u8 *eid = NULL; int rc = 0;
rc = smc_v2_determine_accepted_chid(aclc, ini); if (rc) return rc;
if (__smc_ism_is_emulated(ini->ism_chid[ini->ism_selected]))
ini->ism_peer_gid[ini->ism_selected].gid_ext =
ntohll(aclc->d1.gid_ext); /* for non-Emulated-ISM devices, peer gid_ext remains 0. */
}
ini->ism_peer_gid[ini->ism_selected].gid = ntohll(aclc->d0.gid);
/* there is only one lgr role for SMC-D; use server lock */
mutex_lock(&smc_server_lgr_pending);
rc = smc_conn_create(smc, ini); if (rc) {
mutex_unlock(&smc_server_lgr_pending); return rc;
}
if (smc->use_fallback) return smc_connect_fallback(smc, smc->fallback_rsn);
/* if peer has not signalled SMC-capability, fall back */ if (!tcp_sk(smc->clcsock->sk)->syn_smc) return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
/* IPSec connections opt out of SMC optimizations */ if (using_ipsec(smc)) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_IPSEC,
version);
ini = kzalloc(sizeof(*ini), GFP_KERNEL); if (!ini) return smc_connect_decline_fallback(smc, SMC_CLC_DECL_MEM,
version);
/* get vlan id from IP device */ if (smc_vlan_by_tcpsk(smc->clcsock, ini)) {
ini->smcd_version &= ~SMC_V1;
ini->smcr_version = 0;
ini->smc_type_v1 = SMC_TYPE_N;
}
rc = smc_find_proposal_devices(smc, ini); if (rc) goto fallback;
/* perform CLC handshake */
rc = smc_connect_clc(smc, aclc, ini); if (rc) { /* -EAGAIN on timeout, see tcp_recvmsg() */ if (rc == -EAGAIN) {
rc = -ETIMEDOUT;
smc->sk.sk_err = ETIMEDOUT;
} goto vlan_cleanup;
}
/* check if smc modes and versions of CLC proposal and accept match */
rc = smc_connect_check_aclc(ini, aclc);
version = aclc->hdr.version == SMC_V1 ? SMC_V1 : SMC_V2; if (rc) goto vlan_cleanup;
/* depending on previous steps, connect using rdma or ism */ if (aclc->hdr.typev1 == SMC_TYPE_R) {
ini->smcr_version = version;
rc = smc_connect_rdma(smc, aclc, ini);
} elseif (aclc->hdr.typev1 == SMC_TYPE_D) {
ini->smcd_version = version;
rc = smc_connect_ism(smc, aclc, ini);
} if (rc) goto vlan_cleanup;
rc = __smc_connect(smc); if (rc < 0)
smc->sk.sk_err = -rc;
out: if (!sock_flag(&smc->sk, SOCK_DEAD)) { if (smc->sk.sk_err) {
smc->sk.sk_state_change(&smc->sk);
} else { /* allow polling before and after fallback decision */
smc->clcsock->sk->sk_write_space(smc->clcsock->sk);
smc->sk.sk_write_space(&smc->sk);
}
}
release_sock(&smc->sk);
}
int smc_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags)
{ struct sock *sk = sock->sk; struct smc_sock *smc; int rc = -EINVAL;
smc = smc_sk(sk);
/* separate smc parameter checking to be safe */ if (alen < sizeof(addr->sa_family)) goto out_err; if (addr->sa_family != AF_INET && addr->sa_family != AF_INET6) goto out_err;
lock_sock(sk); switch (sock->state) { default:
rc = -EINVAL; goto out; case SS_CONNECTED:
rc = sk->sk_state == SMC_ACTIVE ? -EISCONN : -EINVAL; goto out; case SS_CONNECTING: if (sk->sk_state == SMC_ACTIVE) goto connected; break; case SS_UNCONNECTED:
sock->state = SS_CONNECTING; break;
}
switch (sk->sk_state) { default: goto out; case SMC_CLOSED:
rc = sock_error(sk) ? : -ECONNABORTED;
sock->state = SS_UNCONNECTED; goto out; case SMC_ACTIVE:
rc = -EISCONN; goto out; case SMC_INIT: break;
}
mutex_lock(&lsmc->clcsock_release_lock); if (lsmc->clcsock)
rc = kernel_accept(lsmc->clcsock, &new_clcsock, SOCK_NONBLOCK);
mutex_unlock(&lsmc->clcsock_release_lock);
lock_sock(lsk); if (rc < 0 && rc != -EAGAIN)
lsk->sk_err = -rc; if (rc < 0 || lsk->sk_state == SMC_CLOSED) {
new_sk->sk_prot->unhash(new_sk); if (new_clcsock)
sock_release(new_clcsock);
new_sk->sk_state = SMC_CLOSED;
smc_sock_set_flag(new_sk, SOCK_DEAD);
sock_put(new_sk); /* final */
*new_smc = NULL; goto out;
}
/* new clcsock has inherited the smc listen-specific sk_data_ready * function; switch it back to the original sk_data_ready function
*/
new_clcsock->sk->sk_data_ready = lsmc->clcsk_data_ready;
/* if new clcsock has also inherited the fallback-specific callback * functions, switch them back to the original ones.
*/ if (lsmc->use_fallback) { if (lsmc->clcsk_state_change)
new_clcsock->sk->sk_state_change = lsmc->clcsk_state_change; if (lsmc->clcsk_write_space)
new_clcsock->sk->sk_write_space = lsmc->clcsk_write_space; if (lsmc->clcsk_error_report)
new_clcsock->sk->sk_error_report = lsmc->clcsk_error_report;
}
/* add a just created sock to the accept queue of the listen sock as * candidate for a following socket accept call from user space
*/ staticvoid smc_accept_enqueue(struct sock *parent, struct sock *sk)
{ struct smc_sock *par = smc_sk(parent);
/* remove a socket from the accept queue of its parental listening socket */ staticvoid smc_accept_unlink(struct sock *sk)
{ struct smc_sock *par = smc_sk(sk)->listen_smc;
/* remove a sock from the accept queue to bind it to a new socket created
 * for a socket accept call from user space
 */
struct sock *smc_accept_dequeue(struct sock *parent, struct socket *new_sock)
{
	struct smc_sock *cur, *tmp;
	struct sock *child;

	list_for_each_entry_safe(cur, tmp, &smc_sk(parent)->accept_q,
				 accept_q) {
		child = (struct sock *)cur;

		smc_accept_unlink(child);
		if (child->sk_state == SMC_CLOSED) {
			/* child closed while queued - drop it and keep going */
			child->sk_prot->unhash(child);
			if (cur->clcsock) {
				sock_release(cur->clcsock);
				cur->clcsock = NULL;
			}
			sock_put(child); /* final */
			continue;
		}
		if (new_sock) {
			/* graft the child onto the user-space socket */
			sock_graft(child, new_sock);
			new_sock->state = SS_CONNECTED;
			if (cur->use_fallback) {
				smc_sk(child)->clcsock->file = new_sock->file;
				cur->clcsock->file->private_data = cur->clcsock;
			}
		}
		return child;
	}
	return NULL;
}
/* clean up for a created but never accepted sock */
void smc_close_non_accepted(struct sock *sk)
{
	struct smc_sock *smc_child = smc_sk(sk);

	sock_hold(sk);		/* balanced by first sock_put below */
	lock_sock(sk);
	/* no linger time configured: wait for peer closing */
	if (!sk->sk_lingertime)
		WRITE_ONCE(sk->sk_lingertime, SMC_MAX_STREAM_WAIT_TIMEOUT);
	__smc_release(smc_child);
	release_sock(sk);
	sock_put(sk);		/* drops the sock_hold above */
	sock_put(sk);		/* final sock_put */
}
/* reg the sndbuf if it was vzalloced*/ if (smc->conn.sndbuf_desc->is_vm) { if (smcr_link_reg_buf(link, smc->conn.sndbuf_desc)) return SMC_CLC_DECL_ERR_REGBUF;
}
/* reg the rmb */ if (smcr_link_reg_buf(link, smc->conn.rmb_desc)) return SMC_CLC_DECL_ERR_REGBUF;
/* send CONFIRM LINK request to client over the RoCE fabric */
rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ); if (rc < 0) return SMC_CLC_DECL_TIMEOUT_CL;
/* receive CONFIRM LINK response from client over the RoCE fabric */
qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
SMC_LLC_CONFIRM_LINK); if (!qentry) { struct smc_clc_msg_decline dclc;
/* listen worker: decline and fall back if possible */
static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
			       int local_first, u8 version)
{
	/* RDMA setup failed, switch back to TCP */
	smc_conn_abort(new_smc, local_first);
	if (reason_code < 0 ||
	    smc_switch_to_fallback(new_smc, reason_code)) {
		/* error, no fallback possible */
		smc_listen_out_err(new_smc);
		return;
	}
	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
		/* inform the peer why SMC is declined */
		if (smc_clc_send_decline(new_smc, reason_code, version) < 0) {
			smc_listen_out_err(new_smc);
			return;
		}
	}
	smc_listen_out_connected(new_smc);
}
mutex_lock(&smcd_dev_list.mutex); if (pclc_smcd->ism.chid) { /* check for ISM device matching proposed native ISM device */
smcd_gid.gid = ntohll(pclc_smcd->ism.gid);
smcd_gid.gid_ext = 0;
smc_check_ism_v2_match(ini, ntohs(pclc_smcd->ism.chid),
&smcd_gid, &matches);
} for (i = 0; i < smc_v2_ext->hdr.ism_gid_cnt; i++) { /* check for ISM devices matching proposed non-native ISM * devices
*/
smcd_gid.gid = ntohll(smcd_v2_ext->gidchid[i].gid);
smcd_gid.gid_ext = 0;
chid = ntohs(smcd_v2_ext->gidchid[i].chid); if (__smc_ism_is_emulated(chid)) { if ((i + 1) == smc_v2_ext->hdr.ism_gid_cnt ||
chid != ntohs(smcd_v2_ext->gidchid[i + 1].chid)) /* each Emulated-ISM device takes two GID-CHID * entries and CHID of the second entry repeats * that of the first entry. * * So check if the next GID-CHID entry exists * and both two entries' CHIDs are the same.
*/ continue;
smcd_gid.gid_ext =
ntohll(smcd_v2_ext->gidchid[++i].gid);
}
smc_check_ism_v2_match(ini, chid, &smcd_gid, &matches);
}
mutex_unlock(&smcd_dev_list.mutex);
if (!ini->ism_dev[0]) {
smc_find_ism_store_rc(SMC_CLC_DECL_NOSMCD2DEV, ini); goto not_found;
}
smc_ism_get_system_eid(&eid); if (!smc_clc_match_eid(ini->negotiated_eid, smc_v2_ext,
smcd_v2_ext->system_eid, eid)) goto not_found;
/* separate - outside the smcd_dev_list.lock */
smcd_version = ini->smcd_version; for (i = 0; i < matches; i++) {
ini->smcd_version = SMC_V2;
ini->is_smcd = true;
ini->ism_selected = i;
rc = smc_listen_ism_init(new_smc, ini); if (rc) {
smc_find_ism_store_rc(rc, ini); /* try next active ISM device */ continue;
} return; /* matching and usable V2 ISM device found */
} /* no V2 ISM device could be initialized */
ini->smcd_version = smcd_version; /* restore original value */
ini->negotiated_eid[0] = 0;
if (!local_first) { /* reg sendbufs if they were vzalloced */ if (conn->sndbuf_desc->is_vm) { if (smcr_lgr_reg_sndbufs(conn->lnk,
conn->sndbuf_desc)) return SMC_CLC_DECL_ERR_REGBUF;
} if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc)) return SMC_CLC_DECL_ERR_REGBUF;
}
/* determine the local device matching to proposal */ staticint smc_listen_find_device(struct smc_sock *new_smc, struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini)
{ int prfx_rc;
/* check for ISM device matching V2 proposed device */
smc_find_ism_v2_device_serv(new_smc, pclc, ini); if (ini->ism_dev[0]) return 0;
/* check for matching IP prefix and subnet length (V1) */
prfx_rc = smc_listen_prfx_check(new_smc, pclc); if (prfx_rc)
smc_find_ism_store_rc(prfx_rc, ini);
/* get vlan id from IP device */ if (smc_vlan_by_tcpsk(new_smc->clcsock, ini)) return ini->rc ?: SMC_CLC_DECL_GETVLANERR;
/* check for ISM device matching V1 proposed device */ if (!prfx_rc)
smc_find_ism_v1_device_serv(new_smc, pclc, ini); if (ini->ism_dev[0]) return 0;
if (!smcr_indicated(pclc->hdr.typev1) &&
!smcr_indicated(pclc->hdr.typev2)) /* skip RDMA and decline */ return ini->rc ?: SMC_CLC_DECL_NOSMCDDEV;
/* check if RDMA V2 is available */
smc_find_rdma_v2_device_serv(new_smc, pclc, ini); if (ini->smcrv2.ib_dev_v2) return 0;
/* check if RDMA V1 is available */ if (!prfx_rc) { int rc;
/* (extraction residue, not part of the kernel source: a German web-viewer
 * disclaimer reading roughly "The information on this web page was compiled
 * carefully to the best of our knowledge; however, neither completeness,
 * correctness, nor quality of the provided information is guaranteed.
 * Note: the colored syntax display and the measurement are still
 * experimental." Safe to delete from this file.)
 */