// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Generic netlink support functions to configure an SMC-R PNET table
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Thomas Richter <tmricht@linux.vnet.ibm.com>
 */
/* Add the reference to a given network device to the pnet table.
*/ staticint smc_pnet_add_by_ndev(struct net_device *ndev)
{ struct smc_pnetentry *pnetelem, *tmp_pe; struct smc_pnettable *pnettable; struct net *net = dev_net(ndev); struct smc_net *sn; int rc = -ENOENT;
/* get pnettable for namespace */
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
/* Remove the reference to a given network device from the pnet table.
*/ staticint smc_pnet_remove_by_ndev(struct net_device *ndev)
{ struct smc_pnetentry *pnetelem, *tmp_pe; struct smc_pnettable *pnettable; struct net *net = dev_net(ndev); struct smc_net *sn; int rc = -ENOENT;
/* get pnettable for namespace */
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
/* The limit for pnetid is 16 characters. * Valid characters should be (single-byte character set) a-z, A-Z, 0-9. * Lower case letters are converted to upper case. * Interior blanks should not be used.
*/ staticbool smc_pnetid_valid(constchar *pnet_name, char *pnetid)
{ char *bf = skip_spaces(pnet_name);
size_t len = strlen(bf); char *end = bf + len;
if (!len) returnfalse; while (--end >= bf && isspace(*end))
; if (end - bf >= SMC_MAX_PNETID_LEN) returnfalse; while (bf <= end) { if (!isalnum(*bf)) returnfalse;
*pnetid++ = islower(*bf) ? toupper(*bf) : *bf;
bf++;
}
*pnetid = '\0'; returntrue;
}
/* Find an infiniband device by a given name. The device might not exist. */ staticstruct smc_ib_device *smc_pnet_find_ib(char *ib_name)
{ struct smc_ib_device *ibdev;
/* Find an smcd device by a given name. The device might not exist. */ staticstruct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
{ struct smcd_dev *smcd_dev;
/* check if (base) netdev already has a pnetid. If there is one, we do * not want to add a pnet table entry
*/
rc = -EEXIST;
ndev = dev_get_by_name(net, eth_name); /* dev_hold() */ if (ndev) {
base_ndev = pnet_find_base_ndev(ndev); if (!smc_pnetid_by_dev_port(base_ndev->dev.parent,
base_ndev->dev_port, ndev_pnetid)) goto out_put;
}
/* add a new netdev entry to the pnet table if there isn't one */
rc = -ENOMEM;
new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); if (!new_pe) goto out_put;
new_pe->type = SMC_PNET_ETH;
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strscpy(new_pe->eth_name, eth_name);
rc = -EEXIST;
new_netdev = true;
mutex_lock(&pnettable->lock);
list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) { if (tmp_pe->type == SMC_PNET_ETH &&
!strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
new_netdev = false; break;
}
} if (new_netdev) { if (ndev) {
new_pe->ndev = ndev;
netdev_tracker_alloc(ndev, &new_pe->dev_tracker,
GFP_ATOMIC);
}
list_add_tail(&new_pe->list, &pnettable->pnetlist);
mutex_unlock(&pnettable->lock);
} else {
mutex_unlock(&pnettable->lock);
kfree(new_pe); goto out_put;
} if (ndev)
pr_warn_ratelimited("smc: net device %s " "applied user defined pnetid %.16s\n",
new_pe->eth_name, new_pe->pnet_name); return 0;
/* try to apply the pnetid to active devices */
ib_dev = smc_pnet_find_ib(ib_name); if (ib_dev) {
ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name); if (ibdev_applied)
pr_warn_ratelimited("smc: ib device %s ibport %d " "applied user defined pnetid " "%.16s\n", ib_dev->ibdev->name,
ib_port,
ib_dev->pnetid[ib_port - 1]);
}
smcd = smc_pnet_find_smcd(ib_name); if (smcd) {
smcddev_applied = smc_pnet_apply_smcd(smcd, pnet_name); if (smcddev_applied) {
dev = smcd->ops->get_dev(smcd);
pr_warn_ratelimited("smc: smcd device %s " "applied user defined pnetid " "%.16s\n", dev_name(dev),
smcd->pnetid);
}
} /* Apply fails when a device has a hardware-defined pnetid set, do not * add a pnet table entry in that case.
*/ if (!ibdev_applied || !smcddev_applied) return -EEXIST;
/* add a new ib entry to the pnet table if there isn't one */
new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL); if (!new_pe) return -ENOMEM;
new_pe->type = SMC_PNET_IB;
memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX);
new_pe->ib_port = ib_port;
/* Append a pnetid to the end of the pnet table if not already on this list.
*/ staticint smc_pnet_enter(struct net *net, struct nlattr *tb[])
{ char pnet_name[SMC_MAX_PNETID_LEN + 1]; struct smc_pnettable *pnettable; bool new_netdev = false; bool new_ibdev = false; struct smc_net *sn;
u8 ibport = 1; char *string; int rc;
/* get pnettable for namespace */
sn = net_generic(net, smc_net_id);
pnettable = &sn->pnettable;
rc = -EINVAL; if (!tb[SMC_PNETID_NAME]) goto error;
string = (char *)nla_data(tb[SMC_PNETID_NAME]); if (!smc_pnetid_valid(string, pnet_name)) goto error;
base_dev = __pnet_find_base_ndev(dev); if (base_dev->flags & IFF_UP &&
!smc_pnetid_by_dev_port(base_dev->dev.parent, base_dev->dev_port,
ndev_pnetid)) { /* add to PNETIDs list */
smc_pnet_add_pnetid(net, ndev_pnetid);
}
}
/* Create the initial list of netdevice pnetids for a network namespace.
 *
 * @net: network namespace whose net devices are walked
 *
 * Every net device of @net is handed to smc_pnet_add_base_pnetid() while
 * the rtnl lock is held.
 */
static void smc_pnet_create_pnetids_list(struct net *net)
{
	/* scratch buffer passed to smc_pnet_add_base_pnetid() for each device */
	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
	struct net_device *dev;

	/* Newly created netns do not have devices.
	 * Do not even acquire rtnl.
	 */
	if (list_empty(&net->dev_base_head))
		return;

	/* Note: This might not be needed, because smc_pnet_netdev_event()
	 * is also calling smc_pnet_add_base_pnetid() when handling
	 * NETDEV_UP event.
	 */
	rtnl_lock();
	for_each_netdev(net, dev)
		smc_pnet_add_base_pnetid(net, dev, ndev_pnetid);
	rtnl_unlock();
}
/* clean up list of netdevice pnetids */ staticvoid smc_pnet_destroy_pnetids_list(struct net *net)
{ struct smc_net *sn = net_generic(net, smc_net_id); struct smc_pnetids_ndev_entry *pe, *temp_pe;
/* Determine one base device for stacked net devices.
 * If the lower device level contains more than one device
 * (for instance with bonding slaves), just the first device
 * is used to reach a base device.
 *
 * @ndev: net device to start from
 *
 * Takes the rtnl lock around the call to __pnet_find_base_ndev() and
 * returns whatever that helper returns.
 */
static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
{
	rtnl_lock();
	ndev = __pnet_find_base_ndev(ndev);
	rtnl_unlock();
	return ndev;
}
/* Try to determine a GID for port @i of @ibdev and record the result in @ini.
 *
 * @ibdev: candidate IB device
 * @i:     port number on @ibdev
 * @ini:   init info; ini->check_smcrv2 selects which set of fields is used
 *
 * When ini->check_smcrv2 is not set, the GID is looked up into ini->ib_gid
 * and, on success, ini->ib_dev / ini->ib_port are set. When it is set, the
 * GID is looked up into ini->smcrv2.ib_gid_v2 and, on success,
 * ini->smcrv2.ib_dev_v2 / ini->smcrv2.ib_port_v2 are set.
 *
 * Returns 0 if smc_ib_determine_gid() returned 0 for the selected variant,
 * -ENODEV otherwise.
 */
static int smc_pnet_determine_gid(struct smc_ib_device *ibdev, int i,
				  struct smc_init_info *ini)
{
	if (!ini->check_smcrv2 &&
	    !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->ib_gid, NULL,
				  NULL)) {
		ini->ib_dev = ibdev;
		ini->ib_port = i;
		return 0;
	}
	if (ini->check_smcrv2 &&
	    !smc_ib_determine_gid(ibdev, i, ini->vlan_id, ini->smcrv2.ib_gid_v2,
				  NULL, &ini->smcrv2)) {
		ini->smcrv2.ib_dev_v2 = ibdev;
		ini->smcrv2.ib_port_v2 = i;
		return 0;
	}
	return -ENODEV;
}
/* Find a roce device for the given pnetid.
 *
 * @pnet_id:   pnetid that a device port must match
 * @ini:       init info; filled in by smc_pnet_determine_gid() on success
 * @known_dev: device to skip during the search (may be NULL)
 * @net:       net namespace the device must be accessible from
 *
 * Walks smc_ib_devices.list under smc_ib_devices.mutex and stops at the
 * first valid, active port that is not going away, whose pnetid matches
 * and for which smc_pnet_determine_gid() succeeds. If no port qualifies,
 * @ini is left as it was by this function.
 */
static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
					  struct smc_init_info *ini,
					  struct smc_ib_device *known_dev,
					  struct net *net)
{
	struct smc_ib_device *ibdev;
	int i;

	mutex_lock(&smc_ib_devices.mutex);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		if (ibdev == known_dev ||
		    !rdma_dev_access_netns(ibdev->ibdev, net))
			continue;
		/* ports are numbered from 1; per-port arrays are 0-based */
		for (i = 1; i <= SMC_MAX_PORTS; i++) {
			if (!rdma_is_port_valid(ibdev->ibdev, i))
				continue;
			if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
			    smc_ib_port_active(ibdev, i) &&
			    !test_bit(i - 1, ibdev->ports_going_away)) {
				/* first usable port wins; stop the search */
				if (!smc_pnet_determine_gid(ibdev, i, ini))
					goto out;
			}
		}
	}
out:
	mutex_unlock(&smc_ib_devices.mutex);
}
/* Look for an alternate roce device that shares the link group's pnet_id,
 * vlan_id and net namespace.
 *
 * @lgr:       link group supplying the pnetid and the net namespace
 * @ini:       init info handed through to the pnetid based search
 * @known_dev: device that the search must skip
 */
void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
			    struct smc_init_info *ini,
			    struct smc_ib_device *known_dev)
{
	_smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev, lgr->net);
}
/* If handshake network device belongs to a roce device, return its
 * IB device and port.
 *
 * @netdev: handshake net device to look for
 * @ini:    init info; filled in by smc_pnet_determine_gid() on a match
 *
 * Walks smc_ib_devices.list under smc_ib_devices.mutex. For each device
 * accessible from the net namespace of @netdev, every valid port's net
 * device is compared against @netdev.
 *
 * NOTE(review): the break below only leaves the port loop; the remaining
 * IB devices are still visited, so a later matching device would overwrite
 * the result stored in @ini. Confirm whether that is intended.
 */
static void smc_pnet_find_rdma_dev(struct net_device *netdev,
				   struct smc_init_info *ini)
{
	struct net *net = dev_net(netdev);
	struct smc_ib_device *ibdev;

	mutex_lock(&smc_ib_devices.mutex);
	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
		struct net_device *ndev;
		int i;

		/* check rdma net namespace */
		if (!rdma_dev_access_netns(ibdev->ibdev, net))
			continue;

		for (i = 1; i <= SMC_MAX_PORTS; i++) {
			if (!rdma_is_port_valid(ibdev->ibdev, i))
				continue;
			ndev = ib_device_get_netdev(ibdev->ibdev, i);
			if (!ndev)
				continue;
			/* reference is dropped right away; ndev is only
			 * used for the pointer comparison below
			 */
			dev_put(ndev);
			if (netdev == ndev &&
			    smc_ib_port_active(ibdev, i) &&
			    !test_bit(i - 1, ibdev->ports_going_away)) {
				if (!smc_pnet_determine_gid(ibdev, i, ini))
					break;
			}
		}
	}
	mutex_unlock(&smc_ib_devices.mutex);
}
/* Determine the corresponding IB device port based on the hardware PNETID.
 * Searching stops at the first matching active IB device port with vlan_id
 * configured.
 * If nothing found, check pnetid table.
 * If nothing found, try to use handshake device
 *
 * @ndev: handshake net device
 * @ini:  init info that receives the selected IB device and port
 */
static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
					 struct smc_init_info *ini)
{
	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
	struct net_device *base_ndev;
	struct net *net;

	base_ndev = pnet_find_base_ndev(ndev);
	net = dev_net(ndev);
	/* each lookup returns non-zero on failure; only when all three
	 * fail is the handshake device itself tried
	 */
	if (smc_pnetid_by_dev_port(base_ndev->dev.parent, base_ndev->dev_port,
				   ndev_pnetid) &&
	    smc_pnet_find_ndev_pnetid_by_table(base_ndev, ndev_pnetid) &&
	    smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) {
		smc_pnet_find_rdma_dev(base_ndev, ini);
		return; /* pnetid could not be determined */
	}
	_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL, net);
}
ndev = pnet_find_base_ndev(ndev); if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
ndev_pnetid) &&
smc_pnet_find_ndev_pnetid_by_table(ndev, ndev_pnetid)) return; /* pnetid could not be determined */
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.