/* * This file is part of the Chelsio T4 PCI-E SR-IOV Virtual Function Ethernet * driver for Linux. * * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
/*
 * Interrupt scheme selection.
 *
 * The driver uses the best interrupt scheme available on a platform in the
 * order MSI-X then MSI.  The "msi" module parameter determines which of these
 * schemes the driver may consider as follows:
 *
 *     msi = 2: choose from among MSI-X and MSI
 *     msi = 1: only consider MSI interrupts
 *
 * Note that unlike the Physical Function driver, this Virtual Function driver
 * does _not_ support legacy INTx interrupts (this limitation is mandated by
 * the PCI-E SR-IOV standard).
 */
#define MSI_MSIX	2
#define MSI_MSI		1
#define MSI_DEFAULT	MSI_MSIX

/* Defaults to MSI_MSIX, i.e. both MSI-X and MSI may be considered. */
static int msi = MSI_DEFAULT;

module_param(msi, int, 0644);
MODULE_PARM_DESC(msi, "whether to use MSI-X or MSI");
/* * Fundamental constants. * ======================
*/
/* * For purposes of manipulating the Free List size we need to * recognize that Free Lists are actually Egress Queues (the host * produces free buffers which the hardware consumes), Egress Queues * indices are all in units of Egress Context Units bytes, and free * list entries are 64-bit PCI DMA addresses. And since the state of * the Producer Index == the Consumer Index implies an EMPTY list, we * always have at least one Egress Unit's worth of Free List entries * unused. See sge.c for more details ...
*/
EQ_UNIT = SGE_EQ_IDXSIZE,
FL_PER_EQ_UNIT = EQ_UNIT / sizeof(__be64),
MIN_FL_RESID = FL_PER_EQ_UNIT,
};
/* * Global driver state. * ====================
*/
/*
 * File-scope debugfs directory entry for the driver.
 * NOTE(review): presumably the parent of each adapter's debugfs_root --
 * confirm against the module init code, which is not visible here.
 */
static struct dentry *cxgb4vf_debugfs_root;
/* * OS "Callback" functions. * ========================
*/
/* * The link status has changed on the indicated "port" (Virtual Interface).
*/ void t4vf_os_link_changed(struct adapter *adapter, int pidx, int link_ok)
{ struct net_device *dev = adapter->port[pidx];
/* * If the port is disabled or the current recorded "link up" * status matches the new status, just return.
*/ if (!netif_running(dev) || link_ok == netif_carrier_ok(dev)) return;
/* * Tell the OS that the link status has changed and print a short * informative message on the console about the event.
*/ if (link_ok) { constchar *s; constchar *fc; conststruct port_info *pi = netdev_priv(dev);
netif_carrier_on(dev);
switch (pi->link_cfg.speed) { case 100:
s = "100Mbps"; break; case 1000:
s = "1Gbps"; break; case 10000:
s = "10Gbps"; break; case 25000:
s = "25Gbps"; break; case 40000:
s = "40Gbps"; break; case 100000:
s = "100Gbps"; break;
default:
s = "unknown"; break;
}
switch ((int)pi->link_cfg.fc) { case PAUSE_RX:
fc = "RX"; break;
/* Calculate the hash vector for the updated list and program it */
list_for_each_entry(entry, &adapter->mac_hlist, list) {
ucast |= is_unicast_ether_addr(entry->addr);
vec |= (1ULL << hash_mac_addr(entry->addr));
} return t4vf_set_addr_hash(adapter, pi->viid, ucast, vec, false);
}
/** * cxgb4vf_change_mac - Update match filter for a MAC address. * @pi: the port_info * @viid: the VI id * @tcam_idx: TCAM index of existing filter for old value of MAC address, * or -1 * @addr: the new MAC address value * @persistent: whether a new MAC allocation should be persistent * * Modifies an MPS filter and sets it to the new MAC address if * @tcam_idx >= 0, or adds the MAC address to a new filter if * @tcam_idx < 0. In the latter case the address is added persistently * if @persist is %true. * Addresses are programmed to hash region, if tcam runs out of entries. *
*/ staticint cxgb4vf_change_mac(struct port_info *pi, unsignedint viid, int *tcam_idx, const u8 *addr, bool persistent)
{ struct hash_mac_addr *new_entry, *entry; struct adapter *adapter = pi->adapter; int ret;
ret = t4vf_change_mac(adapter, viid, *tcam_idx, addr, persistent); /* We ran out of TCAM entries. try programming hash region. */ if (ret == -ENOMEM) { /* If the MAC address to be updated is in the hash addr * list, update it from the list
*/
list_for_each_entry(entry, &adapter->mac_hlist, list) { if (entry->iface_mac) {
ether_addr_copy(entry->addr, addr); goto set_hash;
}
}
new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL); if (!new_entry) return -ENOMEM;
ether_addr_copy(new_entry->addr, addr);
new_entry->iface_mac = true;
list_add_tail(&new_entry->list, &adapter->mac_hlist);
set_hash:
ret = cxgb4vf_set_addr_hash(pi);
} elseif (ret >= 0) {
*tcam_idx = ret;
ret = 0;
}
return ret;
}
/* * Net device operations. * ======================
*/
/*
 * Perform the MAC and PHY actions needed to enable a "port" (Virtual
 * Interface).
 *
 * The steps run in order and the first failure is returned as-is:
 *   1. program the RX mode with the device's current MTU (address filters
 *      and promiscuity are not set here, the stack does that step
 *      explicitly; vlan accel is enabled),
 *   2. install the device's MAC address via cxgb4vf_change_mac(),
 *   3. enable the port via t4vf_enable_pi().
 *
 * We don't need to actually "start the link" itself since the firmware
 * will do that for us when the first Virtual Interface is enabled on a
 * port.
 */
static int link_start(struct net_device *dev)
{
	struct port_info *pi = netdev_priv(dev);
	int err;

	err = t4vf_set_rxmode(pi->adapter, pi->viid, dev->mtu,
			      -1, -1, -1, 1, true);
	if (err)
		return err;

	err = cxgb4vf_change_mac(pi, pi->viid, &pi->xact_addr_filt,
				 dev->dev_addr, true);
	if (err)
		return err;

	return t4vf_enable_pi(pi->adapter, pi, true, true);
}
/* * Name the MSI-X interrupts.
*/ staticvoid name_msix_vecs(struct adapter *adapter)
{ int namelen = sizeof(adapter->msix_info[0].desc) - 1; int pidx;
/* * Turn on NAPI and start up interrupts on a response queue.
*/ staticvoid qenable(struct sge_rspq *rspq)
{
napi_enable(&rspq->napi);
/* * 0-increment the Going To Sleep register to start the timer and * enable interrupts.
*/
t4_write_reg(rspq->adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
CIDXINC_V(0) |
SEINTARM_V(rspq->intr_params) |
INGRESSQID_V(rspq->cntxt_id));
}
/*
 * Enable NAPI scheduling and interrupt generation for all Receive Queues.
 *
 * @adapter: the adapter whose SGE state (adapter->sge) is used.
 *
 * NOTE(review): in this view only the forwarded-interrupt-queue write is
 * present and the local "rxq" is declared but never used -- confirm whether
 * per-queue enable statements belong between the declarations and the MSI
 * check.
*/ staticvoid enable_rx(struct adapter *adapter)
{ int rxq; struct sge *s = &adapter->sge;
/*
 * The interrupt queue doesn't use NAPI so we do the 0-increment of
 * its Going To Sleep register here to get it started.  This is only done
 * when the adapter flags include CXGB4VF_USING_MSI.
*/ if (adapter->flags & CXGB4VF_USING_MSI)
t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_GTS,
CIDXINC_V(0) |
SEINTARM_V(s->intrq.intr_params) |
INGRESSQID_V(s->intrq.cntxt_id));
}
/* * Wait until all NAPI handlers are descheduled.
*/ staticvoid quiesce_rx(struct adapter *adapter)
{ struct sge *s = &adapter->sge; int rxq;
switch (opcode) { case CPL_FW6_MSG: { /* * We've received an asynchronous message from the firmware.
*/ conststruct cpl_fw6_msg *fw_msg = cpl; if (fw_msg->type == FW6_TYPE_CMD_RPL)
t4vf_handle_fw_rpl(adapter, fw_msg->data); break;
}
case CPL_FW4_MSG: { /* FW can send EGR_UPDATEs encapsulated in a CPL_FW4_MSG.
*/ conststruct cpl_sge_egr_update *p = (void *)(rsp + 3);
opcode = CPL_OPCODE_G(ntohl(p->opcode_qid)); if (opcode != CPL_SGE_EGR_UPDATE) {
dev_err(adapter->pdev_dev, "unexpected FW4/CPL %#x on FW event queue\n"
, opcode); break;
}
cpl = (void *)p;
}
fallthrough;
case CPL_SGE_EGR_UPDATE: { /* * We've received an Egress Queue Status Update message. We * get these, if the SGE is configured to send these when the * firmware passes certain points in processing our TX * Ethernet Queue or if we make an explicit request for one. * We use these updates to determine when we may need to * restart a TX Ethernet Queue which was stopped for lack of * free TX Queue Descriptors ...
*/ conststruct cpl_sge_egr_update *p = cpl; unsignedint qid = EGR_QID_G(be32_to_cpu(p->opcode_qid)); struct sge *s = &adapter->sge; struct sge_txq *tq; struct sge_eth_txq *txq; unsignedint eq_idx;
/* * Perform sanity checking on the Queue ID to make sure it * really refers to one of our TX Ethernet Egress Queues which * is active and matches the queue's ID. None of these error * conditions should ever happen so we may want to either make * them fatal and/or conditionalized under DEBUG.
*/
eq_idx = EQ_IDX(s, qid); if (unlikely(eq_idx >= MAX_EGRQ)) {
dev_err(adapter->pdev_dev, "Egress Update QID %d out of range\n", qid); break;
}
tq = s->egr_map[eq_idx]; if (unlikely(tq == NULL)) {
dev_err(adapter->pdev_dev, "Egress Update QID %d TXQ=NULL\n", qid); break;
}
txq = container_of(tq, struct sge_eth_txq, q); if (unlikely(tq->abs_id != qid)) {
dev_err(adapter->pdev_dev, "Egress Update QID %d refers to TXQ %d\n",
qid, tq->abs_id); break;
}
/* * Restart a stopped TX Queue which has less than half of its * TX ring in use ...
*/
txq->q.restarts++;
netif_tx_wake_queue(txq->txq); break;
}
/* * Allocate SGE TX/RX response queues. Determine how many sets of SGE queues * to use and initialize them. We support multiple "Queue Sets" per port if * we have MSI-X, otherwise just one queue set per port.
*/ staticint setup_sge_queues(struct adapter *adapter)
{ struct sge *s = &adapter->sge; int err, pidx, msix;
/* * Clear "Queue Set" Free List Starving and TX Queue Mapping Error * state.
*/
bitmap_zero(s->starving_fl, MAX_EGRQ);
/* * If we're using MSI interrupt mode we need to set up a "forwarded * interrupt" queue which we'll set up with our MSI vector. The rest * of the ingress queues will be set up to forward their interrupts to * this queue ... This must be first since t4vf_sge_alloc_rxq() uses * the intrq's queue ID as the interrupt forwarding queue for the * subsequent calls ...
*/ if (adapter->flags & CXGB4VF_USING_MSI) {
err = t4vf_sge_alloc_rxq(adapter, &s->intrq, false,
adapter->port[0], 0, NULL, NULL); if (err) goto err_free_queues;
}
/* * Allocate each "port"'s initial Queue Sets. These can be changed * later on ... up to the point where any interface on the adapter is * brought up at which point lots of things get nailed down * permanently ...
*/
msix = MSIX_IQFLINT;
for_each_port(adapter, pidx) { struct net_device *dev = adapter->port[pidx]; struct port_info *pi = netdev_priv(dev); struct sge_eth_rxq *rxq = &s->ethrxq[pi->first_qset]; struct sge_eth_txq *txq = &s->ethtxq[pi->first_qset]; int qs;
/* * The FW_IQ_CMD doesn't return the Absolute Queue IDs * for Free Lists but since all of the Egress Queues * (including Free Lists) have Relative Queue IDs * which are computed as Absolute - Base Queue ID, we * can synthesize the Absolute Queue IDs for the Free * Lists. This is useful for debugging purposes when * we want to dump Queue Contexts via the PF Driver.
*/
rxq->fl.abs_id = rxq->fl.cntxt_id + s->egr_base;
EQ_MAP(s, rxq->fl.abs_id) = &rxq->fl;
}
} return 0;
/* * Set up Receive Side Scaling (RSS) to distribute packets to multiple receive * queues. We configure the RSS CPU lookup table to distribute to the number * of HW receive queues, and the response queue lookup table to narrow that * down to the response queues actually configured for each "port" (Virtual * Interface). We always configure the RSS mapping for all ports since the * mapping table has plenty of entries.
*/ staticint setup_rss(struct adapter *adapter)
{ int pidx;
/* * Perform Global RSS Mode-specific initialization.
*/ switch (adapter->params.rss.mode) { case FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL: /* * If Tunnel All Lookup isn't specified in the global * RSS Configuration, then we need to specify a * default Ingress Queue for any ingress packets which * aren't hashed. We'll use our first ingress queue * ...
*/ if (!adapter->params.rss.u.basicvirtual.tnlalllookup) { union rss_vi_config config;
err = t4vf_read_rss_vi_config(adapter,
pi->viid,
&config); if (err) return err;
config.basicvirtual.defaultq =
rxq[0].rspq.abs_id;
err = t4vf_write_rss_vi_config(adapter,
pi->viid,
&config); if (err) return err;
} break;
}
}
return 0;
}
/*
 * Bring the adapter up.  Called whenever we go from no "ports" open to having
 * one open.  This function performs the actions necessary to make an adapter
 * operational, such as completing the initialization of HW modules, and
 * enabling interrupts.  Must be called with the rtnl lock held.  (Note that
 * this is called "cxgb_up" in the PF Driver.)
 *
 * Returns 0 on success or the error reported by the failing setup or
 * interrupt-request step.
 */
static int adapter_up(struct adapter *adapter)
{
	int err;

	/*
	 * One-time setup, skipped once CXGB4VF_FULL_INIT_DONE is set.  Once
	 * we've done this, many of our adapter parameters can no longer be
	 * changed ...
	 */
	if (!(adapter->flags & CXGB4VF_FULL_INIT_DONE)) {
		err = setup_sge_queues(adapter);
		if (err)
			return err;

		err = setup_rss(adapter);
		if (err) {
			/* Undo the queue allocation done just above. */
			t4vf_free_sge_resources(adapter);
			return err;
		}

		if (adapter->flags & CXGB4VF_USING_MSIX)
			name_msix_vecs(adapter);

		adapter->flags |= CXGB4VF_FULL_INIT_DONE;
	}

	/*
	 * Acquire our interrupt resources.  We only support MSI-X and MSI, so
	 * at least one of the two flags must be set by now.
	 */
	BUG_ON(!(adapter->flags & (CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)));

	if (adapter->flags & CXGB4VF_USING_MSIX) {
		err = request_msix_queue_irqs(adapter);
	} else {
		err = request_irq(adapter->pdev->irq,
				  t4vf_intr_handler(adapter), 0,
				  adapter->name, adapter);
	}
	if (err) {
		dev_err(adapter->pdev_dev, "request_irq failed, err %d\n",
			err);
		return err;
	}

	/* Enable NAPI ingress processing and return success. */
	enable_rx(adapter);
	t4vf_sge_start(adapter);

	return 0;
}
/*
 * Bring the adapter down.  Called whenever the last "port" (Virtual
 * Interface) closed.  (Note that this routine is called "cxgb_down" in the PF
 * Driver.)
 *
 * Interrupt resources are released first, matching whichever of MSI-X or
 * the single pdev->irq was in use, and then the RX path is quiesced.
 */
static void adapter_down(struct adapter *adapter)
{
	/* Free interrupt resources. */
	if (!(adapter->flags & CXGB4VF_USING_MSIX))
		free_irq(adapter->pdev->irq, adapter);
	else
		free_msix_queue_irqs(adapter);

	/* Wait for NAPI handlers to finish. */
	quiesce_rx(adapter);
}
/*
 * Start up a net device.
 *
 * @dev: the net device being opened; its private data is the port_info.
 *
 * Returns -ENXIO if the adapter flags do not include CXGB4VF_FW_OK,
 * otherwise the error from adapter_up(), t4vf_update_port_info() or
 * link_start().
 *
 * NOTE(review): as shown here, a successful link_start() falls straight
 * through into the err_unwind label, and nothing in this view marks the port
 * in adapter->open_device_map.  Confirm whether success-path statements
 * (and an early "return 0") are missing from this view.
*/ staticint cxgb4vf_open(struct net_device *dev)
{ int err; struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter;
/* * If we don't have a connection to the firmware there's nothing we * can do.
*/ if (!(adapter->flags & CXGB4VF_FW_OK)) return -ENXIO;
/* * If this is the first interface that we're opening on the "adapter", * bring the "adapter" up now. A failure here needs no unwinding.
*/ if (adapter->open_device_map == 0) {
err = adapter_up(adapter); if (err) return err;
}
/* It's possible that the basic port information could have * changed since we first read it.
*/
err = t4vf_update_port_info(pi); if (err < 0) goto err_unwind;
/* * Note that this interface is up and start everything up ...
*/
err = link_start(dev); if (err) goto err_unwind;
/* Take the adapter back down only if no interface is recorded as open. */
err_unwind: if (adapter->open_device_map == 0)
adapter_down(adapter); return err;
}
/* * Shut down a net device. This routine is called "cxgb_close" in the PF * Driver ...
*/ staticint cxgb4vf_stop(struct net_device *dev)
{ struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter;
ret = t4vf_alloc_mac_filt(adapter, pi->viid, free, 1, maclist,
NULL, ucast ? &uhash : &mhash, false); if (ret < 0) goto out; /* if hash != 0, then add the addr to hash addr list * so on the end we will calculate the hash for the * list and program it
*/ if (uhash || mhash) {
new_entry = kzalloc(sizeof(*new_entry), GFP_ATOMIC); if (!new_entry) return -ENOMEM;
ether_addr_copy(new_entry->addr, mac_addr);
list_add_tail(&new_entry->list, &adapter->mac_hlist);
ret = cxgb4vf_set_addr_hash(pi);
}
out: return ret < 0 ? ret : 0;
}
/* If the MAC address to be removed is in the hash addr * list, delete it from the list and update hash vector
*/
list_for_each_entry_safe(entry, tmp, &adapter->mac_hlist, list) { if (ether_addr_equal(entry->addr, mac_addr)) {
list_del(&entry->list);
kfree(entry); return cxgb4vf_set_addr_hash(pi);
}
}
ret = t4vf_free_mac_filt(adapter, pi->viid, 1, maclist, false); return ret < 0 ? -EINVAL : 0;
}
/* * Set RX properties of a port, such as promiscuity, address filters, and MTU. * If @mtu is -1 it is left unchanged.
*/ staticint set_rxmode(struct net_device *dev, int mtu, bool sleep_ok)
{ struct port_info *pi = netdev_priv(dev);
/*
 * Set the current receive modes on the device.
 *
 * Thin wrapper around set_rxmode() that passes -1 for the MTU and false for
 * sleep_ok.  The function returns void, so the result of set_rxmode() is
 * dropped: unfortunately we can't return errors to the stack.
 */
static void cxgb4vf_set_rxmode(struct net_device *dev)
{
	(void)set_rxmode(dev, -1, false);
}
/* * Find the entry in the interrupt holdoff timer value array which comes * closest to the specified interrupt holdoff value.
*/ staticint closest_timer(conststruct sge *s, int us)
{ int i, timer_idx = 0, min_delta = INT_MAX;
for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) { int delta = us - s->timer_val[i]; if (delta < 0)
delta = -delta; if (delta < min_delta) {
min_delta = delta;
timer_idx = i;
}
} return timer_idx;
}
staticint closest_thres(conststruct sge *s, int thres)
{ int i, delta, pktcnt_idx = 0, min_delta = INT_MAX;
for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
delta = thres - s->counter_val[i]; if (delta < 0)
delta = -delta; if (delta < min_delta) {
min_delta = delta;
pktcnt_idx = i;
}
} return pktcnt_idx;
}
/* * Return a queue's interrupt hold-off time in us. 0 means no timer.
*/ staticunsignedint qtimer_val(conststruct adapter *adapter, conststruct sge_rspq *rspq)
{ unsignedint timer_idx = QINTR_TIMER_IDX_G(rspq->intr_params);
/** * set_rxq_intr_params - set a queue's interrupt holdoff parameters * @adapter: the adapter * @rspq: the RX response queue * @us: the hold-off time in us, or 0 to disable timer * @cnt: the hold-off packet count, or 0 to disable counter * * Sets an RX response queue's interrupt hold-off time and packet count. * At least one of the two needs to be enabled for the queue to generate * interrupts.
*/ staticint set_rxq_intr_params(struct adapter *adapter, struct sge_rspq *rspq, unsignedint us, unsignedint cnt)
{ unsignedint timer_idx;
/* * If both the interrupt holdoff timer and count are specified as * zero, default to a holdoff count of 1 ...
*/ if ((us | cnt) == 0)
cnt = 1;
/* * If an interrupt holdoff count has been specified, then find the * closest configured holdoff count and use that. If the response * queue has already been created, then update its queue context * parameters ...
*/ if (cnt) { int err;
u32 v, pktcnt_idx;
/* * Return a version number to identify the type of adapter. The scheme is: * - bits 0..9: chip version * - bits 10..15: chip revision
*/ staticinlineunsignedint mk_adap_vers(conststruct adapter *adapter)
{ /* * Chip version 4, revision 0x3f (cxgb4vf).
*/ return CHELSIO_CHIP_VERSION(adapter->params.chip) | (0x3f << 10);
}
/*
 * Execute the specified ioctl command.
 *
 * No command is handled in the code shown here: every @cmd value returns
 * -EOPNOTSUPP.  The switch is kept as the extension point for future
 * commands.
 */
static int cxgb4vf_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
{
	switch (cmd) {
	/*
	 * The VF Driver doesn't have access to any of the other
	 * common Ethernet device ioctl()'s (like reading/writing
	 * PHY registers, etc.)
	 */
	default:
		return -EOPNOTSUPP;
	}
}
/*
 * Change the device's MTU.
 *
 * The new MTU is pushed through t4vf_set_rxmode(), with every other
 * mode argument passed as -1.  dev->mtu is updated only if that call
 * succeeds; otherwise its error is returned and dev->mtu is left alone.
 */
static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
{
	struct port_info *pi = netdev_priv(dev);
	int err;

	err = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
			      -1, -1, -1, -1, true);
	if (err)
		return err;

	WRITE_ONCE(dev->mtu, new_mtu);
	return 0;
}
static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
netdev_features_t features)
{ /* * Since there is no support for separate rx/tx vlan accel * enable/disable make sure tx flag is always in same state as rx.
*/ if (features & NETIF_F_HW_VLAN_CTAG_RX)
features |= NETIF_F_HW_VLAN_CTAG_TX; else
features &= ~NETIF_F_HW_VLAN_CTAG_TX;
if (changed & NETIF_F_HW_VLAN_CTAG_RX)
t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
features & NETIF_F_HW_VLAN_CTAG_TX, 0);
return 0;
}
/*
 * Change the device's MAC address.
 *
 * @dev: the net device
 * @_addr: a struct sockaddr whose sa_data holds the new MAC address
 *
 * Returns -EADDRNOTAVAIL if the address is not a valid Ethernet address,
 * a negative error from cxgb4vf_change_mac() if the filter could not be
 * updated, or 0 once the filter is updated and the new address has been
 * recorded on the net device.
 */
static int cxgb4vf_set_mac_addr(struct net_device *dev, void *_addr)
{
	struct port_info *pi = netdev_priv(dev);
	struct sockaddr *sa = _addr;
	int rc;

	if (!is_valid_ether_addr(sa->sa_data))
		return -EADDRNOTAVAIL;

	rc = cxgb4vf_change_mac(pi, pi->viid, &pi->xact_addr_filt,
				sa->sa_data, true);
	if (rc >= 0) {
		/* Record the address on the net device only on success. */
		eth_hw_addr_set(dev, sa->sa_data);
		rc = 0;
	}

	return rc;
}
#ifdef CONFIG_NET_POLL_CONTROLLER /* * Poll all of our receive queues. This is called outside of normal interrupt * context.
*/ staticvoid cxgb4vf_poll_controller(struct net_device *dev)
{ struct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter;
if (adapter->flags & CXGB4VF_USING_MSIX) { struct sge_eth_rxq *rxq; int nqsets;
/* * Ethtool operations. * =================== * * Note that we don't support any ethtool operations which change the physical * state of the port to which we're linked.
*/
/** * fw_caps_to_lmm - translate Firmware to ethtool Link Mode Mask * @port_type: Firmware Port Type * @fw_caps: Firmware Port Capabilities * @link_mode_mask: ethtool Link Mode Mask * * Translate a Firmware Port Capabilities specification to an ethtool * Link Mode Mask.
*/ staticvoid fw_caps_to_lmm(enum fw_port_type port_type, unsignedint fw_caps, unsignedlong *link_mode_mask)
{ #define SET_LMM(__lmm_name) \
__set_bit(ETHTOOL_LINK_MODE_ ## __lmm_name ## _BIT, \
link_mode_mask)
#define FW_CAPS_TO_LMM(__fw_name, __lmm_name) \ do { \ if (fw_caps & FW_PORT_CAP32_ ## __fw_name) \
SET_LMM(__lmm_name); \
} while (0)
switch (port_type) { case FW_PORT_TYPE_BT_SGMII: case FW_PORT_TYPE_BT_XFI: case FW_PORT_TYPE_BT_XAUI:
SET_LMM(TP);
FW_CAPS_TO_LMM(SPEED_100M, 100baseT_Full);
FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full); break;
case FW_PORT_TYPE_KX4: case FW_PORT_TYPE_KX:
SET_LMM(Backplane);
FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseKX4_Full); break;
case FW_PORT_TYPE_KR:
SET_LMM(Backplane);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); break;
case FW_PORT_TYPE_BP_AP:
SET_LMM(Backplane);
FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseR_FEC);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full); break;
case FW_PORT_TYPE_FIBER_XFI: case FW_PORT_TYPE_FIBER_XAUI: case FW_PORT_TYPE_SFP: case FW_PORT_TYPE_QSFP_10G: case FW_PORT_TYPE_QSA:
SET_LMM(FIBRE);
FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full); break;
case FW_PORT_TYPE_BP40_BA: case FW_PORT_TYPE_QSFP:
SET_LMM(FIBRE);
FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full); break;
case FW_PORT_TYPE_CR_QSFP: case FW_PORT_TYPE_SFP28:
SET_LMM(FIBRE);
FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseT_Full);
FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full); break;
case FW_PORT_TYPE_KR_SFP28:
SET_LMM(Backplane);
FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
FW_CAPS_TO_LMM(SPEED_25G, 25000baseKR_Full); break;
case FW_PORT_TYPE_KR_XLAUI:
SET_LMM(Backplane);
FW_CAPS_TO_LMM(SPEED_1G, 1000baseKX_Full);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
FW_CAPS_TO_LMM(SPEED_40G, 40000baseKR4_Full); break;
case FW_PORT_TYPE_CR2_QSFP:
SET_LMM(FIBRE);
FW_CAPS_TO_LMM(SPEED_50G, 50000baseSR2_Full); break;
case FW_PORT_TYPE_KR4_100G: case FW_PORT_TYPE_CR4_QSFP:
SET_LMM(FIBRE);
FW_CAPS_TO_LMM(SPEED_1G, 1000baseT_Full);
FW_CAPS_TO_LMM(SPEED_10G, 10000baseKR_Full);
FW_CAPS_TO_LMM(SPEED_40G, 40000baseSR4_Full);
FW_CAPS_TO_LMM(SPEED_25G, 25000baseCR_Full);
FW_CAPS_TO_LMM(SPEED_50G, 50000baseCR2_Full);
FW_CAPS_TO_LMM(SPEED_100G, 100000baseCR4_Full); break;
/* For the nonce, the Firmware doesn't send up Port State changes * when the Virtual Interface attached to the Port is down. So * if it's down, let's grab any changes.
*/ if (!netif_running(dev))
(void)t4vf_update_port_info(pi);
/* Translate the Firmware FEC Support into the ethtool value. We * always support IEEE 802.3 "automatic" selection of Link FEC type if * any FEC is supported.
*/
fec->fec = fwcap_to_eth_fec(lc->pcaps); if (fec->fec != ETHTOOL_FEC_OFF)
fec->fec |= ETHTOOL_FEC_AUTO;
/* Translate the current internal FEC parameters into the * ethtool values.
*/
fec->active_fec = cc_to_eth_fec(lc->fec); return 0;
}
/*
 * Set current adapter message level.
 *
 * Stores @msglevel in the msg_enable field of the adapter that
 * netdev2adap() returns for @dev.
 */
static void cxgb4vf_set_msglevel(struct net_device *dev, u32 msglevel)
{
	struct adapter *adapter = netdev2adap(dev);

	adapter->msg_enable = msglevel;
}
/* * Return the device's current Queue Set ring size parameters along with the * allowed maximum values. Since ethtool doesn't understand the concept of * multi-queue devices, we just return the current values associated with the * first Queue Set.
*/ staticvoid cxgb4vf_get_ringparam(struct net_device *dev, struct ethtool_ringparam *rp, struct kernel_ethtool_ringparam *kernel_rp, struct netlink_ext_ack *extack)
{ conststruct port_info *pi = netdev_priv(dev); conststruct sge *s = &pi->adapter->sge;
/* * Set the Queue Set ring size parameters for the device. Again, since * ethtool doesn't allow for the concept of multiple queues per device, we'll * apply these new values across all of the Queue Sets associated with the * device -- after vetting them of course!
*/ staticint cxgb4vf_set_ringparam(struct net_device *dev, struct ethtool_ringparam *rp, struct kernel_ethtool_ringparam *kernel_rp, struct netlink_ext_ack *extack)
{ conststruct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter; struct sge *s = &adapter->sge; int qs;
/* * Return the interrupt holdoff timer and count for the first Queue Set on the * device. Our extension ioctl() (the cxgbtool interface) allows the * interrupt holdoff timer to be read on all of the device's Queue Sets.
*/ staticint cxgb4vf_get_coalesce(struct net_device *dev, struct ethtool_coalesce *coalesce, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack)
{ conststruct port_info *pi = netdev_priv(dev); conststruct adapter *adapter = pi->adapter; conststruct sge_rspq *rspq = &adapter->sge.ethrxq[pi->first_qset].rspq;
/* * Set the RX interrupt holdoff timer and count for the first Queue Set on the * interface. Our extension ioctl() (the cxgbtool interface) allows us to set * the interrupt holdoff timer on any of the device's Queue Sets.
*/ staticint cxgb4vf_set_coalesce(struct net_device *dev, struct ethtool_coalesce *coalesce, struct kernel_ethtool_coalesce *kernel_coal, struct netlink_ext_ack *extack)
{ conststruct port_info *pi = netdev_priv(dev); struct adapter *adapter = pi->adapter;
/*
 * Port stats maintained per queue of the port.
 *
 * The field order is significant: the per-queue labels at the end of
 * stats_strings[] must match the order of the fields in this structure.
 * The label quoted next to each field is the corresponding entry there.
*/ struct queue_port_stats {
u64 tso; /* "TSO" */
u64 tx_csum; /* "TxCsumOffload" */
u64 rx_csum; /* "RxCsumGood" */
u64 vlan_ex; /* "VLANextractions" */
u64 vlan_ins; /* "VLANinsertions" */
u64 lro_pkts; /* "GROPackets" */
u64 lro_merged; /* "GROMerged" */
};
/*
 * Strings for the ETH_SS_STATS statistics set ("ethtool -S").  Note that
 * these need to match the order of statistics returned by
 * t4vf_get_port_stats().
 */
static const char stats_strings[][ETH_GSTRING_LEN] = {
	/*
	 * These must match the layout of the t4vf_port_stats structure.
	 */
	"TxBroadcastBytes ",
	"TxBroadcastFrames ",
	"TxMulticastBytes ",
	"TxMulticastFrames ",
	"TxUnicastBytes ",
	"TxUnicastFrames ",
	"TxDroppedFrames ",
	"TxOffloadBytes ",
	"TxOffloadFrames ",
	"RxBroadcastBytes ",
	"RxBroadcastFrames ",
	"RxMulticastBytes ",
	"RxMulticastFrames ",
	"RxUnicastBytes ",
	"RxUnicastFrames ",
	"RxErrorFrames ",

	/*
	 * These are accumulated per-queue statistics and must match the
	 * order of the fields in the queue_port_stats structure.
	 */
	"TSO ",
	"TxCsumOffload ",
	"RxCsumGood ",
	"VLANextractions ",
	"VLANinsertions ",
	"GROPackets ",
	"GROMerged ",
};
/*
 * Return the number of statistics in the specified statistics set.
 *
 * Only ETH_SS_STATS is supported, for which the number of entries in
 * stats_strings[] is returned; any other set yields -EOPNOTSUPP.
 */
static int cxgb4vf_get_sset_count(struct net_device *dev, int sset)
{
	if (sset == ETH_SS_STATS)
		return ARRAY_SIZE(stats_strings);

	return -EOPNOTSUPP;
}
/*
 * Return the strings for the specified statistics set.
 *
 * For ETH_SS_STATS the whole stats_strings[] table is copied into @data,
 * so the caller's buffer must hold at least sizeof(stats_strings) bytes.
 * Any other set leaves @data untouched.
 */
static void cxgb4vf_get_strings(struct net_device *dev,
				u32 sset,
				u8 *data)
{
	if (sset != ETH_SS_STATS)
		return;

	memcpy(data, stats_strings, sizeof(stats_strings));
}
/* * Small utility routine to accumulate queue statistics across the queues of * a "port".
*/ staticvoid collect_sge_port_stats(conststruct adapter *adapter, conststruct port_info *pi, struct queue_port_stats *stats)
{ conststruct sge_eth_txq *txq = &adapter->sge.ethtxq[pi->first_qset]; conststruct sge_eth_rxq *rxq = &adapter->sge.ethrxq[pi->first_qset]; int qs;
/*
 * Return the size of our register map.
 *
 * The value is the constant T4VF_REGMAP_SIZE; @dev is not consulted.
 */
static int cxgb4vf_get_regs_len(struct net_device *dev)
{
	const int regmap_len = T4VF_REGMAP_SIZE;

	return regmap_len;
}
/* * Dump a block of registers, start to end inclusive, into a buffer.
*/ staticvoid reg_block_dump(struct adapter *adapter, void *regbuf, unsignedint start, unsignedint end)
{
u32 *bp = regbuf + start - T4VF_REGMAP_START;
for ( ; start <= end; start += sizeof(u32)) { /* * Avoid reading the Mailbox Control register since that * can trigger a Mailbox Ownership Arbitration cycle and * interfere with communication with the firmware.
*/ if (start == T4VF_CIM_BASE_ADDR + CIM_VF_EXT_MAILBOX_CTRL)
*bp++ = 0xffff; else
*bp++ = t4_read_reg(adapter, start);
}
}
/* * /sys/kernel/debug/cxgb4vf support code and data. * ================================================
*/
/* * Show Firmware Mailbox Command/Reply Log * * Note that we don't do any locking when dumping the Firmware Mailbox Log so * it's possible that we can catch things during a log update and therefore * see partially corrupted log entries. But it's probably Good Enough(tm). * If we ever decide that we want to make sure that we're dumping a coherent * log, we'd need to perform locking in the mailbox logging and in * mboxlog_open() where we'd need to grab the entire mailbox log in one go * like we do for the Firmware Device Log. But as stated above, meh ...
*/ staticint mboxlog_show(struct seq_file *seq, void *v)
{ struct adapter *adapter = seq->private; struct mbox_cmd_log *log = adapter->mbox_log; struct mbox_cmd *entry; int entry_idx, i;
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "%10s %15s %5s %5s %s\n", "Seq#", "Tstamp", "Atime", "Etime", "Command/Reply"); return 0;
}
/* * Return the number of "entries" in our "file". We group the multi-Queue * sections with QPL Queue Sets per "entry". The sections of the output are: * * Ethernet RX/TX Queue Sets * Firmware Event Queue * Forwarded Interrupt Queue (if in MSI mode)
*/ staticint sge_queue_entries(conststruct adapter *adapter)
{ return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
((adapter->flags & CXGB4VF_USING_MSI) != 0);
}
#undef R #undef T #undef S #undef R3 #undef T3 #undef S3
return 0;
}
/* * Return the number of "entries" in our "file". We group the multi-Queue * sections with QPL Queue Sets per "entry". The sections of the output are: * * Ethernet RX/TX Queue Sets * Firmware Event Queue * Forwarded Interrupt Queue (if in MSI mode)
*/ staticint sge_qstats_entries(conststruct adapter *adapter)
{ return DIV_ROUND_UP(adapter->sge.ethqsets, QPL) + 1 +
((adapter->flags & CXGB4VF_USING_MSI) != 0);
}
/* * Module and device initialization and cleanup code. * ==================================================
*/
/*
 * Populate our /sys/kernel/debug/cxgb4vf sub-nodes, one per entry of
 * debugfs_files[].  The adapter's directory (debugfs_root) must already
 * have been set up by the caller.  Always returns 0.
 */
static int setup_debugfs(struct adapter *adapter)
{
	int idx = 0;

	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));

	/*
	 * Debugfs support is best effort, so the result of each file
	 * creation is deliberately not checked.
	 */
	while (idx < ARRAY_SIZE(debugfs_files)) {
		debugfs_create_file(debugfs_files[idx].name,
				    debugfs_files[idx].mode,
				    adapter->debugfs_root,
				    adapter,
				    debugfs_files[idx].fops);
		idx++;
	}

	return 0;
}
/*
 * Counterpart of setup_debugfs(): tear down the /sys/kernel/debug/cxgb4vf
 * sub-nodes.  Removing the directory itself (debugfs_root) is left to the
 * caller.
 */
static void cleanup_debugfs(struct adapter *adapter)
{
	BUG_ON(IS_ERR_OR_NULL(adapter->debugfs_root));

	/*
	 * Unlike our sister routine cleanup_proc(), there are no individual
	 * entries to remove here because a call will be made to
	 * debugfs_remove_recursive().  Only ancillary persistent state would
	 * need cleaning up, and currently there is none.
	 */
}
/* Work out how many Ports and Queue Sets we can support, based on our
 * Virtual Function Resources.  This may run a second time if we fall back
 * from MSI-X to MSI Interrupt Mode.
 */
static void size_nports_qsets(struct adapter *adapter)
{
	struct vf_resources *vfres = &adapter->params.vfres;
	unsigned int access_ports, qsets;

	/* We support as many "ports" as the number of Virtual Interfaces
	 * we've been provisioned with, capped at MAX_NPORTS.
	 */
	adapter->params.nports = vfres->nvi;
	if (adapter->params.nports > MAX_NPORTS) {
		dev_warn(adapter->pdev_dev, "only using %d of %d maximum"
			 " allowed virtual interfaces\n", MAX_NPORTS,
			 adapter->params.nports);
		adapter->params.nports = MAX_NPORTS;
	}

	/* We may have been given more VIs than the number of ports our Port
	 * Access Rights Mask lets us reach.  That is a configuration
	 * conflict, but not one worth crashing the kernel over: just trim
	 * the port count.
	 */
	access_ports = hweight32(vfres->pmask);
	if (adapter->params.nports > access_ports) {
		dev_warn(adapter->pdev_dev, "only using %d of %d provisioned"
			 " virtual interfaces; limited by Port Access Rights"
			 " mask %#x\n", access_ports, adapter->params.nports,
			 vfres->pmask);
		adapter->params.nports = access_ports;
	}

	/* One Ingress Queue is reserved for the Asynchronous Firmware Event
	 * Queue and, when using MSI Interrupts, one more for Forwarded
	 * Interrupts.
	 *
	 * The remaining FL/Intr-capable ingress queues are paired one-for-one
	 * with Ethernet/Control egress queues to form the "Queue Sets" which
	 * are apportioned between the "ports".  Each Queue Set needs two
	 * Egress Contexts -- one for the Ingress Queue Free List and one for
	 * the TX Ethernet Queue.
	 *
	 * Even when configured for MSI-X (module variable msi == MSI_MSIX)
	 * we may later be downgraded to MSI if too few MSI-X Interrupts are
	 * available, in which case this sizing is redone.
	 */
	qsets = vfres->niqflint - 1;
	if (msi == MSI_MSI)
		qsets--;

	qsets = min(vfres->nethctrl, qsets);
	if (vfres->neq < qsets * 2)
		qsets = vfres->neq / 2;
	if (qsets > MAX_ETH_QSETS)
		qsets = MAX_ETH_QSETS;
	adapter->sge.max_ethqsets = qsets;

	/* Every port needs at least one Queue Set. */
	if (adapter->params.nports > adapter->sge.max_ethqsets) {
		dev_warn(adapter->pdev_dev, "only using %d of %d available"
			 " virtual interfaces (too few Queue Sets)\n",
			 adapter->sge.max_ethqsets, adapter->params.nports);
		adapter->params.nports = adapter->sge.max_ethqsets;
	}
}
/* * Perform early "adapter" initialization. This is where we discover what * adapter parameters we're going to be using and initialize basic adapter * hardware support.
*/ staticint adap_init0(struct adapter *adapter)
{ struct sge_params *sge_params = &adapter->params.sge; struct sge *s = &adapter->sge; int err;
u32 param, val = 0;
/* * Some environments do not properly handle PCIE FLRs -- e.g. in Linux * 2.6.31 and later we can't call pci_reset_function() in order to * issue an FLR because of a self-deadlock on the device semaphore. * Meanwhile, the OS infrastructure doesn't issue FLRs in all the * cases where they're needed -- for instance, some versions of KVM * fail to reset "Assigned Devices" when the VM reboots. Therefore we * use the firmware based reset in order to reset any per function * state.
*/
err = t4vf_fw_reset(adapter); if (err < 0) {
dev_err(adapter->pdev_dev, "FW reset failed: err=%d\n", err); return err;
}
/* * Grab basic operational parameters. These will predominantly have * been set up by the Physical Function Driver or will be hard coded * into the adapter. We just have to live with them ... Note that * we _must_ get our VPD parameters before our SGE parameters because * we need to know the adapter's core clock from the VPD in order to * properly decode the SGE Timer Values.
*/
err = t4vf_get_dev_params(adapter); if (err) {
dev_err(adapter->pdev_dev, "unable to retrieve adapter" " device parameters: err=%d\n", err); return err;
}
err = t4vf_get_vpd_params(adapter); if (err) {
dev_err(adapter->pdev_dev, "unable to retrieve adapter" " VPD parameters: err=%d\n", err); return err;
}
err = t4vf_get_sge_params(adapter); if (err) {
dev_err(adapter->pdev_dev, "unable to retrieve adapter" " SGE parameters: err=%d\n", err); return err;
}
err = t4vf_get_rss_glb_config(adapter); if (err) {
dev_err(adapter->pdev_dev, "unable to retrieve adapter" " RSS parameters: err=%d\n", err); return err;
} if (adapter->params.rss.mode !=
FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
dev_err(adapter->pdev_dev, "unable to operate with global RSS" " mode %d\n", adapter->params.rss.mode); return -EINVAL;
}
err = t4vf_sge_init(adapter); if (err) {
dev_err(adapter->pdev_dev, "unable to use adapter parameters:" " err=%d\n", err); return err;
}
/* If we're running on newer firmware, let it know that we're * prepared to deal with encapsulated CPL messages. Older * firmware won't understand this and we'll just get * unencapsulated messages ...
*/
param = FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_PFVF) |
FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_PFVF_CPLFW4MSG_ENCAP);
val = 1;
(void) t4vf_set_params(adapter, 1, ¶m, &val);
/* * Grab our Virtual Interface resource allocation, extract the * features that we're interested in and do a bit of sanity testing on * what we discover.
*/
err = t4vf_get_vfres(adapter); if (err) {
dev_err(adapter->pdev_dev, "unable to get virtual interface" " resources: err=%d\n", err); return err;
}
/* Check for various parameter sanity issues */ if (adapter->params.vfres.pmask == 0) {
dev_err(adapter->pdev_dev, "no port access configured\n" "usable!\n"); return -EINVAL;
} if (adapter->params.vfres.nvi == 0) {
dev_err(adapter->pdev_dev, "no virtual interfaces configured/" "usable!\n"); return -EINVAL;
}
/* Initialize nports and max_ethqsets now that we have our Virtual * Function Resources.
*/
size_nports_qsets(adapter);
/* * Perform default configuration of DMA queues depending on the number and * type of ports we found and the number of available CPUs. Most settings can * be modified by the admin via ethtool and cxgbtool prior to the adapter * being brought up for the first time.
*/ staticvoid cfg_queues(struct adapter *adapter)
{ struct sge *s = &adapter->sge; int q10g, n10g, qidx, pidx, qs;
size_t iqe_size;
/* * We should not be called till we know how many Queue Sets we can * support. In particular, this means that we need to know what kind * of interrupts we'll be using ...
*/
BUG_ON((adapter->flags &
(CXGB4VF_USING_MSIX | CXGB4VF_USING_MSI)) == 0);
/* * Count the number of 10GbE Virtual Interfaces that we have.
*/
n10g = 0;
for_each_port(adapter, pidx)
n10g += is_x_10g_port(&adap2pinfo(adapter, pidx)->link_cfg);
/* * We default to 1 queue per non-10G port and up to # of cores queues * per 10G port.
*/ if (n10g == 0)
q10g = 0; else { int n1g = (adapter->params.nports - n10g);
q10g = (adapter->sge.max_ethqsets - n1g) / n10g; if (q10g > num_online_cpus())
q10g = num_online_cpus();
}
/* * Allocate the "Queue Sets" to the various Virtual Interfaces. * The layout will be established in setup_sge_queues() when the * adapter is brought up for the first time.
*/
qidx = 0;
for_each_port(adapter, pidx) { struct port_info *pi = adap2pinfo(adapter, pidx);
/* * The Ingress Queue Entry Size for our various Response Queues needs * to be big enough to accommodate the largest message we can receive * from the chip/firmware; which is 64 bytes ...
*/
iqe_size = 64;
/* * Set up default Queue Set parameters ... Start off with the * shortest interrupt holdoff timer.
*/ for (qs = 0; qs < s->max_ethqsets; qs++) { struct sge_eth_rxq *rxq = &s->ethrxq[qs]; struct sge_eth_txq *txq = &s->ethtxq[qs];
/* * The firmware event queue is used for link state changes and * notifications of TX DMA completions.
*/
init_rspq(&s->fw_evtq, SGE_TIMER_RSTRT_CNTR, 0, 512, iqe_size);
/* * The forwarded interrupt queue is used when we're in MSI interrupt * mode. In this mode all interrupts associated with RX queues will * be forwarded to a single queue which we'll associate with our MSI * interrupt vector. The messages dropped in the forwarded interrupt * queue will indicate which ingress queue needs servicing ... This * queue needs to be large enough to accommodate all of the ingress * queues which are forwarding their interrupt (+1 to prevent the PIDX * from equalling the CIDX if every ingress queue has an outstanding * interrupt). The queue doesn't need to be any larger because no * ingress queue will ever have more than one outstanding interrupt at * any time ...
*/
init_rspq(&s->intrq, SGE_TIMER_RSTRT_CNTR, 0, MSIX_ENTRIES + 1,
iqe_size);
}
/*
 * Shrink the total number of Ethernet Queue Sets across all ports to at
 * most n.  n must allow at least one Queue Set per port.
 */
static void reduce_ethqs(struct adapter *adapter, int n)
{
	struct port_info *port;
	int pidx, next_qset;

	BUG_ON(n < adapter->params.nports);

	/*
	 * While too many Queue Sets are active, iterate across the "ports"
	 * and take one Queue Set away from each port that has more than one.
	 */
	while (adapter->sge.ethqsets > n) {
		for_each_port(adapter, pidx) {
			port = adap2pinfo(adapter, pidx);
			if (port->nqsets <= 1)
				continue;

			port->nqsets--;
			adapter->sge.ethqsets--;
			if (adapter->sge.ethqsets <= n)
				break;
		}
	}

	/*
	 * Reassign the starting Queue Set of each "port" so the allocations
	 * are contiguous again.
	 */
	next_qset = 0;
	for_each_port(adapter, pidx) {
		port = adap2pinfo(adapter, pidx);
		port->first_qset = next_qset;
		next_qset += port->nqsets;
	}
}
/* * We need to grab enough MSI-X vectors to cover our interrupt needs. Ideally * we get a separate MSI-X vector for every "Queue Set" plus any extras we * need. Minimally we need one for every Virtual Interface plus those needed * for our "extras". Note that this process may lower the maximum number of * allowed Queue Sets ...
*/ staticint enable_msix(struct adapter *adapter)
{ int i, want, need, nqsets; struct msix_entry entries[MSIX_ENTRIES]; struct sge *s = &adapter->sge;
for (i = 0; i < MSIX_ENTRIES; ++i)
entries[i].entry = i;
/* * We _want_ enough MSI-X interrupts to cover all of our "Queue Sets" * plus those needed for our "extras" (for example, the firmware * message queue). We _need_ at least one "Queue Set" per Virtual * Interface plus those needed for our "extras". So now we get to see * if the song is right ...
*/
want = s->max_ethqsets + MSIX_EXTRAS;
need = adapter->params.nports + MSIX_EXTRAS;
want = pci_enable_msix_range(adapter->pdev, entries, need, want); if (want < 0) return want;
nqsets = want - MSIX_EXTRAS; if (nqsets < s->max_ethqsets) {
dev_warn(adapter->pdev_dev, "only enough MSI-X vectors" " for %d Queue Sets\n", nqsets);
s->max_ethqsets = nqsets; if (nqsets < s->ethqsets)
reduce_ethqs(adapter, nqsets);
} for (i = 0; i < want; ++i)
adapter->msix_info[i].vec = entries[i].vector;
/**
 *	cxgb4vf_get_port_mask - Get port mask for the VF based on mac
 *				address stored on the adapter
 *	@adapter: The adapter
 *
 *	Find the port mask for the VF based on the index of mac
 *	address stored in the adapter.  If no mac address is stored on
 *	the adapter for the VF, use the port mask received from the
 *	firmware.
 *
 *	Return: a mask with one bit set for every port in the firmware port
 *	mask for which t4vf_get_vf_mac_acl() succeeded and yielded a non-zero
 *	MAC address; the unmodified firmware port mask if there is no such
 *	port.
 */
static unsigned int cxgb4vf_get_port_mask(struct adapter *adapter)
{
	unsigned int naddr = 1, pidx = 0;
	unsigned int pmask, rmask = 0;
	u8 mac[ETH_ALEN];
	int err;

	/* Walk the firmware port mask one bit at a time, lowest bit first. */
	pmask = adapter->params.vfres.pmask;
	while (pmask) {
		if (pmask & 1) {
			err = t4vf_get_vf_mac_acl(adapter, pidx, &naddr, mac);
			/*
			 * Shift an unsigned 1: pidx can reach the top bit of
			 * the mask, and shifting a signed 1 that far is
			 * undefined behaviour.
			 */
			if (!err && !is_zero_ether_addr(mac))
				rmask |= 1U << pidx;
		}
		pmask >>= 1;
		pidx++;
	}

	/* No port has a stored MAC address: fall back to the firmware mask. */
	if (!rmask)
		rmask = adapter->params.vfres.pmask;

	return rmask;
}
/* * "Probe" a device: initialize a device and construct all kernel and driver * state needed to manage the device. This routine is called "init_one" in * the PF Driver ...
*/ staticint cxgb4vf_pci_probe(struct pci_dev *pdev, conststruct pci_device_id *ent)
{ struct adapter *adapter; struct net_device *netdev; struct port_info *pi; unsignedint pmask; int err, pidx;
/* * Reserve PCI resources for the device. If we can't get them some * other driver may have already claimed the device ...
*/
err = pci_request_regions(pdev, KBUILD_MODNAME); if (err) {
dev_err(&pdev->dev, "cannot obtain PCI resources\n"); goto err_disable_device;
}
/* * Set up our DMA mask
*/
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) {
dev_err(&pdev->dev, "no usable DMA configuration\n"); goto err_release_regions;
}
/* * Enable bus mastering for the device ...
*/
pci_set_master(pdev);
/* * Allocate our adapter data structure and attach it to the device.
*/
adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); if (!adapter) {
err = -ENOMEM; goto err_release_regions;
}
pci_set_drvdata(pdev, adapter);
adapter->pdev = pdev;
adapter->pdev_dev = &pdev->dev;
/* Wait for the device to become ready before proceeding ...
*/
err = t4vf_prep_adapter(adapter); if (err) {
dev_err(adapter->pdev_dev, "device didn't become ready:" " err=%d\n", err); goto err_unmap_bar0;
}
/* For T5 and later we want to use the new BAR-based User Doorbells, * so we need to map BAR2 here ...
*/ if (!is_t4(adapter->params.chip)) {
adapter->bar2 = ioremap_wc(pci_resource_start(pdev, 2),
pci_resource_len(pdev, 2)); if (!adapter->bar2) {
dev_err(adapter->pdev_dev, "cannot map BAR2 doorbells\n");
err = -ENOMEM; goto err_unmap_bar0;
}
} /* * Initialize adapter level features.
*/
adapter->name = pci_name(pdev);
adapter->msg_enable = DFLT_MSG_ENABLE;
/* If possible, we use PCIe Relaxed Ordering Attribute to deliver * Ingress Packet Data to Free List Buffers in order to allow for * chipset performance optimizations between the Root Complex and * Memory Controllers. (Messages to the associated Ingress Queue * notifying new Packet Placement in the Free Lists Buffers will be * sent without the Relaxed Ordering Attribute thus guaranteeing that * all preceding PCIe Transaction Layer Packets will be processed * first.) But some Root Complexes have various issues with Upstream * Transaction Layer Packets with the Relaxed Ordering Attribute set. * PCIe devices under such Root Complexes will have the Relaxed * Ordering bit cleared in their configuration space, so we check our * PCIe configuration space to see if it's flagged with advice against * using Relaxed Ordering.
*/ if (!pcie_relaxed_ordering_enabled(pdev))
adapter->flags |= CXGB4VF_ROOT_NO_RELAXED_ORDERING;
err = adap_init0(adapter); if (err)
dev_err(&pdev->dev, "Adapter initialization failed, error %d. Continuing in debug mode\n",
err);
/* Initialize hash mac addr list */
INIT_LIST_HEAD(&adapter->mac_hlist);
/* * We simplistically allocate our virtual interfaces * sequentially across the port numbers to which we have * access rights. This should be configurable in some manner * ...
*/ if (pmask == 0) break;
port_id = ffs(pmask) - 1;
pmask &= ~(1 << port_id);
/* * If we haven't been able to contact the firmware, there's * nothing else we can do for this "port" ...
*/ if (!(adapter->flags & CXGB4VF_FW_OK)) continue;
viid = t4vf_alloc_vi(adapter, port_id); if (viid < 0) {
dev_err(&pdev->dev, "cannot allocate VI for port %d: err=%d\n",
port_id, viid);
err = viid; goto err_free_dev;
}
pi->viid = viid;
/* * Initialize the hardware/software state for the port.
*/
err = t4vf_port_init(adapter, pidx); if (err) {
dev_err(&pdev->dev, "cannot initialize port %d\n",
pidx); goto err_free_dev;
}
ether_addr_copy(addr.sa_data, mac);
err = cxgb4vf_set_mac_addr(netdev, &addr); if (err) {
dev_err(&pdev->dev, "unable to set MAC address %pM\n",
mac); goto err_free_dev;
}
dev_info(&pdev->dev, "Using assigned MAC ACL: %pM\n", mac);
}
}
/* See what interrupts we'll be using. If we've been configured to * use MSI-X interrupts, try to enable them but fall back to using * MSI interrupts if we can't enable MSI-X interrupts. If we can't * get MSI interrupts we bail with the error.
*/ if (msi == MSI_MSIX && enable_msix(adapter) == 0)
adapter->flags |= CXGB4VF_USING_MSIX; else { if (msi == MSI_MSIX) {
dev_info(adapter->pdev_dev, "Unable to use MSI-X Interrupts; falling " "back to MSI Interrupts\n");
/* We're going to need a Forwarded Interrupt Queue so * that may cut into how many Queue Sets we can * support.
*/
msi = MSI_MSI;
size_nports_qsets(adapter);
}
err = pci_enable_msi(pdev); if (err) {
dev_err(&pdev->dev, "Unable to allocate MSI Interrupts;" " err=%d\n", err); goto err_free_dev;
}
adapter->flags |= CXGB4VF_USING_MSI;
}
/* Now that we know how many "ports" we have and what interrupt * mechanism we're going to use, we can configure our queue resources.
*/
cfg_queues(adapter);
/* * The "card" is now ready to go. If any errors occur during device * registration we do not fail the whole "card" but rather proceed * only with the ports we manage to register successfully. However we * must register at least one net device.
*/
for_each_port(adapter, pidx) { struct port_info *pi = netdev_priv(adapter->port[pidx]);
netdev = adapter->port[pidx]; if (netdev == NULL) continue;
err = register_netdev(netdev); if (err) {
dev_warn(&pdev->dev, "cannot register net device %s," " skipping\n", netdev->name); continue;
}
netif_carrier_off(netdev);
set_bit(pidx, &adapter->registered_device_map);
} if (adapter->registered_device_map == 0) {
dev_err(&pdev->dev, "could not register any net devices\n");
err = -EINVAL; goto err_disable_interrupts;
}
/* * Set up our debugfs entries.
*/ if (!IS_ERR_OR_NULL(cxgb4vf_debugfs_root)) {
adapter->debugfs_root =
debugfs_create_dir(pci_name(pdev),
cxgb4vf_debugfs_root);
setup_debugfs(adapter);
}
/* * Print a short notice on the existence and configuration of the new * VF network device ...
*/
for_each_port(adapter, pidx) {
dev_info(adapter->pdev_dev, "%s: Chelsio VF NIC PCIe %s\n",
adapter->port[pidx]->name,
(adapter->flags & CXGB4VF_USING_MSIX) ? "MSI-X" :
(adapter->flags & CXGB4VF_USING_MSI) ? "MSI" : "");
}
/* * Return success!
*/ return 0;
/* * Error recovery and exit code. Unwind state that's been created * so far and return the error.
*/
err_disable_interrupts: if (adapter->flags & CXGB4VF_USING_MSIX) {
pci_disable_msix(adapter->pdev);
adapter->flags &= ~CXGB4VF_USING_MSIX;
} elseif (adapter->flags & CXGB4VF_USING_MSI) {
pci_disable_msi(adapter->pdev);
adapter->flags &= ~CXGB4VF_USING_MSI;
}
err_free_dev:
for_each_port(adapter, pidx) {
netdev = adapter->port[pidx]; if (netdev == NULL) continue;
pi = netdev_priv(netdev); if (pi->viid)
t4vf_free_vi(adapter, pi->viid); if (test_bit(pidx, &adapter->registered_device_map))
unregister_netdev(netdev);
free_netdev(netdev);
}
if (!is_t4(adapter->params.chip))
iounmap(adapter->bar2);
/*
 * "Remove" a device: tear down all kernel and driver state created in the
 * "probe" routine and quiesce the device (disable interrupts, etc.).  (Note
 * that this is called "remove_one" in the PF Driver.)
 */
static void cxgb4vf_pci_remove(struct pci_dev *pdev)
{
	struct adapter *adapter = pci_get_drvdata(pdev);
	struct hash_mac_addr *mac_entry, *mac_next;
	int pidx;

	/* No adapter state attached: only the PCI resources remain. */
	if (!adapter)
		goto out_disable;

	/*
	 * Stop all of our activity: unregister the network ports that were
	 * registered, stop the SGE and disable whichever interrupt scheme
	 * is in use.
	 */
	for_each_port(adapter, pidx) {
		if (!test_bit(pidx, &adapter->registered_device_map))
			continue;
		unregister_netdev(adapter->port[pidx]);
	}
	t4vf_sge_stop(adapter);
	if (adapter->flags & CXGB4VF_USING_MSIX) {
		pci_disable_msix(adapter->pdev);
		adapter->flags &= ~CXGB4VF_USING_MSIX;
	} else if (adapter->flags & CXGB4VF_USING_MSI) {
		pci_disable_msi(adapter->pdev);
		adapter->flags &= ~CXGB4VF_USING_MSI;
	}

	/*
	 * Tear down our debugfs entries.
	 */
	if (!IS_ERR_OR_NULL(adapter->debugfs_root)) {
		cleanup_debugfs(adapter);
		debugfs_remove_recursive(adapter->debugfs_root);
	}

	/*
	 * Release everything else we acquired: SGE resources, the Virtual
	 * Interfaces and net devices of every port, the register mappings,
	 * the mailbox log, the hashed MAC address list and finally the
	 * adapter structure itself.
	 */
	t4vf_free_sge_resources(adapter);
	for_each_port(adapter, pidx) {
		struct net_device *port_dev = adapter->port[pidx];
		struct port_info *port;

		if (port_dev == NULL)
			continue;

		port = netdev_priv(port_dev);
		if (port->viid)
			t4vf_free_vi(adapter, port->viid);
		free_netdev(port_dev);
	}
	iounmap(adapter->regs);
	/* BAR2 is only unmapped for chips other than T4. */
	if (!is_t4(adapter->params.chip))
		iounmap(adapter->bar2);
	kfree(adapter->mbox_log);
	list_for_each_entry_safe(mac_entry, mac_next, &adapter->mac_hlist,
				 list) {
		list_del(&mac_entry->list);
		kfree(mac_entry);
	}
	kfree(adapter);

out_disable:
	/*
	 * Disable the device and release its PCI resources.
	 */
	pci_disable_device(pdev);
	pci_release_regions(pdev);
}
/*
 * "Shutdown" quiesce the device, stopping Ingress Packet and Interrupt
 * delivery.  Does nothing if no adapter is attached to the PCI device.
 */
static void cxgb4vf_pci_shutdown(struct pci_dev *pdev)
{
	struct adapter *adapter = pci_get_drvdata(pdev);
	int idx;

	if (!adapter)
		return;

	/* Unregister every network device which was successfully
	 * registered, taking the associated Virtual Interfaces out of
	 * service.
	 */
	for_each_port(adapter, idx) {
		if (!test_bit(idx, &adapter->registered_device_map))
			continue;
		unregister_netdev(adapter->port[idx]);
	}

	/* Stop the SGE and disable whichever interrupt scheme (MSI-X or
	 * MSI) is currently in use.
	 */
	t4vf_sge_stop(adapter);
	if (adapter->flags & CXGB4VF_USING_MSIX) {
		pci_disable_msix(adapter->pdev);
		adapter->flags &= ~CXGB4VF_USING_MSIX;
	} else if (adapter->flags & CXGB4VF_USING_MSI) {
		pci_disable_msi(adapter->pdev);
		adapter->flags &= ~CXGB4VF_USING_MSI;
	}

	/*
	 * Free up all Queues which will prevent further DMA and
	 * Interrupts allowing various internal pathways to drain.
	 */
	t4vf_free_sge_resources(adapter);

	/* Detach the adapter from the PCI device. */
	pci_set_drvdata(pdev, NULL);
}
/* Macros needed to support the PCI Device ID Table ...
*/ #define CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN \ staticconststruct pci_device_id cxgb4vf_pci_tbl[] = { #define CH_PCI_DEVICE_ID_FUNCTION 0x8
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.50Angebot
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-28)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.