/* num_vfs module param is obsolete.
 * Use sysfs method to enable/disable VFs.
 */
static unsigned int num_vfs;
module_param(num_vfs, uint, 0444);
MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");

/* Size of a fragment that holds received data; defaults to 2048 and is
 * registered as a module parameter with mode 0444.
 */
static ushort rx_frag_size = 2048;
module_param(rx_frag_size, ushort, 0444);
MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");

/* Per-module error detection/recovery workq shared across all functions.
 * Each function schedules its own work request on this shared workq.
 */
static struct workqueue_struct *be_err_recovery_workq;
val |= qid & DB_EQ_RING_ID_MASK;
val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
if (be_check_error(adapter, BE_ERROR_HW)) return;
if (arm)
val |= 1 << DB_EQ_REARM_SHIFT; if (clear_int)
val |= 1 << DB_EQ_CLR_SHIFT;
val |= 1 << DB_EQ_EVNT_SHIFT;
val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
iowrite32(val, adapter->db + DB_EQ_OFFSET);
}
val |= qid & DB_CQ_RING_ID_MASK;
val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
DB_CQ_RING_ID_EXT_MASK_SHIFT);
if (be_check_error(adapter, BE_ERROR_HW)) return;
if (arm)
val |= 1 << DB_CQ_REARM_SHIFT;
val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
iowrite32(val, adapter->db + DB_CQ_OFFSET);
}
staticint be_dev_mac_add(struct be_adapter *adapter, const u8 *mac)
{ int i;
/* Check if mac has already been added as part of uc-list */ for (i = 0; i < adapter->uc_macs; i++) { if (ether_addr_equal(adapter->uc_list[i].mac, mac)) { /* mac already added, skip addition */
adapter->pmac_id[0] = adapter->pmac_id[i + 1]; return 0;
}
}
/* Delete a programmed MAC entry unless the uc-list still refers to it.
 *
 * adapter: adapter whose pmac_id[] table, uc_macs count and if_handle are used
 * pmac_id: id of the programmed MAC entry to delete
 *
 * Slots pmac_id[1] .. pmac_id[uc_macs] are compared against pmac_id; slot 0
 * is not part of the scan. On a match the function returns without issuing
 * the delete command.
 */
static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
{
	int i;

	/* Skip deletion if the programmed mac is
	 * being used in uc-list
	 */
	for (i = 0; i < adapter->uc_macs; i++) {
		if (adapter->pmac_id[i + 1] == pmac_id)
			return;
	}

	be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
}
if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL;
/* Proceed further only if, User provided MAC is different * from active MAC
*/ if (ether_addr_equal(addr->sa_data, adapter->dev_mac)) return 0;
/* BE3 VFs without FILTMGMT privilege are not allowed to set its MAC * address
*/ if (BEx_chip(adapter) && be_virtfn(adapter) &&
!check_privilege(adapter, BE_PRIV_FILTMGMT)) return -EPERM;
/* if device is not running, copy MAC to netdev->dev_addr */ if (!netif_running(netdev)) goto done;
/* The PMAC_ADD cmd may fail if the VF doesn't have FILTMGMT * privilege or if PF did not provision the new MAC address. * On BE3, this cmd will always fail if the VF doesn't have the * FILTMGMT privilege. This failure is OK, only if the PF programmed * the MAC for the VF.
*/
mutex_lock(&adapter->rx_filter_lock);
status = be_dev_mac_add(adapter, (u8 *)addr->sa_data); if (!status) {
/* Delete the old programmed MAC. This call may fail if the * old MAC was already deleted by the PF driver.
*/ if (adapter->pmac_id[0] != old_pmac_id)
be_dev_mac_del(adapter, old_pmac_id);
}
mutex_unlock(&adapter->rx_filter_lock); /* Decide if the new MAC is successfully activated only after * querying the FW
*/
status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
adapter->if_handle, true, 0); if (status) goto err;
/* The MAC change did not happen, either due to lack of privilege * or PF didn't pre-provision.
*/ if (!ether_addr_equal(addr->sa_data, mac)) {
status = -EPERM; goto err;
}
/* Remember currently programmed MAC */
ether_addr_copy(adapter->dev_mac, addr->sa_data);
done:
eth_hw_addr_set(netdev, addr->sa_data);
dev_info(dev, "MAC address changed to %pM\n", addr->sa_data); return 0;
err:
dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data); return status;
}
/* BE2 supports only v0 cmd */ staticvoid *hw_stats_from_cmd(struct be_adapter *adapter)
{ if (BE2_chip(adapter)) { struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
/* receiver fifo overrun */
/* drops_no_pbuf is not per i/f, it's per BE card */
stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
drvs->rx_input_fifo_overflow_drop +
drvs->rx_drops_no_pbuf;
}
/* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
 * to avoid the swap and shift/mask operations in wrb_fill().
 *
 * wrb: the work-request block to clear; its frag_pa_hi, frag_pa_lo,
 *      frag_len and rsvd0 fields are each set to 0.
 */
static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
{
	wrb->frag_pa_hi = 0;
	wrb->frag_pa_lo = 0;
	wrb->frag_len = 0;
	wrb->rsvd0 = 0;
}
vlan_tag = skb_vlan_tag_get(skb);
vlan_prio = skb_vlan_tag_get_prio(skb); /* If vlan priority provided by OS is NOT in available bmap */ if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
adapter->recommended_prio_bits;
return vlan_tag;
}
/* Used only for IP tunnel packets.
 *
 * Returns the next-protocol value carried by the inner IP header: the
 * 'protocol' field when the inner header's version is 4, otherwise the
 * 'nexthdr' field of the inner IPv6 header.
 */
static u16 skb_inner_ip_proto(struct sk_buff *skb)
{
	if (inner_ip_hdr(skb)->version == 4)
		return inner_ip_hdr(skb)->protocol;

	return inner_ipv6_hdr(skb)->nexthdr;
}
/* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this * hack is not needed, the evt bit is set while ringing DB.
*/
SET_TX_WRB_HDR_BITS(event, hdr,
BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
SET_TX_WRB_HDR_BITS(vlan, hdr,
BE_WRB_F_GET(wrb_params->features, VLAN));
SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
/* Bring the queue back to the state it was in before be_xmit_enqueue() routine
 * was invoked. The producer index is restored to the previous packet and the
 * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
 *
 * adapter:    adapter owning the PCI device used for unmapping
 * txo:        TX object whose queue (txo->q) is rewound
 * head:       producer index to restore
 * map_single: passed to unmap_tx_frag() for the first data WRB only; every
 *             later WRB is unmapped with 'false'
 * copied:     running byte count; each WRB's frag_len is subtracted from it
 *             and the walk stops when it reaches zero
 */
static void be_xmit_restore(struct be_adapter *adapter,
			    struct be_tx_obj *txo, u32 head, bool map_single,
			    u32 copied)
{
	struct device *dev;
	struct be_eth_wrb *wrb;
	struct be_queue_info *txq = &txo->q;

	dev = &adapter->pdev->dev;
	txq->head = head;

	/* skip the first wrb (hdr); it's not mapped */
	queue_head_inc(txq);
	while (copied) {
		wrb = queue_head_node(txq);
		unmap_tx_frag(dev, wrb, map_single);
		map_single = false;
		copied -= le32_to_cpu(wrb->frag_len);
		queue_head_inc(txq);
	}

	/* The walk above advanced head; put it back where the caller asked */
	txq->head = head;
}
/* Enqueue the given packet for transmit. This routine allocates WRBs for the * packet, dma maps the packet buffers and sets up the WRBs. Returns the number * of WRBs used up by the packet.
*/ static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo, struct sk_buff *skb, struct be_wrb_params *wrb_params)
{
u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb); struct device *dev = &adapter->pdev->dev; bool map_single = false;
u32 head;
dma_addr_t busaddr; int len;
head = be_tx_get_wrb_hdr(txo);
if (skb->len > skb->data_len) {
len = skb_headlen(skb);
/* For padded packets, BE HW modifies tot_len field in IP header * incorrectly when VLAN tag is inserted by HW. * For padded packets, Lancer computes incorrect checksum.
*/
eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
VLAN_ETH_HLEN : ETH_HLEN; if (skb->len <= 60 &&
(lancer_chip(adapter) || BE3_chip(adapter) ||
skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) {
ip = (struct iphdr *)ip_hdr(skb); if (unlikely(pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len)))) goto tx_drop;
}
/* If vlan tag is already inlined in the packet, skip HW VLAN * tagging in pvid-tagging mode
*/ if (be_pvid_tagging_enabled(adapter) &&
veh->h_vlan_proto == htons(ETH_P_8021Q))
BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
/* HW has a bug wherein it will calculate CSUM for VLAN * pkts even though it is disabled. * Manually insert VLAN in pkt.
*/ if (skb->ip_summed != CHECKSUM_PARTIAL &&
skb_vlan_tag_present(skb)) {
skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params); if (unlikely(!skb)) goto err;
}
/* HW may lockup when VLAN HW tagging is requested on * certain ipv6 packets. Drop such pkts if the HW workaround to * skip HW tagging is not enabled by FW.
*/ if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
(adapter->pvid || adapter->qnq_vid) &&
!qnq_async_evt_rcvd(adapter))) goto tx_drop;
/* Manual VLAN tag insertion to prevent: * ASIC lockup when the ASIC inserts VLAN tag into * certain ipv6 packets. Insert VLAN tags in driver, * and set event, completion, vlan bits accordingly * in the Tx WRB.
*/ if (be_ipv6_tx_stall_chk(adapter, skb) &&
be_vlan_tag_tx_chk(adapter, skb)) {
skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params); if (unlikely(!skb)) goto err;
}
/* Lancer, SH and BE3 in SRIOV mode have a bug wherein * packets that are 32b or less may cause a transmit stall * on that port. The workaround is to pad such packets * (len <= 32 bytes) to a minimum length of 36b.
*/ if (skb->len <= 32) { if (skb_put_padto(skb, 36)) return NULL;
}
if (BEx_chip(adapter) || lancer_chip(adapter)) {
skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params); if (!skb) return NULL;
}
/* The stack can send us skbs with length greater than * what the HW can handle. Trim the extra bytes.
*/
WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
err = pskb_trim(skb, BE_MAX_GSO_SIZE);
WARN_ON(err);
/* Mark the last request eventable if it hasn't been marked already */ if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
/* compose a dummy wrb if there are odd set of wrbs to notify */ if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
wrb_fill_dummy(queue_head_node(txq));
queue_head_inc(txq);
atomic_inc(&txq->used);
txo->pend_wrb_cnt++;
hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
TX_HDR_WRB_NUM_SHIFT);
hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
TX_HDR_WRB_NUM_SHIFT);
}
be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
txo->pend_wrb_cnt = 0;
}
if (is_udp_pkt((*skb))) { struct udphdr *udp = udp_hdr((*skb));
switch (ntohs(udp->dest)) { case DHCP_CLIENT_PORT:
os2bmc = is_dhcp_client_filt_enabled(adapter); goto done; case DHCP_SERVER_PORT:
os2bmc = is_dhcp_srvr_filt_enabled(adapter); goto done; case NET_BIOS_PORT1: case NET_BIOS_PORT2:
os2bmc = is_nbios_filt_enabled(adapter); goto done; case DHCPV6_RAS_PORT:
os2bmc = is_ipv6_ras_filt_enabled(adapter); goto done; default: break;
}
}
done: /* For packets over a vlan, which are destined * to BMC, asic expects the vlan to be inline in the packet.
*/ if (os2bmc)
*skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); if (unlikely(!wrb_cnt)) goto drop_skb;
/* if os2bmc is enabled and if the pkt is destined to bmc, * enqueue the pkt a 2nd time with mgmt bit set.
*/ if (be_send_pkt_to_bmc(adapter, &skb)) {
BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params); if (unlikely(!wrb_cnt)) goto drop_skb; else
skb_get(skb);
}
if (be_is_txq_full(txo)) {
netif_stop_subqueue(netdev, q_idx);
tx_stats(txo)->tx_stops++;
}
if (flush || __netif_subqueue_stopped(netdev, q_idx))
be_xmit_flush(adapter, txo);
return NETDEV_TX_OK;
drop_skb:
dev_kfree_skb_any(skb);
drop:
tx_stats(txo)->tx_drv_drops++; /* Flush the already enqueued tx requests */ if (flush && txo->pend_wrb_cnt)
be_xmit_flush(adapter, txo);
status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF); if (!status) {
dev_info(dev, "Disabling VLAN promiscuous mode\n");
adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
} return status;
}
/*
 * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
 * If the user configures more, place BE in vlan promiscuous mode.
 *
 * Returns 0 without touching the HW when the netdev is in IFF_PROMISC.
 * Otherwise returns the result of be_set_vlan_promisc() when that path is
 * taken, or the status of be_clear_vlan_promisc()/be_cmd_vlan_config().
 */
static int be_vid_config(struct be_adapter *adapter)
{
	struct device *dev = &adapter->pdev->dev;
	u16 vids[BE_NUM_VLANS_SUPPORTED];
	u16 num = 0, i = 0;
	int status = 0;

	/* No need to change the VLAN state if the I/F is in promiscuous */
	if (adapter->netdev->flags & IFF_PROMISC)
		return 0;

	if (adapter->vlans_added > be_max_vlans(adapter))
		return be_set_vlan_promisc(adapter);

	if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
		status = be_clear_vlan_promisc(adapter);
		if (status)
			return status;
	}

	/* Construct VLAN Table to give to HW */
	for_each_set_bit(i, adapter->vids, VLAN_N_VID)
		vids[num++] = cpu_to_le16(i);

	status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
	if (status) {
		dev_err(dev, "Setting HW VLAN filtering failed\n");
		/* Set to VLAN promisc mode as setting VLAN filter failed */
		if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
		    addl_status(status) ==
				MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
			return be_set_vlan_promisc(adapter);
	}

	return status;
}
staticint be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
{ struct be_adapter *adapter = netdev_priv(netdev); int status = 0;
mutex_lock(&adapter->rx_filter_lock);
/* Packets with VID 0 are always received by Lancer by default */ if (lancer_chip(adapter) && vid == 0) goto done;
/* Turn on multicast-promiscuous RX filtering if it is not already on.
 *
 * Does nothing when BE_IF_FLAGS_MCAST_PROMISCUOUS is already set in
 * adapter->if_flags. The flag is recorded only when the rx-filter command
 * returns 0; a failing command leaves if_flags unchanged.
 */
static void be_set_mc_promisc(struct be_adapter *adapter)
{
	int status;

	if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
		return;

	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
	if (!status)
		adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
}
/* Turn on promiscuous RX filtering (BE_IF_FLAGS_PROMISCUOUS) if it is not
 * already on.
 *
 * Does nothing when the flag is already set in adapter->if_flags. The flag
 * is recorded only when the rx-filter command returns 0.
 */
static void be_set_uc_promisc(struct be_adapter *adapter)
{
	int status;

	if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
		return;

	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
	if (!status)
		adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
}
/* Turn off promiscuous RX filtering (BE_IF_FLAGS_PROMISCUOUS) if it is on.
 *
 * Does nothing when the flag is not set in adapter->if_flags. The flag is
 * cleared only when the rx-filter command returns 0.
 */
static void be_clear_uc_promisc(struct be_adapter *adapter)
{
	int status;

	if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
		return;

	status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
	if (!status)
		adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
}
/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync(). * We use a single callback function for both sync and unsync. We really don't * add/remove addresses through this callback. But, we use it to detect changes * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
*/ staticint be_uc_list_update(struct net_device *netdev, constunsignedchar *addr)
{ struct be_adapter *adapter = netdev_priv(netdev);
if (netdev->flags & IFF_PROMISC) {
adapter->update_mc_list = false;
} elseif (netdev->flags & IFF_ALLMULTI ||
netdev_mc_count(netdev) > be_max_mc(adapter)) { /* Enable multicast promisc if num configured exceeds * what we support
*/
mc_promisc = true;
adapter->update_mc_list = false;
} elseif (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) { /* Update mc-list unconditionally if the iface was previously * in mc-promisc mode and now is out of that mode.
*/
adapter->update_mc_list = true;
}
if (adapter->update_mc_list) { int i = 0;
/* cache the mc-list in adapter */
netdev_for_each_mc_addr(ha, netdev) {
ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
i++;
}
adapter->mc_count = netdev_mc_count(netdev);
}
netif_addr_unlock_bh(netdev);
if (mc_promisc) {
be_set_mc_promisc(adapter);
} elseif (adapter->update_mc_list) {
status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON); if (!status)
adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS; else
be_set_mc_promisc(adapter);
if (netdev->flags & IFF_PROMISC) {
adapter->update_uc_list = false;
} elseif (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
uc_promisc = true;
adapter->update_uc_list = false;
} elseif (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) { /* Update uc-list unconditionally if the iface was previously * in uc-promisc mode and now is out of that mode.
*/
adapter->update_uc_list = true;
}
if (adapter->update_uc_list) { /* cache the uc-list in adapter array */
i = 0;
netdev_for_each_uc_addr(ha, netdev) {
ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
i++;
}
curr_uc_macs = netdev_uc_count(netdev);
}
netif_addr_unlock_bh(netdev);
if (uc_promisc) {
be_set_uc_promisc(adapter);
} elseif (adapter->update_uc_list) {
be_clear_uc_promisc(adapter);
for (i = 0; i < adapter->uc_macs; i++)
be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
for (i = 0; i < curr_uc_macs; i++)
be_uc_mac_add(adapter, i);
adapter->uc_macs = curr_uc_macs;
adapter->update_uc_list = false;
}
}
if (netdev->flags & IFF_PROMISC) { if (!be_in_all_promisc(adapter))
be_set_all_promisc(adapter);
} elseif (be_in_all_promisc(adapter)) { /* We need to re-program the vlan-list or clear * vlan-promisc mode (if needed) when the interface * comes out of promisc mode.
*/
be_vid_config(adapter);
}
/* Enable Transparent VLAN Tagging (TVT) on a VF.
 *
 * adapter: adapter holding the vf_cfg[] table
 * vf:      index into adapter->vf_cfg[]; commands are issued with vf + 1
 * vlan:    value handed to be_cmd_set_hsw_config()
 *
 * Returns the status of be_cmd_set_hsw_config() if that command fails,
 * otherwise 0. Failures of the later VLAN-filter clear and privilege
 * update are not propagated to the caller.
 */
static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
{
	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
	u16 vids[BE_NUM_VLANS_SUPPORTED];
	int vf_if_id = vf_cfg->if_handle;
	int status;

	/* Enable Transparent VLAN Tagging */
	status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
	if (status)
		return status;

	/* Clear pre-programmed VLAN filters on VF if any, if TVT is enabled */
	vids[0] = 0;
	status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
	if (!status)
		dev_info(&adapter->pdev->dev,
			 "Cleared guest VLANs on VF%d", vf);

	/* After TVT is enabled, disallow VFs to program VLAN filters */
	if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
						  ~BE_PRIV_FILTMGMT, vf + 1);
		/* Cache the new privilege mask only if the command succeeded */
		if (!status)
			vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
	}

	return 0;
}
/* Disable Transparent VLAN Tagging (TVT) on a VF.
 *
 * adapter: adapter holding the vf_cfg[] table
 * vf:      index into adapter->vf_cfg[]; commands are issued with vf + 1
 *
 * Returns the status of be_cmd_set_hsw_config() if that command fails,
 * otherwise 0. A failure to restore the FILTMGMT privilege is not
 * propagated to the caller.
 */
static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
{
	struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
	struct device *dev = &adapter->pdev->dev;
	int status;

	/* Reset Transparent VLAN Tagging. */
	status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
				       vf_cfg->if_handle, 0, 0);
	if (status)
		return status;

	/* Allow VFs to program VLAN filtering */
	if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
		status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
						  BE_PRIV_FILTMGMT, vf + 1);
		if (!status) {
			vf_cfg->privileges |= BE_PRIV_FILTMGMT;
			dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
		}
	}

	dev_info(dev,
		 "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
	return 0;
}
if (vlan_proto != htons(ETH_P_8021Q)) return -EPROTONOSUPPORT;
if (vlan || qos) {
vlan |= qos << VLAN_PRIO_SHIFT;
status = be_set_vf_tvt(adapter, vf, vlan);
} else {
status = be_clear_vf_tvt(adapter, vf);
}
if (status) {
dev_err(&adapter->pdev->dev, "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
status); return be_cmd_status(status);
}
vf_cfg->vlan_tag = vlan; return 0;
}
staticint be_set_vf_tx_rate(struct net_device *netdev, int vf, int min_tx_rate, int max_tx_rate)
{ struct be_adapter *adapter = netdev_priv(netdev); struct device *dev = &adapter->pdev->dev; int percent_rate, status = 0;
u16 link_speed = 0;
u8 link_status;
if (!sriov_enabled(adapter)) return -EPERM;
if (vf >= adapter->num_vfs) return -EINVAL;
if (min_tx_rate) return -EINVAL;
if (!max_tx_rate) goto config_qos;
status = be_cmd_link_status_query(adapter, &link_speed,
&link_status, 0); if (status) goto err;
if (!link_status) {
dev_err(dev, "TX-rate setting not allowed when link is down\n");
status = -ENETDOWN; goto err;
}
if (max_tx_rate < 100 || max_tx_rate > link_speed) {
dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
link_speed);
status = -EINVAL; goto err;
}
/* On Skyhawk the QOS setting must be done only as a % value */
percent_rate = link_speed / 100; if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
percent_rate);
status = -EINVAL; goto err;
}
config_qos:
status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1); if (status) goto err;
/* Throwaway the data in the Rx completion.
 *
 * rxo:  RX object the page-info entries are fetched from
 * rxcp: completion being discarded; rxcp->num_rcvd entries are consumed
 *
 * For each entry the page reference is dropped with put_page() and the
 * page-info slot is zeroed.
 */
static void be_rx_compl_discard(struct be_rx_obj *rxo,
				struct be_rx_compl_info *rxcp)
{
	struct be_rx_page_info *page_info;
	u16 i, num_rcvd = rxcp->num_rcvd;

	for (i = 0; i < num_rcvd; i++) {
		page_info = get_rx_page_info(rxo);
		put_page(page_info->page);
		memset(page_info, 0, sizeof(*page_info));
	}
}
/* * skb_fill_rx_data forms a complete skb for an ether frame * indicated by rxcp.
*/ staticvoid skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb, struct be_rx_compl_info *rxcp)
{ struct be_rx_page_info *page_info;
u16 i, j;
u16 hdr_len, curr_frag_len, remaining;
u8 *start;
/* Copy data in the first descriptor of this completion */
curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
skb->len = curr_frag_len; if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
memcpy(skb->data, start, curr_frag_len); /* Complete packet has now been moved to data */
put_page(page_info->page);
skb->data_len = 0;
skb->tail += curr_frag_len;
} else {
hdr_len = ETH_HLEN;
memcpy(skb->data, start, hdr_len);
skb_shinfo(skb)->nr_frags = 1;
skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[0],
page_info->page,
page_info->page_offset + hdr_len,
curr_frag_len - hdr_len);
skb->data_len = curr_frag_len - hdr_len;
skb->truesize += rx_frag_size;
skb->tail += hdr_len;
}
page_info->page = NULL;
if (rxcp->pkt_size <= rx_frag_size) {
BUG_ON(rxcp->num_rcvd != 1); return;
}
/* More frags present for this completion */
remaining = rxcp->pkt_size - curr_frag_len; for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
page_info = get_rx_page_info(rxo);
curr_frag_len = min(remaining, rx_frag_size);
/* Coalesce all frags from the same physical page in one slot */ if (page_info->page_offset == 0) { /* Fresh page */
j++;
skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
page_info->page,
page_info->page_offset,
curr_frag_len);
skb_shinfo(skb)->nr_frags++;
} else {
put_page(page_info->page);
skb_frag_size_add(&skb_shinfo(skb)->frags[j],
curr_frag_len);
}
if (rxcp->vlanf)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
netif_receive_skb(skb);
}
/* Process the RX completion indicated by rxcp when GRO is enabled */ staticvoid be_rx_compl_process_gro(struct be_rx_obj *rxo, struct napi_struct *napi, struct be_rx_compl_info *rxcp)
{ struct be_adapter *adapter = rxo->adapter; struct be_rx_page_info *page_info; struct sk_buff *skb = NULL;
u16 remaining, curr_frag_len;
u16 i, j;
skb = napi_get_frags(napi); if (!skb) {
be_rx_compl_discard(rxo, rxcp); return;
}
remaining = rxcp->pkt_size; for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
page_info = get_rx_page_info(rxo);
curr_frag_len = min(remaining, rx_frag_size);
/* Coalesce all frags from the same physical page in one slot */ if (i == 0 || page_info->page_offset == 0) { /* First frag or Fresh page */
j++;
skb_frag_fill_page_desc(&skb_shinfo(skb)->frags[j],
page_info->page,
page_info->page_offset,
curr_frag_len);
} else {
put_page(page_info->page);
skb_frag_size_add(&skb_shinfo(skb)->frags[j],
curr_frag_len);
}
/* For checking the valid bit it is Ok to use either definition as the
* valid bit is at the same position in both v0 and v1 Rx compl */ if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0) return NULL;
rmb();
be_dws_le_to_cpu(compl, sizeof(*compl));
if (adapter->be3_native)
be_parse_rx_compl_v1(compl, rxcp); else
be_parse_rx_compl_v0(compl, rxcp);
if (rxcp->ip_frag)
rxcp->l4_csum = 0;
if (rxcp->vlanf) { /* In QNQ modes, if qnq bit is not set, then the packet was * tagged only with the transparent outer vlan-tag and must * not be treated as a vlan packet by host
*/ if (be_is_qnq_mode(adapter) && !rxcp->qnq)
rxcp->vlanf = 0;
if (!lancer_chip(adapter))
rxcp->vlan_tag = swab16(rxcp->vlan_tag);
/* Any space left in the current big page for another frag? */ if ((page_offset + rx_frag_size + rx_frag_size) >
adapter->big_page_size) {
pagep = NULL;
page_info->last_frag = true;
dma_unmap_addr_set(page_info, bus, page_dmaaddr);
} else {
dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
}
/* Mark the last frag of a page when we break out of the above loop * with no more slots available in the RXQ
*/ if (pagep) {
prev_page_info->last_frag = true;
dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
}
if (posted) {
atomic_add(posted, &rxq->used); if (rxo->rx_post_starved)
rxo->rx_post_starved = false; do {
notify = min(MAX_NUM_POST_ERX_DB, posted);
be_rxq_notify(adapter, rxq->id, notify);
posted -= notify;
} while (posted);
} elseif (atomic_read(&rxq->used) == 0) { /* Let be_worker replenish when memory is available */
rxo->rx_post_starved = true;
}
}
/* Bump the per-queue TX error counter that corresponds to a TX completion
 * status code.
 *
 * txo:    TX object whose stats (via tx_stats()) are updated
 * status: completion status; codes not listed below are ignored
 */
static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
{
	switch (status) {
	case BE_TX_COMP_HDR_PARSE_ERR:
		tx_stats(txo)->tx_hdr_parse_err++;
		break;
	case BE_TX_COMP_NDMA_ERR:
		tx_stats(txo)->tx_dma_err++;
		break;
	case BE_TX_COMP_ACL_ERR:
		tx_stats(txo)->tx_spoof_check_err++;
		break;
	default:
		/* Status codes without a dedicated counter are not counted */
		break;
	}
}
/* Bump the per-queue TX error counter that corresponds to a LANCER_TX_COMP_*
 * completion status code.
 *
 * txo:    TX object whose stats (via tx_stats()) are updated
 * status: completion status; codes not listed below are ignored
 */
static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
{
	switch (status) {
	case LANCER_TX_COMP_LSO_ERR:
		tx_stats(txo)->tx_tso_err++;
		break;
	/* Both HSW drop reasons are accounted as spoof-check errors */
	case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
	case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
		tx_stats(txo)->tx_spoof_check_err++;
		break;
	case LANCER_TX_COMP_QINQ_ERR:
		tx_stats(txo)->tx_qinq_err++;
		break;
	case LANCER_TX_COMP_PARITY_ERR:
		tx_stats(txo)->tx_internal_parity_err++;
		break;
	case LANCER_TX_COMP_DMA_ERR:
		tx_stats(txo)->tx_dma_err++;
		break;
	case LANCER_TX_COMP_SGE_ERR:
		tx_stats(txo)->tx_sge_err++;
		break;
	default:
		/* Status codes without a dedicated counter are not counted */
		break;
	}
}
/* Consume pending rx completions. * Wait for the flush completion (identified by zero num_rcvd) * to arrive. Notify CQ even when there are no more CQ entries * for HW to flush partially coalesced CQ entries. * In Lancer, there is no need to wait for flush compl.
*/ for (;;) {
rxcp = be_rx_compl_get(rxo); if (!rxcp) { if (lancer_chip(adapter)) break;
/* Free enqueued TX that was never notified to HW */
for_all_tx_queues(adapter, txo, i) {
txq = &txo->q;
if (atomic_read(&txq->used)) {
dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
i, atomic_read(&txq->used));
notified_idx = txq->tail;
end_idx = txq->tail;
index_adv(&end_idx, atomic_read(&txq->used) - 1,
txq->len); /* Use the tx-compl process logic to handle requests * that were not sent to the HW.
*/
num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
atomic_sub(num_wrbs, &txq->used);
BUG_ON(atomic_read(&txq->used));
txo->pend_wrb_cnt = 0; /* Since hw was never notified of these requests, * reset TXQ indices
*/
txq->head = notified_idx;
txq->tail = notified_idx;
}
}
}
staticvoid be_evt_queues_destroy(struct be_adapter *adapter)
{ struct be_eq_obj *eqo; int i;
staticint be_evt_queues_create(struct be_adapter *adapter)
{ struct be_queue_info *eq; struct be_eq_obj *eqo; struct be_aic_obj *aic; int i, rc;
/* need enough EQs to service both RX and TX queues */
adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
max(adapter->cfg_num_rx_irqs,
adapter->cfg_num_tx_irqs));
adapter->aic_enabled = true;
for_all_evt_queues(adapter, eqo, i) { int numa_node = dev_to_node(&adapter->pdev->dev);
q = &adapter->mcc_obj.q; if (q->created)
be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
be_queue_free(adapter, q);
q = &adapter->mcc_obj.cq; if (q->created)
be_cmd_q_destroy(adapter, q, QTYPE_CQ);
be_queue_free(adapter, q);
}
/* Must be called only after TX qs are created as MCC shares TX EQ */ staticint be_mcc_queues_create(struct be_adapter *adapter)
{ struct be_queue_info *q, *cq;
/* If num_evt_qs is less than num_tx_qs, then more than * one txq share an eq
*/
eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3); if (status) return status;
status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN, sizeof(struct be_eth_wrb)); if (status) return status;
status = be_cmd_txq_create(adapter, txo); if (status) return status;
/* When the interface is not capable of RSS rings (and there is no * need to create a default RXQ) we'll still need one RXQ
*/ if (adapter->num_rx_qs == 0)
adapter->num_rx_qs = 1;
/* IRQ is not expected when NAPI is scheduled as the EQ
 * will not be armed.
 * But, this can happen on Lancer INTx where it takes
 * a while to de-assert INTx or in BE2 where occasionally
 * an interrupt may be raised even when EQ is unarmed.
 * If NAPI is already scheduled, then counting & notifying
 * events will orphan them.
*/ if (napi_schedule_prep(&eqo->napi)) {
num_evts = events_get(eqo);
__napi_schedule(&eqo->napi); if (num_evts)
eqo->spurious_intr = 0;
}
be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
/* Return IRQ_HANDLED only for the first spurious intr * after a valid intr to stop the kernel from branding * this irq as a bad one!
*/ if (num_evts || eqo->spurious_intr++ == 0) return IRQ_HANDLED; else return IRQ_NONE;
}
for (work_done = 0; work_done < budget; work_done++) {
rxcp = be_rx_compl_get(rxo); if (!rxcp) break;
/* Is it a flush compl that has no data */ if (unlikely(rxcp->num_rcvd == 0)) goto loop_continue;
/* Discard compl with partial DMA Lancer B0 */ if (unlikely(!rxcp->pkt_size)) {
be_rx_compl_discard(rxo, rxcp); goto loop_continue;
}
/* On BE drop pkts that arrive due to imperfect filtering in * promiscuous mode on some skews
*/ if (unlikely(rxcp->port != adapter->port_num &&
!lancer_chip(adapter))) {
be_rx_compl_discard(rxo, rxcp); goto loop_continue;
}
if (do_gro(rxcp))
be_rx_compl_process_gro(rxo, napi, rxcp); else
be_rx_compl_process(rxo, napi, rxcp);
if (work_done) {
be_cq_notify(adapter, rx_cq->id, true, work_done);
/* When an rx-obj gets into post_starved state, just * let be_worker do the posting.
*/ if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
!rxo->rx_post_starved)
be_post_rx_frags(rxo, GFP_ATOMIC,
max_t(u32, MAX_RX_POST,
frags_consumed));
}
return work_done;
}
staticvoid be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo, int idx)
{ int num_wrbs = 0, work_done = 0; struct be_tx_compl_info *txcp;
if (work_done) {
be_cq_notify(adapter, txo->cq.id, true, work_done);
atomic_sub(num_wrbs, &txo->q.used);
/* As Tx wrbs have been freed up, wake up netdev queue
* if it was stopped due to lack of tx wrbs. */ if (__netif_subqueue_stopped(adapter->netdev, idx) &&
be_can_txq_wake(txo)) {
netif_wake_subqueue(adapter->netdev, idx);
}
/* This loop will iterate twice for EQ0 in which * completions of the last RXQ (default one) are also processed * For other EQs the loop iterates only once
*/
for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
work = be_process_rx(rxo, napi, budget);
max_work = max(work, max_work);
}
if (is_mcc_eqo(eqo))
be_process_mcc(adapter);
if (max_work < budget) {
napi_complete_done(napi, max_work);
/* Skyhawk EQ_DB has a provision to set the rearm to interrupt * delay via a delay multiplier encoding value
*/ if (skyhawk_chip(adapter))
mult_enc = be_get_eq_delay_mult_enc(eqo);
if (ue_lo || ue_hi) { /* On certain platforms BE3 hardware can indicate * spurious UEs. In case of a UE in the chip, * the POST register correctly reports either a * FAT_LOG_START state (FW is currently dumping * FAT log data) or a ARMFW_UE state. Check for the * above states to ascertain if the UE is valid or not.
*/ if (BE3_chip(adapter)) {
val = be_POST_stage_get(adapter); if ((val & POST_STAGE_FAT_LOG_START)
!= POST_STAGE_FAT_LOG_START &&
(val & POST_STAGE_ARMFW_UE)
!= POST_STAGE_ARMFW_UE &&
(val & POST_STAGE_RECOVERABLE_ERR)
!= POST_STAGE_RECOVERABLE_ERR) return;
}
dev_err(dev, "Error detected in the adapter");
be_set_error(adapter, BE_ERROR_UE);
for (i = 0; ue_lo; ue_lo >>= 1, i++) { if (ue_lo & 1)
dev_err(dev, "UE: %s bit set\n",
ue_status_low_desc[i]);
} for (i = 0; ue_hi; ue_hi >>= 1, i++) { if (ue_hi & 1)
dev_err(dev, "UE: %s bit set\n",
ue_status_hi_desc[i]);
}
}
}
}
staticint be_msix_enable(struct be_adapter *adapter)
{ unsignedint i, max_roce_eqs; struct device *dev = &adapter->pdev->dev; int num_vec;
/* If RoCE is supported, program the max number of vectors that * could be used for NIC and RoCE, else, just program the number * we'll use initially.
*/ if (be_roce_supported(adapter)) {
max_roce_eqs =
be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
max_roce_eqs = min(max_roce_eqs, num_online_cpus());
num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
} else {
num_vec = max(adapter->cfg_num_rx_irqs,
adapter->cfg_num_tx_irqs);
}
for (i = 0; i < num_vec; i++)
adapter->msix_entries[i].entry = i;
if (msix_enabled(adapter)) {
status = be_msix_register(adapter); if (status == 0) goto done; /* INTx is not supported for VF */ if (be_virtfn(adapter)) return status;
}
/* INTx: only the first EQ is used */
netdev->irq = adapter->pdev->irq;
status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
&adapter->eq_obj[0]); if (status) {
dev_err(&adapter->pdev->dev, "INTx request IRQ failed - err %d\n", status); return status;
}
done:
adapter->isr_registered = true; return 0;
}
staticvoid be_irq_unregister(struct be_adapter *adapter)
{ struct net_device *netdev = adapter->netdev; struct be_eq_obj *eqo; int i, vec;
for_all_rx_queues(adapter, rxo, i) {
q = &rxo->q; if (q->created) { /* If RXQs are destroyed while in an "out of buffer" * state, there is a possibility of an HW stall on * Lancer. So, post 64 buffers to each queue to relieve * the "out of buffer" condition. * Make sure there's space in the RXQ before posting.
*/ if (lancer_chip(adapter)) {
be_rx_cq_clean(rxo); if (atomic_read(&q->used) == 0)
be_post_rx_frags(rxo, GFP_KERNEL,
MAX_RX_POST);
}
/* The IFACE flags are enabled in the open path and cleared * in the close path. When a VF gets detached from the host and * assigned to a VM the following happens: * - VF's IFACE flags get cleared in the detach path * - IFACE create is issued by the VF in the attach path * Due to a bug in the BE3/Skyhawk-R FW * (Lancer FW doesn't have the bug), the IFACE capability flags * specified along with the IFACE create cmd issued by a VF are not * honoured by FW. As a consequence, if a *new* driver * (that enables/disables IFACE flags in open/close) * is loaded in the host and an *old* driver is * used by a VM/VF, * the IFACE gets created *without* the needed flags. * To avoid this, disable RX-filter flags only for Lancer.
*/ if (lancer_chip(adapter)) {
be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
}
}
/* This protection is needed as be_close() may be called even when the * adapter is in cleared state (after eeh perm failure)
*/ if (!(adapter->flags & BE_FLAGS_SETUP_DONE)) return 0;
/* Before attempting cleanup ensure all the pending cmds in the * config_wq have finished execution
*/
flush_workqueue(be_wq);
memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
} else { /* Disable RSS, if only default RX Q is created */
rss->rss_flags = RSS_ENABLE_NONE;
}
/* Post 1 less than RXQ-len to avoid head being equal to tail, * which is a queue empty condition
*/
for_all_rx_queues(adapter, rxo, i)
be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
return 0;
}
staticint be_enable_if_filters(struct be_adapter *adapter)
{ int status;
status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON); if (status) return status;
/* Normally this condition usually true as the ->dev_mac is zeroed. * But on BE3 VFs the initial MAC is pre-programmed by PF and * subsequent be_dev_mac_add() can fail (after fresh boot)
*/ if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) { int old_pmac_id = -1;
/* Remember old programmed MAC if any - can happen on BE3 VF */ if (!is_zero_ether_addr(adapter->dev_mac))
old_pmac_id = adapter->pmac_id[0];
status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) return status;
/* Delete the old programmed MAC as we successfully programmed * a new MAC
*/ if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
be_dev_mac_del(adapter, old_pmac_id);
mac[5] = (u8)(addr & 0xFF);
mac[4] = (u8)((addr >> 8) & 0xFF);
mac[3] = (u8)((addr >> 16) & 0xFF); /* Use the OUI from the current MAC address */
memcpy(mac, adapter->netdev->dev_addr, 3);
}
/* * Generate a seed MAC address from the PF MAC Address using jhash. * MAC Address for VFs are assigned incrementally starting from the seed. * These addresses are programmed in the ASIC by the PF and the VF driver * queries for the MAC address during its probe.
*/ staticint be_vf_eth_addr_config(struct be_adapter *adapter)
{
u32 vf; int status = 0;
u8 mac[ETH_ALEN]; struct be_vf_cfg *vf_cfg;
be_vf_eth_addr_generate(adapter, mac);
for_all_vfs(adapter, vf_cfg, vf) { if (BEx_chip(adapter))
status = be_cmd_pmac_add(adapter, mac,
vf_cfg->if_handle,
&vf_cfg->pmac_id, vf + 1); else
status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
vf + 1);
if (status)
dev_err(&adapter->pdev->dev, "Mac address assignment failed for VF %d\n",
vf); else
memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
/* VxLAN offload Notes: * * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload * is expected to work across all types of IP tunnels once exported. Skyhawk * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN * offloads in hw_enc_features only when a VxLAN port is added. If other (non * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for * those other tunnels are unexported on the fly through ndo_features_check().
*/ staticint be_vxlan_set_port(struct net_device *netdev, unsignedint table, unsignedint entry, struct udp_tunnel_info *ti)
{ struct be_adapter *adapter = netdev_priv(netdev); struct device *dev = &adapter->pdev->dev; int status;
status = be_cmd_manage_iface(adapter, adapter->if_handle,
OP_CONVERT_NORMAL_TO_TUNNEL); if (status) {
dev_warn(dev, "Failed to convert normal interface to tunnel\n"); return status;
}
adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
status = be_cmd_set_vxlan_port(adapter, ti->port); if (status) {
dev_warn(dev, "Failed to add VxLAN port\n"); return status;
}
adapter->vxlan_port = ti->port;
/* Distribute the queue resources among the PF and it's VFs */ if (num_vfs) { /* Divide the rx queues evenly among the VFs and the PF, capped * at VF-EQ-count. Any remainder queues belong to the PF.
*/
num_vf_qs = min(SH_VF_MAX_NIC_EQS,
res.max_rss_qs / (num_vfs + 1));
/* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES * RSS Tables per port. Provide RSS on VFs, only if number of * VFs requested is less than it's PF Pool's RSS Tables limit.
*/ if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
num_vf_qs = 1;
}
/* Resource with fields set to all '1's by GET_PROFILE_CONFIG cmd, * which are modifiable using SET_PROFILE_CONFIG cmd.
*/
be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
RESOURCE_MODIFIABLE, 0);
/* If RSS IFACE capability flags are modifiable for a VF, set the * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if * more than 1 RSSQ is available for a VF. * Otherwise, provision only 1 queue pair for VF.
*/ if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT); if (num_vf_qs > 1) {
vf_if_cap_flags |= BE_IF_FLAGS_RSS; if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
} else {
vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
BE_IF_FLAGS_DEFQ_RSS);
}
} else {
num_vf_qs = 1;
}
/* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally * among the PF and it's VFs, if the fields are changeable
*/ if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
/* Re-configure FW to distribute resources evenly across max-supported * number of VFs, only when VFs are not already enabled.
*/ if (skyhawk_chip(adapter) && be_physfn(adapter) &&
!pci_vfs_assigned(pdev)) {
be_calculate_vf_res(adapter,
pci_sriov_get_totalvfs(pdev),
&vft_res);
be_cmd_set_sriov_config(adapter, adapter->pool_res,
pci_sriov_get_totalvfs(pdev),
&vft_res);
}
if (!old_vfs) {
status = pci_enable_sriov(adapter->pdev, adapter->num_vfs); if (status) {
dev_err(dev, "SRIOV enable failed\n");
adapter->num_vfs = 0; goto err;
}
}
if (BE3_chip(adapter)) { /* On BE3, enable VEB only when SRIOV is enabled */
status = be_cmd_set_hsw_config(adapter, 0, 0,
adapter->if_handle,
PORT_FWD_TYPE_VEB, 0); if (status) goto err;
}
if (be_is_mc(adapter)) { /* Assuming that there are 4 channels per port, * when multi-channel is enabled
*/ if (be_is_qnq_mode(adapter))
res->max_vlans = BE_NUM_VLANS_SUPPORTED/8; else /* In a non-qnq multichannel mode, the pvid * takes up one vlan entry
*/
res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
} else {
res->max_vlans = BE_NUM_VLANS_SUPPORTED;
}
res->max_mcast_mac = BE_MAX_MC;
/* 1) For BE3 1Gb ports, FW does not support multiple TXQs * 2) Create multiple TX rings on a BE3-R multi-channel interface * *only* if it is RSS-capable.
*/ if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
be_virtfn(adapter) ||
(be_is_mc(adapter) &&
!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
res->max_tx_qs = 1;
} elseif (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) { struct be_resources super_nic_res = {0};
/* On a SuperNIC profile, the driver needs to use the * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
*/
be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
0); /* Some old versions of BE3 FW don't report max_tx_qs value */
res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
} else {
res->max_tx_qs = BE3_MAX_TX_QS;
}
/* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
 * However, this HW limitation is not exposed to the host via any SLI cmd.
 * As a result, in the case of SRIOV and in particular multi-partition configs
 * the driver needs to calculate a proportional share of RSS Tables per PF-pool
 * for distribution between the VFs. This self-imposed limit will determine the
 * number of VFs for which RSS can be enabled.
 *
 * The result is stored in adapter->pool_res.max_rss_tables.
*/ staticvoid be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
{ struct be_port_resources port_res = {0};
u8 rss_tables_on_port;
u16 max_vfs = be_max_vfs(adapter);
/* NOTE(review): nothing visible in this block assigns rss_tables_on_port or
 * fills port_res after its zero-initialisation, so as written the expression
 * below reads an indeterminate value and divides by port_res.max_vfs == 0.
 * Presumably the firmware query that populates both was lost from this copy;
 * restore it from the upstream source before building.
 */
/* Each PF Pool's RSS Tables limit =
 * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
 * (the code multiplies before dividing, so the integer division happens last)
*/
adapter->pool_res.max_rss_tables =
max_vfs * rss_tables_on_port / port_res.max_vfs;
}
/* Records the PF-pool resource snapshot in adapter->pool_res, logs how many
 * VFs are already enabled, and on Skyhawk computes the per-pool RSS table
 * limit. Every visible return path yields 0.
 *
 * NOTE(review): in this text `res` is zero-initialised and never filled by
 * any query before it is tested and copied, and the "if (old_vfs) {" opened
 * below is not closed before the Skyhawk check, which leaves the function's
 * own opening brace unmatched. Statements appear to be missing from this
 * copy - confirm against the upstream file.
 */
staticint be_get_sriov_config(struct be_adapter *adapter)
{ struct be_resources res = {0}; int max_vfs, old_vfs;
/* Some old versions of BE3 FW don't report max_vfs value */ if (BE3_chip(adapter) && !res.max_vfs) {
/* Fall back to the PCI TotalVFs value, clamped to MAX_VFS; a
 * non-positive value leaves max_vfs at 0.
 */
max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
}
adapter->pool_res = res;
/* If during previous unload of the driver, the VFs were not disabled, * then we cannot rely on the PF POOL limits for the TotalVFs value. * Instead use the TotalVFs value stored in the pci-dev struct.
*/
old_vfs = pci_num_vf(adapter->pdev); if (old_vfs) {
dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
old_vfs);
/* NOTE(review): as nested here, "!old_vfs" can never hold inside the
 * "if (old_vfs)" body; this check presumably belongs after that block.
 */
if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
be_calculate_pf_pool_rss_tables(adapter);
dev_info(&adapter->pdev->dev, "RSS can be enabled for all VFs if num_vfs <= %d\n",
be_max_pf_pool_rss_tables(adapter));
} return 0;
}
/* Prepare SR-IOV resources for the PF.
 *
 * Reads the SR-IOV configuration, and when no VFs are enabled yet publishes
 * be_max_vfs() as the PCI TotalVFs value. On Skyhawk with SR-IOV capability
 * and no pre-existing VFs, the PF-pool resources are then programmed for a
 * VF count of 0. A failure of that last command is logged but not
 * propagated: the function returns void.
 */
static void be_alloc_sriov_res(struct be_adapter *adapter)
{
	/* Sampled before be_get_sriov_config() runs, as in the original flow. */
	int old_vfs = pci_num_vf(adapter->pdev);
	struct be_resources vft_res = {0};
	int status;

	be_get_sriov_config(adapter);

	if (!old_vfs)
		pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));

	/* When the HW is in SRIOV capable configuration, the PF-pool
	 * resources are given to PF during driver load, if there are no
	 * old VFs. This facility is not available in BE3 FW.
	 * Also, this is done by FW in Lancer chip.
	 */
	if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
		be_calculate_vf_res(adapter, 0, &vft_res);
		status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
						 &vft_res);
		if (status)
			dev_err(&adapter->pdev->dev,
				"Failed to optimize SRIOV resources\n");
	}
}
/* Determine the resource limits available to this function, cache them in
 * adapter->res, and derive from them whether a default (non-RSS) RX queue is
 * needed and how many RX/TX IRQs to configure initially.
 *
 * Returns 0 on success, or the status of be_cmd_get_func_config() when that
 * query fails on a non-BEx chip.
 */
static int be_get_resources(struct be_adapter *adapter)
{
	struct be_resources res = {0};
	int status;

	/* For Lancer, SH etc read per-function resource limits from FW.
	 * GET_FUNC_CONFIG returns per function guaranteed limits.
	 * GET_PROFILE_CONFIG returns PCI-E related limits PF-pool limits
	 */
	if (BEx_chip(adapter)) {
		BEx_get_resources(adapter, &res);
	} else {
		status = be_cmd_get_func_config(adapter, &res);
		if (status)
			return status;

		/* If a default RXQ must be created, we'll use up one RSSQ */
		if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
		    !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
			res.max_rss_qs -= 1;
	}

	/* If RoCE is supported stash away half the EQs for RoCE */
	res.max_nic_evt_qs = be_roce_supported(adapter) ?
				res.max_evt_qs / 2 : res.max_evt_qs;
	adapter->res = res;

	/* If FW supports RSS default queue, then skip creating non-RSS
	 * queue for non-IP traffic.
	 */
	adapter->need_def_rxq = (be_if_cap_flags(adapter) &
				 BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;

	/* Ensure RX and TX queues are created in pairs at init time */
	adapter->cfg_num_rx_irqs =
		min_t(u16, netif_get_num_default_rss_queues(),
		      be_max_qp_irqs(adapter));
	adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;

	return 0;
}
/* Read the adapter's basic configuration from firmware.
 *
 * Controller attributes and the FW config are mandatory: a failure of
 * either query is returned to the caller. The FAT dump length (non-Lancer
 * PF only) and the active profile id (PF only) are best-effort; the profile
 * id is logged only when its query succeeds.
 *
 * Returns 0 on success or the status of the first failing mandatory query.
 */
static int be_get_config(struct be_adapter *adapter)
{
	u16 profile_id;
	int status;

	status = be_cmd_get_cntl_attributes(adapter);
	if (status)
		return status;

	status = be_cmd_query_fw_cfg(adapter);
	if (status)
		return status;

	if (!lancer_chip(adapter) && be_physfn(adapter))
		be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);

	if (be_physfn(adapter)) {
		status = be_cmd_get_active_profile(adapter, &profile_id);
		if (!status)
			dev_info(&adapter->pdev->dev, "Using profile 0x%x\n",
				 profile_id);
	}

	return 0;
}
staticint be_mac_setup(struct be_adapter *adapter)
{
u8 mac[ETH_ALEN]; int status;
if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
status = be_cmd_get_perm_mac(adapter, mac); if (status) return status;
/* alloc required memory for other filtering fields */
adapter->pmac_id = kcalloc(be_max_uc(adapter), sizeof(*adapter->pmac_id), GFP_KERNEL); if (!adapter->pmac_id) return -ENOMEM;
adapter->mc_list = kcalloc(be_max_mc(adapter), sizeof(*adapter->mc_list), GFP_KERNEL); if (!adapter->mc_list) return -ENOMEM;
adapter->uc_list = kcalloc(be_max_uc(adapter), sizeof(*adapter->uc_list), GFP_KERNEL); if (!adapter->uc_list) return -ENOMEM;
if (adapter->cfg_num_rx_irqs == 1)
cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
en_flags &= cap_flags; /* will enable all the needed filter flags in be_open() */ return be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
&adapter->if_handle, 0);
}
/* Tear down the interface and its queues and build them again.
 *
 * A running netdev is closed first and re-opened at the end. Returns the
 * status of the first step that fails, the result of be_open() when the
 * netdev is running at the end, and 0 otherwise.
 */
int be_update_queues(struct be_adapter *adapter)
{
	struct net_device *ndev = adapter->netdev;
	int rc;

	if (netif_running(ndev)) {
		/* be_tx_timeout() must not run concurrently with this
		 * function, synchronize with an already-running dev_watchdog
		 */
		netif_tx_lock_bh(ndev);
		/* device cannot transmit now, avoid dev_watchdog timeouts */
		netif_carrier_off(ndev);
		netif_tx_unlock_bh(ndev);

		be_close(ndev);
	}

	be_cancel_worker(adapter);

	/* If any vectors have been shared with RoCE we cannot re-program
	 * the MSIx table.
	 */
	if (adapter->num_msix_roce_vec == 0)
		be_msix_disable(adapter);

	be_clear_queues(adapter);

	rc = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
	if (rc != 0)
		return rc;

	if (!msix_enabled(adapter)) {
		rc = be_msix_enable(adapter);
		if (rc != 0)
			return rc;
	}

	rc = be_if_create(adapter);
	if (rc != 0)
		return rc;

	rc = be_setup_queues(adapter);
	if (rc != 0)
		return rc;

	be_schedule_worker(adapter);

	/* The IF was destroyed and re-created. We need to clear
	 * all promiscuous flags valid for the destroyed IF.
	 * Without this promisc mode is not restored during
	 * be_open() because the driver thinks that it is
	 * already enabled in HW.
	 */
	adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;

	/* Every earlier step succeeded, so the result is 0 unless re-opening
	 * a running netdev reports otherwise.
	 */
	return netif_running(ndev) ? be_open(ndev) : 0;
}
/* Parse the leading decimal major number out of a firmware version string
 * such as "4.0.100".
 *
 * Returns the parsed number, or 0 when the string does not begin with an
 * integer (sscanf() converts nothing).
 */
static inline int fw_major_num(const char *fw_ver)
{
	int fw_major = 0;

	if (sscanf(fw_ver, "%d.", &fw_major) != 1)
		return 0;

	return fw_major;
}
/* Decide whether the PF should be function-level reset (FLR).
 *
 * If it is error recovery, FLR the PF.
 * Else if any VFs are already enabled don't FLR the PF.
 *
 * Returns true when a reset is required.
 */
static bool be_reset_required(struct be_adapter *adapter)
{
	if (be_error_recovering(adapter))
		return true;

	return pci_num_vf(adapter->pdev) == 0;
}
/* NOTE(review): the text between this function's braces looks like two
 * routines spliced together. After be_intr_set() the body calls
 * be_func_init() itself, uses "goto err" although no "err:" label exists
 * inside these braces, and passes an undeclared `dev` to dev_info()/dev_err().
 * Presumably the end of be_func_init() and the head of a separate setup
 * routine were lost from this copy - confirm against the upstream file.
 */
/* Wait for the FW to be ready and perform the required initialization */ staticint be_func_init(struct be_adapter *adapter)
{ int status;
status = be_fw_wait_ready(adapter); if (status) return status;
/* FW is now ready; clear errors to allow cmds/doorbell */
be_clear_error(adapter, BE_CLEAR_ALL);
if (be_reset_required(adapter)) {
status = be_cmd_reset_function(adapter); if (status) return status;
/* Wait for interrupts to quiesce after an FLR */
msleep(100);
}
/* Tell FW we're ready to fire cmds */
status = be_cmd_fw_init(adapter); if (status) return status;
/* Allow interrupts for other ULPs running on NIC function */
be_intr_set(adapter, true);
/* NOTE(review): as written this is an unconditional self-call; see the
 * note above the function.
 */
status = be_func_init(adapter); if (status) return status;
be_setup_init(adapter);
if (!lancer_chip(adapter))
be_cmd_req_native_mode(adapter);
/* invoke this cmd first to get pf_num and vf_num which are needed * for issuing profile related cmds
*/ if (!BEx_chip(adapter)) {
status = be_cmd_get_func_config(adapter, NULL); if (status) return status;
}
status = be_get_config(adapter); if (status) goto err;
if (!BE2_chip(adapter) && be_physfn(adapter))
be_alloc_sriov_res(adapter);
status = be_get_resources(adapter); if (status) goto err;
status = be_msix_enable(adapter); if (status) goto err;
/* will enable all the needed filter flags in be_open() */
status = be_if_create(adapter); if (status) goto err;
/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
rtnl_lock();
status = be_setup_queues(adapter);
rtnl_unlock(); if (status) goto err;
status = be_mac_setup(adapter); if (status) goto err;
be_cmd_get_fw_ver(adapter);
dev_info(dev, "FW version is %s\n", adapter->fw_ver);
if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
dev_err(dev, "Firmware on card is old(%s), IRQs may not work",
adapter->fw_ver);
dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
}
/* If the requested flow-control settings cannot be applied, read back
 * what the adapter currently uses so the log line below reflects it.
 */
status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
adapter->rx_fc); if (status)
be_cmd_get_flow_control(adapter, &adapter->tx_fc,
&adapter->rx_fc);
dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
adapter->tx_fc, adapter->rx_fc);
if (be_physfn(adapter))
be_cmd_set_logical_link_config(adapter,
IFLA_VF_LINK_STATE_AUTO, 0);
/* BE3 EVB echoes broadcast/multicast packets back to PF's vport * confusing a linux bridge or OVS that it might be connected to. * Set the EVB to PASSTHRU mode which effectively disables the EVB * when SRIOV is not enabled.
*/ if (BE3_chip(adapter))
be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
PORT_FWD_TYPE_PASSTHRU, 0);
if (adapter->num_vfs)
be_vf_setup(adapter);
status = be_cmd_get_phy_info(adapter); if (!status && be_pause_supported(adapter))
adapter->phy.fc_autoneg = 1;
if (be_physfn(adapter) && !lancer_chip(adapter))
be_cmd_set_features(adapter);
return status;
}
err:
dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
return status;
}
staticint be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask, int nlflags)
{ struct be_adapter *adapter = netdev_priv(dev); int status = 0;
u8 hsw_mode;
/* BE and Lancer chips support VEB mode only */ if (BEx_chip(adapter) || lancer_chip(adapter)) { /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */ if (!pci_sriov_get_totalvfs(adapter->pdev)) return 0;
hsw_mode = PORT_FWD_TYPE_VEB;
} else {
status = be_cmd_get_hsw_config(adapter, NULL, 0,
adapter->if_handle, &hsw_mode,
NULL); if (status) return 0;
if (hsw_mode == PORT_FWD_TYPE_PASSTHRU) return 0;
}
if (skb_is_gso(skb)) { /* IPv6 TSO requests with extension hdrs are a problem * to Lancer and BE3 HW. Disable TSO6 feature.
*/ if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
features &= ~NETIF_F_TSO6;
/* Lancer cannot handle the packet with MSS less than 256. * Also it can't handle a TSO packet with a single segment * Disable the GSO support in such cases
*/ if (lancer_chip(adapter) &&
(skb_shinfo(skb)->gso_size < 256 ||
skb_shinfo(skb)->gso_segs == 1))
features &= ~NETIF_F_GSO_MASK;
}
/* The code below restricts offload features for some tunneled and * Q-in-Q packets. * Offload features for normal (non tunnel) packets are unchanged.
*/
features = vlan_features_check(skb, features); if (!skb->encapsulation ||
!(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)) return features;
/* It's an encapsulated packet and VxLAN offloads are enabled. We * should disable tunnel offload features if it's not a VxLAN packet, * as tunnel offloads have been enabled only for VxLAN. This is done to * allow other tunneled traffic like GRE work fine while VxLAN * offloads are configured in Skyhawk-R.
*/ switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP):
l4_hdr = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6):
l4_hdr = ipv6_hdr(skb)->nexthdr; break; default: return features;
}
if (MAX_PHYS_ITEM_ID_LEN < id_len) return -ENOSPC;
ppid->id[0] = adapter->hba_port_num + 1;
id = &ppid->id[1]; for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
i--, id += CNTL_SERIAL_NUM_WORD_SZ)
memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
val = be_POST_stage_get(adapter); if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR) returnfalse;
ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK; if (ue_err_code == 0) returnfalse;
/* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR * milliseconds before it checks for final error status in * SLIPORT_SEMAPHORE to determine if recovery criteria is met. * If it does, then PF0 initiates a Soft Reset.
*/ if (adapter->pf_num == 0) {
err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
err_rec->resched_delay = err_rec->ue_to_reset_time -
ERR_RECOVERY_UE_DETECT_DURATION; break;
}
case ERR_RECOVERY_ST_RESET: if (!be_err_is_recoverable(adapter)) {
dev_err(&adapter->pdev->dev, "Failed to meet recovery criteria\n");
status = -EIO;
err_rec->resched_delay = 0; break;
}
be_soft_reset(adapter);
err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
err_rec->resched_delay = err_rec->ue_to_poll_time -
err_rec->ue_to_reset_time; break;
case ERR_RECOVERY_ST_PRE_POLL:
err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
err_rec->resched_delay = 0;
status = 0; /* done */ break;
default:
status = -EINVAL;
err_rec->resched_delay = 0; break;
}
return status;
}
staticint be_err_recover(struct be_adapter *adapter)
{ int status;
if (!lancer_chip(adapter)) { if (!adapter->error_recovery.recovery_supported ||
adapter->priv_flags & BE_DISABLE_TPE_RECOVERY) return -EIO;
status = be_tpe_recover(adapter); if (status) goto err;
}
/* Wait for adapter to reach quiescent state before * destroying queues
*/
status = be_fw_wait_ready(adapter); if (status) goto err;
adapter->flags |= BE_FLAGS_TRY_RECOVERY;
be_cleanup(adapter);
status = be_resume(adapter); if (status) goto err;
if (be_physfn(adapter) &&
MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
be_cmd_get_die_temperature(adapter);
/* when interrupts are not yet enabled, just reap any pending * mcc completions
*/ if (!netif_running(adapter->netdev)) {
local_bh_disable();
be_process_mcc(adapter);
local_bh_enable(); goto reschedule;
}
if (!adapter->stats_cmd_sent) { if (lancer_chip(adapter))
lancer_cmd_get_pport_stats(adapter,
&adapter->stats_cmd); else
be_cmd_get_stats(adapter, &adapter->stats_cmd);
}
for_all_rx_queues(adapter, rxo, i) { /* Replenish RX-queues starved due to memory * allocation failures.
*/ if (rxo->rx_post_starved)
be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
}
/* EQ-delay update for Skyhawk is done while notifying EQ */ if (!skyhawk_chip(adapter))
be_eqd_update(adapter, false);
if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
be_log_sfp_info(adapter);
status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (status) {
dev_err(&pdev->dev, "Could not set PCI DMA Mask\n"); goto free_netdev;
}
status = be_map_pci_bars(adapter); if (status) goto free_netdev;
status = be_drv_init(adapter); if (status) goto unmap_bars;
status = be_setup(adapter); if (status) goto drv_cleanup;
be_netdev_init(netdev);
status = register_netdev(netdev); if (status != 0) goto unsetup;
/* On Die temperature not supported for VF. */ if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
adapter->hwmon_info.hwmon_dev =
devm_hwmon_device_register_with_groups(&pdev->dev,
DRV_NAME,
adapter,
be_hwmon_groups);
adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
}
dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
func_name(adapter), mc_name(adapter), adapter->port_name);
if (!be_check_error(adapter, BE_ERROR_EEH)) {
be_set_error(adapter, BE_ERROR_EEH);
be_cancel_err_detection(adapter);
be_cleanup(adapter);
}
if (state == pci_channel_io_perm_failure) return PCI_ERS_RESULT_DISCONNECT;
pci_disable_device(pdev);
/* The error could cause the FW to trigger a flash debug dump. * Resetting the card while flash dump is in progress * can cause it not to recover; wait for it to finish. * Wait only for first function as it is needed only once per * adapter.
*/ if (pdev->devfn == 0)
ssleep(30);
status = pci_enable_device(pdev); if (status) return PCI_ERS_RESULT_DISCONNECT;
pci_set_master(pdev);
pci_restore_state(pdev);
/* Check if card is ok and fw is ready */
dev_info(&adapter->pdev->dev, "Waiting for FW to be ready after EEH reset\n");
status = be_fw_wait_ready(adapter); if (status) return PCI_ERS_RESULT_DISCONNECT;
staticint be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
{ struct be_adapter *adapter = pci_get_drvdata(pdev); struct be_resources vft_res = {0}; int status;
if (!num_vfs)
be_vf_clear(adapter);
adapter->num_vfs = num_vfs;
if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
dev_warn(&pdev->dev, "Cannot disable VFs while they are assigned\n"); return -EBUSY;
}
/* When the HW is in SRIOV capable configuration, the PF-pool resources * are equally distributed across the max-number of VFs. The user may * request only a subset of the max-vfs to be enabled. * Based on num_vfs, redistribute the resources across num_vfs so that * each VF will have access to more number of resources. * This facility is not available in BE3 FW. * Also, this is done by FW in Lancer chip.
*/ if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
be_calculate_vf_res(adapter, adapter->num_vfs,
&vft_res);
status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
adapter->num_vfs, &vft_res); if (status)
dev_err(&pdev->dev, "Failed to optimize SR-IOV resources\n");
}
status = be_get_resources(adapter); if (status) return be_cmd_status(status);
/* Updating real_num_tx/rx_queues() requires rtnl_lock() */
rtnl_lock();
status = be_update_queues(adapter);
rtnl_unlock(); if (status) return be_cmd_status(status);
if (adapter->num_vfs)
status = be_vf_setup(adapter);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.