/* bnx2x_cmn.c: QLogic Everest network driver. * * Copyright (c) 2007-2013 Broadcom Corporation * Copyright (c) 2014 QLogic Corporation * All rights reserved * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation. * * Maintained by: Ariel Elior <ariel.elior@qlogic.com> * Written by: Eliezer Tamir * Based on code from Michael Chan's bnx2 driver * UDP CSUM errata workaround by Arik Gendelman * Slowpath and fastpath rework by Vladislav Zolotarov * Statistics and Link management by Yitchak Gertner *
*/
/** * bnx2x_move_fp - move content of the fastpath structure. * * @bp: driver handle * @from: source FP index * @to: destination FP index * * Makes sure the contents of the bp->fp[to].napi is kept * intact. This is done by first copying the napi struct from * the target to the source, and then mem copying the entire * source onto the target. Update txdata pointers and related * content.
*/ staticinlinevoid bnx2x_move_fp(struct bnx2x *bp, int from, int to)
{ struct bnx2x_fastpath *from_fp = &bp->fp[from]; struct bnx2x_fastpath *to_fp = &bp->fp[to]; struct bnx2x_sp_objs *from_sp_objs = &bp->sp_objs[from]; struct bnx2x_sp_objs *to_sp_objs = &bp->sp_objs[to]; struct bnx2x_fp_stats *from_fp_stats = &bp->fp_stats[from]; struct bnx2x_fp_stats *to_fp_stats = &bp->fp_stats[to]; int old_max_eth_txqs, new_max_eth_txqs; int old_txdata_index = 0, new_txdata_index = 0; struct bnx2x_agg_info *old_tpa_info = to_fp->tpa_info;
/* Copy the NAPI object as it has been already initialized */
from_fp->napi = to_fp->napi;
/* Retain the tpa_info of the original `to' version as we don't want * 2 FPs to contain the same tpa_info pointer.
*/
to_fp->tpa_info = old_tpa_info;
/* move sp_objs contents as well, as their indices match fp ones */
memcpy(to_sp_objs, from_sp_objs, sizeof(*to_sp_objs));
/* move fp_stats contents as well, as their indices match fp ones */
memcpy(to_fp_stats, from_fp_stats, sizeof(*to_fp_stats));
/* Update txdata pointers in fp and move txdata content accordingly: * Each fp consumes 'max_cos' txdata structures, so the index should be * decremented by max_cos x delta.
*/
/** * bnx2x_fill_fw_str - Fill buffer with FW version string. * * @bp: driver handle * @buf: character buffer to fill with the fw name * @buf_len: length of the above buffer *
*/ void bnx2x_fill_fw_str(struct bnx2x *bp, char *buf, size_t buf_len)
{ if (IS_PF(bp)) {
u8 phy_fw_ver[PHY_FW_VER_LEN];
/** * bnx2x_shrink_eth_fp - guarantees fastpath structures stay intact * * @bp: driver handle * @delta: number of eth queues which were not allocated
*/ staticvoid bnx2x_shrink_eth_fp(struct bnx2x *bp, int delta)
{ int i, cos, old_eth_num = BNX2X_NUM_ETH_QUEUES(bp);
/* Queue pointer cannot be re-set on an fp-basis, as moving pointer * backward along the array could cause memory to be overridden
*/ for (cos = 1; cos < bp->max_cos; cos++) { for (i = 0; i < old_eth_num - delta; i++) { struct bnx2x_fastpath *fp = &bp->fp[i]; int new_idx = cos * (old_eth_num - delta) + i;
/* Need to make the tx_bd_cons update visible to start_xmit() * before checking for netif_tx_queue_stopped(). Without the * memory barrier, there is a small possibility that * start_xmit() will miss it and cause the queue to be stopped * forever. * On the other hand we need an rmb() here to ensure the proper * ordering of bit testing in the following * netif_tx_queue_stopped(txq) call.
*/
smp_mb();
if (unlikely(netif_tx_queue_stopped(txq))) { /* Taking tx_lock() is needed to prevent re-enabling the queue * while it's empty. This could have happen if rx_action() gets * suspended in bnx2x_tx_int() after the condition before * netif_tx_wake_queue(), while tx_action (bnx2x_start_xmit()): * * stops the queue->sees fresh tx_bd_cons->releases the queue-> * sends some packets consuming the whole queue again-> * stops the queue
*/
/* Here we assume that the last SGE index is the biggest */
prefetch((void *)(fp->sge_mask));
bnx2x_update_last_max_sge(fp,
le16_to_cpu(cqe->sgl_or_raw_data.sgl[sge_len - 1]));
/* Get Toeplitz hash value in the skb using the value from the * CQE (calculated by HW).
*/ static u32 bnx2x_get_rxhash(conststruct bnx2x *bp, conststruct eth_fast_path_rx_cqe *cqe, enum pkt_hash_types *rxhash_type)
{ /* Get Toeplitz hash from CQE */ if ((bp->dev->features & NETIF_F_RXHASH) &&
(cqe->status_flags & ETH_FAST_PATH_RX_CQE_RSS_HASH_FLG)) { enum eth_rss_hash_type htype;
/* print error if current state != stop */ if (tpa_info->tpa_state != BNX2X_TPA_STOP)
BNX2X_ERR("start of bin not in stop [%d]\n", queue);
/* Try to map an empty data buffer from the aggregation info */
mapping = dma_map_single(&bp->pdev->dev,
first_buf->data + NET_SKB_PAD,
fp->rx_buf_size, DMA_FROM_DEVICE); /* * ...if it fails - move the skb from the consumer to the producer * and set the current aggregation state as ERROR to drop it * when TPA_STOP arrives.
*/
if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { /* Move the BD from the consumer to the producer */
bnx2x_reuse_rx_data(fp, cons, prod);
tpa_info->tpa_state = BNX2X_TPA_ERROR; return;
}
/* move empty data from pool to prod */
prod_rx_buf->data = first_buf->data;
dma_unmap_addr_set(prod_rx_buf, mapping, mapping); /* point prod_bd to new data */
prod_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
prod_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
/* move partial skb from cons to pool (don't unmap yet) */
*first_buf = *cons_rx_buf;
/* mark bin state as START */
tpa_info->parsing_flags =
le16_to_cpu(cqe->pars_flags.flags);
tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
tpa_info->tpa_state = BNX2X_TPA_START;
tpa_info->len_on_bd = le16_to_cpu(cqe->len_on_bd);
tpa_info->placement_offset = cqe->placement_offset;
tpa_info->rxhash = bnx2x_get_rxhash(bp, cqe, &tpa_info->rxhash_type); if (fp->mode == TPA_MODE_GRO) {
u16 gro_size = le16_to_cpu(cqe->pkt_len_or_gro_seg_len);
tpa_info->full_page = SGE_PAGES / gro_size * gro_size;
tpa_info->gro_size = gro_size;
}
/* Timestamp option length allowed for TPA aggregation:
 *
 *		nop nop kind length echo val
 */
#define TPA_TSTAMP_OPT_LEN	12

/**
 * bnx2x_set_gro_params - compute GRO values
 *
 * @skb:			packet skb
 * @parsing_flags:		parsing flags from the START CQE
 * @len_on_bd:			total length of the first packet for the
 *				aggregation.
 * @pkt_len:			length of all segments
 * @num_of_coalesced_segs:	count of segments
 *
 * Derives an approximate MSS (gso_size) for the aggregation from its first
 * packet and records the number of aggregated segments.
 *
 * NOTE(review): the header length computed below covers only the Ethernet
 * and TCP headers (plus the optional timestamp). No IP header length is
 * added and gso_type is not assigned anywhere in this body - confirm
 * against the complete source. @pkt_len is not used here.
 */
static void bnx2x_set_gro_params(struct sk_buff *skb, u16 parsing_flags,
				 u16 len_on_bd, unsigned int pkt_len,
				 u16 num_of_coalesced_segs)
{
	/* TPA aggregation won't have either IP options or TCP options
	 * other than timestamp or IPv6 extension headers.
	 */
	u16 hdr_bytes = ETH_HLEN + sizeof(struct tcphdr);

	/* tcp_gro_complete() will copy NAPI_GRO_CB(skb)->count
	 * to skb_shinfo(skb)->gso_segs
	 */
	NAPI_GRO_CB(skb)->count = num_of_coalesced_segs;

	/* A TCP timestamp, when present, is always exactly 12 bytes long:
	 * nop nop kind length echo val.
	 *
	 * Otherwise FW would close the aggregation.
	 */
	if (parsing_flags & PARSING_FLAGS_TIME_STAMP_EXIST_FLAG)
		hdr_bytes += TPA_TSTAMP_OPT_LEN;

	skb_shinfo(skb)->gso_size = len_on_bd - hdr_bytes;
}
/* This is needed in order to enable forwarding support */ if (frag_size)
bnx2x_set_gro_params(skb, tpa_info->parsing_flags, len_on_bd,
le16_to_cpu(cqe->pkt_len),
le16_to_cpu(cqe->num_of_coalesced_segs));
#ifdef BNX2X_STOP_ON_ERROR if (pages > min_t(u32, 8, MAX_SKB_FRAGS) * SGE_PAGES) {
BNX2X_ERR("SGL length is too long: %d. CQE index is %d\n",
pages, cqe_idx);
BNX2X_ERR("cqe->pkt_len = %d\n", cqe->pkt_len);
bnx2x_panic(); return -EINVAL;
} #endif
/* Run through the SGL and compose the fragmented skb */ for (i = 0, j = 0; i < pages; i += PAGES_PER_SGE, j++) {
u16 sge_idx = RX_SGE(le16_to_cpu(cqe->sgl_or_raw_data.sgl[j]));
/* FW gives the indices of the SGE as if the ring is an array
(meaning that "next" element will consume 2 indices) */ if (fp->mode == TPA_MODE_GRO)
frag_len = min_t(u32, frag_size, (u32)full_page); else/* LRO */
frag_len = min_t(u32, frag_size, (u32)SGE_PAGES);
/* If we fail to allocate a substitute page, we simply stop
where we are and drop the whole packet */
err = bnx2x_alloc_rx_sge(bp, fp, sge_idx, GFP_ATOMIC); if (unlikely(err)) {
bnx2x_fp_qstats(bp, fp)->rx_skb_alloc_failed++; return err;
}
dma_unmap_page(&bp->pdev->dev,
dma_unmap_addr(&old_rx_pg, mapping),
SGE_PAGE_SIZE, DMA_FROM_DEVICE); /* Add one frag and update the appropriate fields in the skb */ if (fp->mode == TPA_MODE_LRO)
skb_fill_page_desc(skb, j, old_rx_pg.page,
old_rx_pg.offset, frag_len); else { /* GRO */ int rem; int offset = 0; for (rem = frag_len; rem > 0; rem -= gro_size) { int len = rem > gro_size ? gro_size : rem;
skb_fill_page_desc(skb, frag_id++,
old_rx_pg.page,
old_rx_pg.offset + offset,
len); if (offset)
get_page(old_rx_pg.page);
offset += len;
}
}
staticvoid *bnx2x_frag_alloc(conststruct bnx2x_fastpath *fp, gfp_t gfp_mask)
{ if (fp->rx_frag_size) { /* GFP_KERNEL allocations are used only during initialization */ if (unlikely(gfpflags_allow_blocking(gfp_mask))) return (void *)__get_free_page(gfp_mask);
/* If there was an error during the handling of the TPA_START - * drop this aggregation.
*/ if (old_tpa_state == BNX2X_TPA_ERROR) goto drop;
/* Try to allocate the new data */
new_data = bnx2x_frag_alloc(fp, GFP_ATOMIC); /* Unmap skb in the pool anyway, as we are going to change pool entry status to BNX2X_TPA_STOP even if new skb allocation
fails. */
dma_unmap_single(&bp->pdev->dev, dma_unmap_addr(rx_buf, mapping),
fp->rx_buf_size, DMA_FROM_DEVICE); if (likely(new_data))
skb = bnx2x_build_skb(fp, data);
if (likely(skb)) { #ifdef BNX2X_STOP_ON_ERROR if (pad + len > fp->rx_buf_size) {
BNX2X_ERR("skb_put is about to fail... pad %d len %d rx_buf_size %d\n",
pad, len, fp->rx_buf_size);
bnx2x_panic();
bnx2x_frag_free(fp, new_data); return;
} #endif
skb_reserve(skb, pad + NET_SKB_PAD);
skb_put(skb, len);
skb_set_hash(skb, tpa_info->rxhash, tpa_info->rxhash_type);
if (!bnx2x_fill_frag_skb(bp, fp, tpa_info, pages,
skb, cqe, cqe_idx)) { if (tpa_info->parsing_flags & PARSING_FLAGS_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), tpa_info->vlan_tag);
bnx2x_gro_receive(bp, fp, skb);
} else {
DP(NETIF_MSG_RX_STATUS, "Failed to allocate new pages - dropping packet!\n");
dev_kfree_skb_any(skb);
}
/* put new data in bin */
rx_buf->data = new_data;
return;
} if (new_data)
bnx2x_frag_free(fp, new_data);
drop: /* drop the packet and keep the buffer in the bin */
DP(NETIF_MSG_RX_STATUS, "Failed to allocate or map a new skb - dropping packet!\n");
bnx2x_fp_stats(bp, fp)->eth_q_stats.rx_skb_alloc_failed++;
}
static void bnx2x_csum_validate(struct sk_buff *skb, union eth_rx_cqe *cqe, struct bnx2x_fastpath *fp, struct bnx2x_eth_q_stats *qstats)
{ /* Do nothing if no L4 csum validation was done. * We do not check whether IP csum was validated. For IPv4 we assume * that if the card got as far as validating the L4 csum, it also * validated the IP csum. IPv6 has no IP csum.
*/ if (cqe->fast_path_cqe.status_flags &
ETH_FAST_PATH_RX_CQE_L4_XSUM_NO_VALIDATION_FLG) return;
/* If L4 validation was done, check if an error was found. */
/* An rmb() is required to ensure that the CQE is not read * before it is written by the adapter DMA. PCI ordering * rules will make sure the other fields are written before * the marker at the end of struct eth_fast_path_rx_cqe * but without rmb() a weakly ordered processor can process * stale data. Without the barrier TPA state-machine might * enter inconsistent state and kernel stack might be * provided with incorrect packet description - these lead * to various kernel crashes.
*/
rmb();
/* Set Toeplitz hash for a non-LRO skb */
rxhash = bnx2x_get_rxhash(bp, cqe_fp, &rxhash_type);
skb_set_hash(skb, rxhash, rxhash_type);
skb_checksum_none_assert(skb);
if (bp->dev->features & NETIF_F_RXCSUM)
bnx2x_csum_validate(skb, cqe, fp,
bnx2x_fp_qstats(bp, fp));
skb_record_rx_queue(skb, fp->rx_queue);
/* Check if this packet was timestamped */ if (unlikely(cqe->fast_path_cqe.type_error_flags &
(1 << ETH_FAST_PATH_RX_CQE_PTP_PKT_SHIFT)))
bnx2x_set_rx_ts(bp, skb);
if (le16_to_cpu(cqe_fp->pars_flags.flags) &
PARSING_FLAGS_VLAN)
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
le16_to_cpu(cqe_fp->vlan_tag));
/* Returns the effective line speed: the current link line speed, limited
 * by the MF (multi-function) maximum configuration when MF is active.
 */
u16 bnx2x_get_mf_speed(struct bnx2x *bp)
{
	u16 speed = bp->link_vars.line_speed;
	u16 max_cfg;
	u16 vn_max_rate;

	/* Without MF the raw line speed is reported unchanged */
	if (!IS_MF(bp))
		return speed;

	max_cfg = bnx2x_extract_max_cfg(bp, bp->mf_config[BP_VN(bp)]);

	/* Percent-BW mode: max_cfg is applied as a percentage of the link */
	if (IS_MF_PERCENT_BW(bp))
		return (speed * max_cfg) / 100;

	/* SD mode: max_cfg * 100 acts as an upper bound on the speed */
	vn_max_rate = max_cfg * 100;

	return vn_max_rate < speed ? vn_max_rate : speed;
}
/**
 * bnx2x_fill_report_data - fill link report data to report
 *
 * @bp:		driver handle
 * @data:	link state to update
 *
 * Uses non-atomic bit operations because it is called under the mutex.
 * For a VF the report is copied verbatim from bp->vf_link_vars; for a PF
 * it is built from bp->link_vars and the effective MF speed.
 */
static void bnx2x_fill_report_data(struct bnx2x *bp,
				   struct bnx2x_link_report_data *data)
{
	memset(data, 0, sizeof(*data));

	if (!IS_PF(bp)) {
		/* VF */
		*data = bp->vf_link_vars;
		return;
	}

	/* Effective line speed */
	data->line_speed = bnx2x_get_mf_speed(bp);

	/* Link is down (physically, or the function is disabled) */
	if (!bp->link_vars.link_up || (bp->flags & MF_FUNC_DIS))
		__set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
			  &data->link_report_flags);

	/* With no ETH queues the link is reported as down as well */
	if (!BNX2X_NUM_ETH_QUEUES(bp))
		__set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
			  &data->link_report_flags);

	/* Full DUPLEX */
	if (bp->link_vars.duplex == DUPLEX_FULL)
		__set_bit(BNX2X_LINK_REPORT_FD,
			  &data->link_report_flags);

	/* Rx Flow Control is ON */
	if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_RX)
		__set_bit(BNX2X_LINK_REPORT_RX_FC_ON,
			  &data->link_report_flags);

	/* Tx Flow Control is ON */
	if (bp->link_vars.flow_ctrl & BNX2X_FLOW_CTRL_TX)
		__set_bit(BNX2X_LINK_REPORT_TX_FC_ON,
			  &data->link_report_flags);
}
/** * bnx2x_link_report - report link status to OS. * * @bp: driver handle * * Calls the __bnx2x_link_report() under the same locking scheme * as a link/PHY state managing code to ensure a consistent link * reporting.
*/
/** * __bnx2x_link_report - report link status to OS. * * @bp: driver handle * * None atomic implementation. * Should be called under the phy_lock.
*/ void __bnx2x_link_report(struct bnx2x *bp)
{ struct bnx2x_link_report_data cur_data;
if (bp->force_link_down) {
bp->link_vars.link_up = 0; return;
}
/* reread mf_cfg */ if (IS_PF(bp) && !CHIP_IS_E1(bp))
bnx2x_read_mf_cfg(bp);
/* Read the current link report info */
bnx2x_fill_report_data(bp, &cur_data);
/* Don't report link down or exactly the same link status twice */ if (!memcmp(&cur_data, &bp->last_reported_link, sizeof(cur_data)) ||
(test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
&bp->last_reported_link.link_report_flags) &&
test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
&cur_data.link_report_flags))) return;
bp->link_cnt++;
/* We are going to report a new link parameters now - * remember the current data for the next time.
*/
memcpy(&bp->last_reported_link, &cur_data, sizeof(cur_data));
/* propagate status to VFs */ if (IS_PF(bp))
bnx2x_iov_link_update(bp);
if (test_bit(BNX2X_LINK_REPORT_LINK_DOWN,
&cur_data.link_report_flags)) {
netif_carrier_off(bp->dev);
netdev_err(bp->dev, "NIC Link is Down\n"); return;
} else { constchar *duplex; constchar *flow;
netif_carrier_on(bp->dev);
if (test_and_clear_bit(BNX2X_LINK_REPORT_FD,
&cur_data.link_report_flags))
duplex = "full"; else
duplex = "half";
/* Handle the FC at the end so that only these flags would be * possibly set. This way we may easily check if there is no FC * enabled.
*/ if (cur_data.link_report_flags) { if (test_bit(BNX2X_LINK_REPORT_RX_FC_ON,
&cur_data.link_report_flags)) { if (test_bit(BNX2X_LINK_REPORT_TX_FC_ON,
&cur_data.link_report_flags))
flow = "ON - receive & transmit"; else
flow = "ON - receive";
} else {
flow = "ON - transmit";
}
} else {
flow = "none";
}
netdev_info(bp->dev, "NIC Link is Up, %d Mbps %s duplex, Flow control: %s\n",
cur_data.line_speed, duplex, flow);
}
}
staticvoid bnx2x_set_next_page_sgl(struct bnx2x_fastpath *fp)
{ int i;
for (i = 1; i <= NUM_RX_SGE_PAGES; i++) { struct eth_rx_sge *sge;
/* Activate BD ring */ /* Warning! * this will generate an interrupt (to the TSTORM) * must only be done after chip is initialized
*/
bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
fp->rx_sge_prod);
}
}
void bnx2x_init_rx_rings(struct bnx2x *bp)
{ int func = BP_FUNC(bp);
u16 ring_prod; int i, j;
if (fp->mode != TPA_MODE_DISABLED) { /* Fill the per-aggregation pool */ for (i = 0; i < MAX_AGG_QS(bp); i++) { struct bnx2x_agg_info *tpa_info =
&fp->tpa_info[i]; struct sw_rx_bd *first_buf =
&tpa_info->first_buf;
first_buf->data =
bnx2x_frag_alloc(fp, GFP_KERNEL); if (!first_buf->data) {
BNX2X_ERR("Failed to allocate TPA skb pool for queue[%d] - disabling TPA on this queue!\n",
j);
bnx2x_free_tpa_pool(bp, fp, i);
fp->mode = TPA_MODE_DISABLED; break;
}
dma_unmap_addr_set(first_buf, mapping, 0);
tpa_info->tpa_state = BNX2X_TPA_STOP;
}
/* "next page" elements initialization */
bnx2x_set_next_page_sgl(fp);
/* set SGEs bit mask */
bnx2x_init_sge_ring_bit_mask(fp);
/* Allocate SGEs and initialize the ring elements */ for (i = 0, ring_prod = 0;
i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
if (bnx2x_alloc_rx_sge(bp, fp, ring_prod,
GFP_KERNEL) < 0) {
BNX2X_ERR("was only able to allocate %d rx sges\n",
i);
BNX2X_ERR("disabling TPA for queue[%d]\n",
j); /* Cleanup already allocated elements */
bnx2x_free_rx_sge_range(bp, fp,
ring_prod);
bnx2x_free_tpa_pool(bp, fp,
MAX_AGG_QS(bp));
fp->mode = TPA_MODE_DISABLED;
ring_prod = 0; break;
}
ring_prod = NEXT_SGE_IDX(ring_prod);
}
/* Activate BD ring */ /* Warning! * this will generate an interrupt (to the TSTORM) * must only be done after chip is initialized
*/
bnx2x_update_rx_prod(bp, fp, fp->rx_bd_prod, fp->rx_comp_prod,
fp->rx_sge_prod);
/** * bnx2x_free_msix_irqs - free previously requested MSI-X IRQ vectors * * @bp: driver handle * @nvecs: number of vectors to be released
*/ staticvoid bnx2x_free_msix_irqs(struct bnx2x *bp, int nvecs)
{ int i, offset = 0;
if (nvecs == offset) return;
/* VFs don't have a default SB */ if (IS_PF(bp)) {
free_irq(bp->msix_table[offset].vector, bp->dev);
DP(NETIF_MSG_IFDOWN, "released sp irq (%d)\n",
bp->msix_table[offset].vector);
offset++;
}
if (CNIC_SUPPORT(bp)) { if (nvecs == offset) return;
offset++;
}
for_each_eth_queue(bp, i) { if (nvecs == offset) return;
DP(NETIF_MSG_IFDOWN, "about to release fp #%d->%d irq\n",
i, bp->msix_table[offset].vector);
int bnx2x_enable_msix(struct bnx2x *bp)
{ int msix_vec = 0, i, rc;
/* VFs don't have a default status block */ if (IS_PF(bp)) {
bp->msix_table[msix_vec].entry = msix_vec;
BNX2X_DEV_INFO("msix_table[0].entry = %d (slowpath)\n",
bp->msix_table[0].entry);
msix_vec++;
}
/* Cnic requires an msix vector for itself */ if (CNIC_SUPPORT(bp)) {
bp->msix_table[msix_vec].entry = msix_vec;
BNX2X_DEV_INFO("msix_table[%d].entry = %d (CNIC)\n",
msix_vec, bp->msix_table[msix_vec].entry);
msix_vec++;
}
/* We need separate vectors for ETH queues only (not FCoE) */
for_each_eth_queue(bp, i) {
bp->msix_table[msix_vec].entry = msix_vec;
BNX2X_DEV_INFO("msix_table[%d].entry = %d (fastpath #%u)\n",
msix_vec, msix_vec, i);
msix_vec++;
}
DP(BNX2X_MSG_SP, "about to request enable msix with %d vectors\n",
msix_vec);
rc = pci_enable_msix_range(bp->pdev, &bp->msix_table[0],
BNX2X_MIN_MSIX_VEC_CNT(bp), msix_vec); /* * reconfigure number of tx/rx queues according to available * MSI-X vectors
*/ if (rc == -ENOSPC) { /* Get by with single vector */
rc = pci_enable_msix_range(bp->pdev, &bp->msix_table[0], 1, 1); if (rc < 0) {
BNX2X_DEV_INFO("Single MSI-X is not attainable rc %d\n",
rc); goto no_msix;
}
BNX2X_DEV_INFO("Using single MSI-X vector\n");
bp->flags |= USING_SINGLE_MSIX_FLAG;
BNX2X_DEV_INFO("set number of queues to 1\n");
bp->num_ethernet_queues = 1;
bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
} elseif (rc < 0) {
BNX2X_DEV_INFO("MSI-X is not attainable rc %d\n", rc); goto no_msix;
} elseif (rc < msix_vec) { /* how less vectors we will have? */ int diff = msix_vec - rc;
BNX2X_DEV_INFO("Trying to use less MSI-X vectors: %d\n", rc);
/* * decrease number of queues by number of unallocated entries
*/
bp->num_ethernet_queues -= diff;
bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
/* override in STORAGE SD modes */ if (IS_MF_STORAGE_ONLY(bp))
bp->num_ethernet_queues = 1;
/* Add special queues */
bp->num_cnic_queues = CNIC_SUPPORT(bp); /* For FCOE */
bp->num_queues = bp->num_ethernet_queues + bp->num_cnic_queues;
BNX2X_DEV_INFO("set number of queues to %d\n", bp->num_queues);
}
/**
 * bnx2x_set_real_num_queues - configure netdev->real_num_[tx,rx]_queues
 *
 * @bp:			Driver handle
 * @include_cnic:	handle cnic case
 *
 * We currently support at most 16 Tx queues for each CoS thus we will
 * allocate a multiple of 16 for ETH L2 rings according to the value of the
 * bp->max_cos.
 *
 * If there is an FCoE L2 queue the appropriate Tx queue will have the next
 * index after all ETH L2 indices.
 *
 * If the actual number of Tx queues (for each CoS) is less than 16 then there
 * will be holes at the end of each group of 16 ETH L2 indices (0..15,
 * 16..31,...) with indices that are not coupled with any real Tx queue.
 *
 * The proper configuration of skb->queue_mapping is handled by
 * bnx2x_select_queue() and __skb_tx_hash().
 *
 * bnx2x_setup_tc() takes care of the proper TC mappings so that __skb_tx_hash()
 * will return a proper Tx index if TC is enabled (netdev->num_tc > 0).
 *
 * Returns 0 on success, or the error code returned by
 * netif_set_real_num_tx_queues() / netif_set_real_num_rx_queues().
 */
static int bnx2x_set_real_num_queues(struct bnx2x *bp, int include_cnic)
{
	int rc, tx, rx;

	/* Start from the ETH L2 queues: one Tx queue per CoS for every ETH
	 * queue, one Rx queue per ETH queue. Without this the counters
	 * below would be read uninitialised.
	 */
	tx = BNX2X_NUM_ETH_QUEUES(bp) * bp->max_cos;
	rx = BNX2X_NUM_ETH_QUEUES(bp);

	/* account for fcoe queue */
	if (include_cnic && !NO_FCOE(bp)) {
		rx++;
		tx++;
	}

	rc = netif_set_real_num_tx_queues(bp->dev, tx);
	if (rc) {
		BNX2X_ERR("Failed to set real number of Tx queues: %d\n", rc);
		return rc;
	}

	rc = netif_set_real_num_rx_queues(bp->dev, rx);
	if (rc) {
		BNX2X_ERR("Failed to set real number of Rx queues: %d\n", rc);
		return rc;
	}

	DP(NETIF_MSG_IFUP, "Setting real num queues to (tx, rx) (%d, %d)\n",
	   tx, rx);

	return rc;
}
staticvoid bnx2x_set_rx_buf_size(struct bnx2x *bp)
{ int i;
/* Always use a mini-jumbo MTU for the FCoE L2 ring */ if (IS_FCOE_IDX(i)) /* * Although there are no IP frames expected to arrive to * this ring we still want to add an * IP_HEADER_ALIGNMENT_PADDING to prevent a buffer * overrun attack.
*/
mtu = BNX2X_FCOE_MINI_JUMBO_MTU; else
mtu = bp->dev->mtu;
fp->rx_buf_size = BNX2X_FW_RX_ALIGN_START +
IP_HEADER_ALIGNMENT_PADDING +
ETH_OVERHEAD +
mtu +
BNX2X_FW_RX_ALIGN_END;
fp->rx_buf_size = SKB_DATA_ALIGN(fp->rx_buf_size); /* Note : rx_buf_size doesn't take into account NET_SKB_PAD */ if (fp->rx_buf_size + NET_SKB_PAD <= PAGE_SIZE)
fp->rx_frag_size = fp->rx_buf_size + NET_SKB_PAD; else
fp->rx_frag_size = 0;
}
}
/* Prepare the initial contents for the indirection table if RSS is * enabled
*/ for (i = 0; i < sizeof(bp->rss_conf_obj.ind_table); i++)
bp->rss_conf_obj.ind_table[i] =
bp->fp->cl_id +
ethtool_rxfh_indir_default(i, num_eth_queues);
/* * For 57710 and 57711 SEARCHER configuration (rss_keys) is * per-port, so if explicit configuration is needed , do it only * for a PMF. * * For 57712 and newer on the other hand it's a per-function * configuration.
*/ return bnx2x_config_rss_eth(bp, bp->port.pmf || !CHIP_IS_E1x(bp));
}
/* Although RSS is meaningless when there is a single HW queue we * still need it enabled in order to have HW Rx hash generated. * * if (!is_eth_multi(bp)) * bp->multi_mode = ETH_RSS_MODE_DISABLED;
*/
/* Select the RSS mode and, when enabled, the set of hashed protocols.
 * Every flag is set in params.rss_flags.
 */
if (enable) {
	__set_bit(BNX2X_RSS_MODE_REGULAR, &params.rss_flags);

	/* RSS configuration */
	__set_bit(BNX2X_RSS_IPV4, &params.rss_flags);
	__set_bit(BNX2X_RSS_IPV4_TCP, &params.rss_flags);
	__set_bit(BNX2X_RSS_IPV6, &params.rss_flags);
	__set_bit(BNX2X_RSS_IPV6_TCP, &params.rss_flags);
	/* UDP hashing is opt-in per address family */
	if (rss_obj->udp_rss_v4)
		__set_bit(BNX2X_RSS_IPV4_UDP, &params.rss_flags);
	if (rss_obj->udp_rss_v6)
		__set_bit(BNX2X_RSS_IPV6_UDP, &params.rss_flags);

	if (!CHIP_IS_E1x(bp)) {
		/* valid only for TUNN_MODE_VXLAN tunnel mode */
		__set_bit(BNX2X_RSS_IPV4_VXLAN, &params.rss_flags);
		__set_bit(BNX2X_RSS_IPV6_VXLAN, &params.rss_flags);

		/* valid only for TUNN_MODE_GRE tunnel mode */
		__set_bit(BNX2X_RSS_TUNN_INNER_HDRS, &params.rss_flags);
	}
} else {
	__set_bit(BNX2X_RSS_MODE_DISABLED, &params.rss_flags);
}
/* * Cleans the objects that have internal lists without sending * ramrods. Should be run when interrupts are disabled.
*/ void bnx2x_squeeze_objects(struct bnx2x *bp)
{ int rc; unsignedlong ramrod_flags = 0, vlan_mac_flags = 0; struct bnx2x_mcast_ramrod_params rparam = {NULL}; struct bnx2x_vlan_mac_obj *mac_obj = &bp->sp_objs->mac_obj;
/***************** Cleanup MACs' object first *************************/
/* Wait for completion of requested */
__set_bit(RAMROD_COMP_WAIT, &ramrod_flags); /* Perform a dry cleanup */
__set_bit(RAMROD_DRV_CLR_ONLY, &ramrod_flags);
/* Clean ETH primary MAC */
__set_bit(BNX2X_ETH_MAC, &vlan_mac_flags);
rc = mac_obj->delete_all(bp, &bp->sp_objs->mac_obj, &vlan_mac_flags,
&ramrod_flags); if (rc != 0)
BNX2X_ERR("Failed to clean ETH MACs: %d\n", rc);
/* Cleanup UC list */
vlan_mac_flags = 0;
__set_bit(BNX2X_UC_LIST_MAC, &vlan_mac_flags);
rc = mac_obj->delete_all(bp, mac_obj, &vlan_mac_flags,
&ramrod_flags); if (rc != 0)
BNX2X_ERR("Failed to clean UC list MACs: %d\n", rc);
/* Add a DEL command... - Since we're doing a driver cleanup only, * we take a lock surrounding both the initial send and the CONTs, * as we don't want a true completion to disrupt us in the middle.
*/
netif_addr_lock_bh(bp->dev);
rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL); if (rc < 0)
BNX2X_ERR("Failed to add a new DEL command to a multi-cast object: %d\n",
rc);
/* ...and wait until all pending commands are cleared */
rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_CONT); while (rc != 0) { if (rc < 0) {
BNX2X_ERR("Failed to clean multi-cast object: %d\n",
rc);
netif_addr_unlock_bh(bp->dev); return;
}
staticint bnx2x_alloc_fw_stats_mem(struct bnx2x *bp)
{ int num_groups, vf_headroom = 0; int is_fcoe_stats = NO_FCOE(bp) ? 0 : 1;
/* number of queues for statistics is number of eth queues + FCoE */
u8 num_queue_stats = BNX2X_NUM_ETH_QUEUES(bp) + is_fcoe_stats;
/* Total number of FW statistics requests = * 1 for port stats + 1 for PF stats + potential 2 for FCoE (fcoe proper * and fcoe l2 queue) stats + num of queues (which includes another 1 * for fcoe l2 queue if applicable)
*/
bp->fw_stats_num = 2 + is_fcoe_stats + num_queue_stats;
/* vf stats appear in the request list, but their data is allocated by * the VFs themselves. We don't include them in the bp->fw_stats_num as * it is used to determine where to place the vf stats queries in the * request struct
*/ if (IS_SRIOV(bp))
vf_headroom = bnx2x_vf_headroom(bp);
/* Request is built from stats_query_header and an array of * stats_query_cmd_group each of which contains * STATS_QUERY_CMD_COUNT rules. The real number of requests is * configured in the stats_query_header.
*/
num_groups =
(((bp->fw_stats_num + vf_headroom) / STATS_QUERY_CMD_COUNT) +
(((bp->fw_stats_num + vf_headroom) % STATS_QUERY_CMD_COUNT) ?
1 : 0));
/* Data for statistics requests + stats_counter * stats_counter holds per-STORM counters that are incremented * when STORM has finished with the current request. * memory for FCoE offloaded statistics are counted anyway, * even if they will not be sent. * VF stats are not accounted for here as the data of VF stats is stored * in memory allocated by the VF, not here.
*/
bp->fw_stats_data_sz = sizeof(struct per_port_stats) + sizeof(struct per_pf_stats) + sizeof(struct fcoe_statistics_params) + sizeof(struct per_queue_stats) * num_queue_stats + sizeof(struct stats_counter);
bp->fw_stats = BNX2X_PCI_ALLOC(&bp->fw_stats_mapping,
bp->fw_stats_data_sz + bp->fw_stats_req_sz); if (!bp->fw_stats) goto alloc_mem_err;
DP(BNX2X_MSG_SP, "statistics request base address set to %x %x\n",
U64_HI(bp->fw_stats_req_mapping),
U64_LO(bp->fw_stats_req_mapping));
DP(BNX2X_MSG_SP, "statistics data base address set to %x %x\n",
U64_HI(bp->fw_stats_data_mapping),
U64_LO(bp->fw_stats_data_mapping)); return 0;
/* if mcp fails to respond we must abort */ if (!(*load_code)) {
BNX2X_ERR("MCP response failure, aborting\n"); return -EBUSY;
}
/* If mcp refused (e.g. other port is in diagnostic mode) we * must abort
*/ if ((*load_code) == FW_MSG_CODE_DRV_LOAD_REFUSED) {
BNX2X_ERR("MCP refused load request, aborting\n"); return -EBUSY;
} return 0;
}
/* Check whether another PF has already loaded FW to the chip. In
 * virtualized environments a PF from another VM may have already
 * initialized the device, including loading FW.
 *
 * @bp:		driver handle
 * @load_code:	load response code; the version check is skipped when it is
 *		FW_MSG_CODE_DRV_LOAD_COMMON_CHIP or FW_MSG_CODE_DRV_LOAD_COMMON
 * @print_err:	when true a mismatch is logged with BNX2X_ERR, otherwise
 *		with BNX2X_DEV_INFO
 *
 * Returns 0 when the check is skipped or the versions are compatible,
 * -EBUSY on a mismatch.
 *
 * NOTE(review): loaded_fw and the loaded_fw_* locals are read below without
 * being assigned anywhere in this body; the code that populates them seems
 * to be missing - confirm against the complete source.
 */
int bnx2x_compare_fw_ver(struct bnx2x *bp, u32 load_code, bool print_err)
{
	/* is another pf loaded on this engine? */
	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON) {
		u8 loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng;
		u32 loaded_fw;

		DP(BNX2X_MSG_SP, "loaded fw 0x%x major 0x%x minor 0x%x rev 0x%x eng 0x%x\n",
		   loaded_fw, loaded_fw_major, loaded_fw_minor, loaded_fw_rev, loaded_fw_eng);

		/* abort nic load if version mismatch: major, minor and
		 * engineering versions must match exactly, while the
		 * revision only has to be at least the V15 revision.
		 */
		if (loaded_fw_major != BCM_5710_FW_MAJOR_VERSION ||
		    loaded_fw_minor != BCM_5710_FW_MINOR_VERSION ||
		    loaded_fw_eng != BCM_5710_FW_ENGINEERING_VERSION ||
		    loaded_fw_rev < BCM_5710_FW_REVISION_VERSION_V15) {
			if (print_err)
				BNX2X_ERR("loaded FW incompatible. Aborting\n");
			else
				BNX2X_DEV_INFO("loaded FW incompatible, possibly due to MF UNDI\n");

			return -EBUSY;
		}
	}
	return 0;
}
/* returns the "mcp load_code" according to global load_count array */ staticint bnx2x_nic_load_no_mcp(struct bnx2x *bp, int port)
{ int path = BP_PATH(bp);
/* Mark this function as PMF when the load code is one of the COMMON,
 * COMMON_CHIP or PORT load responses; otherwise clear the PMF mark.
 */
static void bnx2x_nic_load_pmf(struct bnx2x *bp, u32 load_code)
{
	if (load_code != FW_MSG_CODE_DRV_LOAD_COMMON &&
	    load_code != FW_MSG_CODE_DRV_LOAD_COMMON_CHIP &&
	    load_code != FW_MSG_CODE_DRV_LOAD_PORT) {
		bp->port.pmf = 0;
	} else {
		bp->port.pmf = 1;
		/* We need the barrier to ensure the ordering between the
		 * writing to bp->port.pmf here and reading it from the
		 * bnx2x_periodic_task().
		 */
		smp_mb();
	}

	DP(NETIF_MSG_LINK, "pmf %d\n", bp->port.pmf);
}
staticvoid bnx2x_nic_load_afex_dcc(struct bnx2x *bp, int load_code)
{ if (((load_code == FW_MSG_CODE_DRV_LOAD_COMMON) ||
(load_code == FW_MSG_CODE_DRV_LOAD_COMMON_CHIP)) &&
(bp->common.shmem2_base)) { if (SHMEM2_HAS(bp, dcc_support))
SHMEM2_WR(bp, dcc_support,
(SHMEM_DCC_SUPPORT_DISABLE_ENABLE_PF_TLV |
SHMEM_DCC_SUPPORT_BANDWIDTH_ALLOCATION_TLV)); if (SHMEM2_HAS(bp, afex_driver_support))
SHMEM2_WR(bp, afex_driver_support,
SHMEM_AFEX_SUPPORTED_VERSION_ONE);
}
/* Set AFEX default VLAN tag to an invalid value */
bp->afex_def_vlan_tag = -1;
}
/** * bnx2x_bz_fp - zero content of the fastpath structure. * * @bp: driver handle * @index: fastpath index to be zeroed * * Makes sure the contents of the bp->fp[index].napi is kept * intact.
*/ staticvoid bnx2x_bz_fp(struct bnx2x *bp, int index)
{ struct bnx2x_fastpath *fp = &bp->fp[index]; int cos; struct napi_struct orig_napi = fp->napi; struct bnx2x_agg_info *orig_tpa_info = fp->tpa_info;
/* Restore the NAPI object as it has been already initialized */
fp->napi = orig_napi;
fp->tpa_info = orig_tpa_info;
fp->bp = bp;
fp->index = index; if (IS_ETH_FP(fp))
fp->max_cos = bp->max_cos; else /* Special queues support only one CoS */
fp->max_cos = 1;
/* set the tpa flag for each queue. The tpa flag determines the queue * minimal size so it must be set prior to queue memory allocation
*/ if (bp->dev->features & NETIF_F_LRO)
fp->mode = TPA_MODE_LRO; elseif (bp->dev->features & NETIF_F_GRO_HW)
fp->mode = TPA_MODE_GRO; else
fp->mode = TPA_MODE_DISABLED;
/* We don't want TPA if it's disabled in bp * or if this is an FCoE L2 ring.
*/ if (bp->disable_tpa || IS_FCOE_FP(fp))
fp->mode = TPA_MODE_DISABLED;
}
if (IS_PF(bp)) {
rc = bnx2x_alloc_mem_cnic(bp); if (rc) {
BNX2X_ERR("Unable to allocate bp memory for cnic\n");
LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic0);
}
}
rc = bnx2x_alloc_fp_mem_cnic(bp); if (rc) {
BNX2X_ERR("Unable to allocate memory for cnic fps\n");
LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic0);
}
/* Update the number of queues with the cnic queues */
rc = bnx2x_set_real_num_queues(bp, 1); if (rc) {
BNX2X_ERR("Unable to set real_num_queues including cnic\n");
LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic0);
}
/* Add all CNIC NAPI objects */
bnx2x_add_all_napi_cnic(bp);
DP(NETIF_MSG_IFUP, "cnic napi added\n");
bnx2x_napi_enable_cnic(bp);
rc = bnx2x_init_hw_func_cnic(bp); if (rc)
LOAD_ERROR_EXIT_CNIC(bp, load_error_cnic1);
load_error_cnic1:
bnx2x_napi_disable_cnic(bp); /* Update the number of queues without the cnic queues */ if (bnx2x_set_real_num_queues(bp, 0))
BNX2X_ERR("Unable to set real_num_queues not including cnic\n");
load_error_cnic0:
BNX2X_ERR("CNIC-related load failed\n");
bnx2x_free_fp_mem_cnic(bp);
bnx2x_free_mem_cnic(bp); return rc; #endif/* ! BNX2X_STOP_ON_ERROR */
}
/* must be called with rtnl_lock */ int bnx2x_nic_load(struct bnx2x *bp, int load_mode)
{ int port = BP_PORT(bp); int i, rc = 0, load_code = 0;
DP(NETIF_MSG_IFUP, "Starting NIC load\n");
DP(NETIF_MSG_IFUP, "CNIC is %s\n", CNIC_ENABLED(bp) ? "enabled" : "disabled");
#ifdef BNX2X_STOP_ON_ERROR if (unlikely(bp->panic)) {
BNX2X_ERR("Can't load NIC when there is panic\n"); return -EPERM;
} #endif
bp->state = BNX2X_STATE_OPENING_WAIT4_LOAD;
/* zero the structure w/o any lock, before SP handler is initialized */
memset(&bp->last_reported_link, 0, sizeof(bp->last_reported_link));
__set_bit(BNX2X_LINK_REPORT_LINK_DOWN,
&bp->last_reported_link.link_report_flags);
if (IS_PF(bp)) /* must be called before memory allocation and HW init */
bnx2x_ilt_set_info(bp);
/* * Zero fastpath structures preserving invariants like napi, which are * allocated only once, fp index, max_cos, bp pointer. * Also set fp->mode and txdata_ptr.
*/
DP(NETIF_MSG_IFUP, "num queues: %d", bp->num_queues);
for_each_queue(bp, i)
bnx2x_bz_fp(bp, i);
memset(bp->bnx2x_txq, 0, (BNX2X_MAX_RSS_COUNT(bp) * BNX2X_MULTI_TX_COS +
bp->num_cnic_queues) * sizeof(struct bnx2x_fp_txdata));
bp->fcoe_init = false;
/* Set the receive queues buffer size */
bnx2x_set_rx_buf_size(bp);
if (IS_PF(bp)) {
rc = bnx2x_alloc_mem(bp); if (rc) {
BNX2X_ERR("Unable to allocate bp memory\n"); return rc;
}
}
/* need to be done after alloc mem, since it's self adjusting to amount * of memory available for RSS queues
*/
rc = bnx2x_alloc_fp_mem(bp); if (rc) {
BNX2X_ERR("Unable to allocate memory for fps\n");
LOAD_ERROR_EXIT(bp, load_error0);
}
/* Allocated memory for FW statistics */
rc = bnx2x_alloc_fw_stats_mem(bp); if (rc)
LOAD_ERROR_EXIT(bp, load_error0);
/* request pf to initialize status blocks */ if (IS_VF(bp)) {
rc = bnx2x_vfpf_init(bp); if (rc)
LOAD_ERROR_EXIT(bp, load_error0);
}
/* As long as bnx2x_alloc_mem() may possibly update * bp->num_queues, bnx2x_set_real_num_queues() should always * come after it. At this stage cnic queues are not counted.
*/
rc = bnx2x_set_real_num_queues(bp, 0); if (rc) {
BNX2X_ERR("Unable to set real_num_queues\n");
LOAD_ERROR_EXIT(bp, load_error0);
}
/* configure multi cos mappings in kernel. * this configuration may be overridden by a multi class queue * discipline or by a dcbx negotiation result.
*/
bnx2x_setup_tc(bp->dev, bp->max_cos);
/* Add all NAPI objects */
bnx2x_add_all_napi(bp);
DP(NETIF_MSG_IFUP, "napi added\n");
bnx2x_napi_enable(bp);
bp->nic_stopped = false;
if (IS_PF(bp)) { /* set pf load just before approaching the MCP */
bnx2x_set_pf_load(bp);
/* if mcp exists send load request and analyze response */ if (!BP_NOMCP(bp)) { /* attempt to load pf */
rc = bnx2x_nic_load_request(bp, &load_code); if (rc)
LOAD_ERROR_EXIT(bp, load_error1);
/* what did mcp say? */
rc = bnx2x_compare_fw_ver(bp, load_code, true); if (rc) {
bnx2x_fw_command(bp, DRV_MSG_CODE_LOAD_DONE, 0);
LOAD_ERROR_EXIT(bp, load_error2);
}
} else {
load_code = bnx2x_nic_load_no_mcp(bp, port);
}
/* mark pmf if applicable */
bnx2x_nic_load_pmf(bp, load_code);
/* Init Function state controlling object */
bnx2x__init_func_obj(bp);
if (bp->flags & PTP_SUPPORTED) {
bnx2x_register_phc(bp);
bnx2x_init_ptp(bp);
bnx2x_configure_ptp_filters(bp);
} /* Start Tx */ switch (load_mode) { case LOAD_NORMAL: /* Tx queue should be only re-enabled */
netif_tx_wake_all_queues(bp->dev); break;
case LOAD_OPEN:
netif_tx_start_all_queues(bp->dev);
smp_mb__after_atomic(); break;
case LOAD_DIAG: case LOAD_LOOPBACK_EXT:
bp->state = BNX2X_STATE_DIAG; break;
default: break;
}
if (bp->port.pmf)
bnx2x_update_drv_flags(bp, 1 << DRV_FLAGS_PORT_MASK, 0); else
bnx2x__link_status_update(bp);
/* start the timer */
mod_timer(&bp->timer, jiffies + bp->current_interval);
if (CNIC_ENABLED(bp))
bnx2x_load_cnic(bp);
if (IS_PF(bp))
bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_GET_DRV_VERSION, 0);
if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) { /* mark driver is loaded in shmem2 */
u32 val;
val = SHMEM2_RD(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)]);
val &= ~DRV_FLAGS_MTU_MASK;
val |= (bp->dev->mtu << DRV_FLAGS_MTU_SHIFT);
SHMEM2_WR(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)],
val | DRV_FLAGS_CAPABILITIES_LOADED_SUPPORTED |
DRV_FLAGS_CAPABILITIES_LOADED_L2);
}
/* Wait for all pending SP commands to complete */ if (IS_PF(bp) && !bnx2x_wait_sp_comp(bp, ~0x0UL)) {
BNX2X_ERR("Timeout waiting for SP elements to complete\n");
bnx2x_nic_unload(bp, UNLOAD_CLOSE, false); return -EBUSY;
}
/* Update driver data for On-Chip MFW dump. */ if (IS_PF(bp))
bnx2x_update_mfw_dump(bp);
/* If PMF - send ADMIN DCBX msg to MFW to initiate DCBX FSM */ if (bp->port.pmf && (bp->state != BNX2X_STATE_DIAG))
bnx2x_dcbx_init(bp, false);
if (!IS_MF_SD_STORAGE_PERSONALITY_ONLY(bp))
bnx2x_set_os_driver_state(bp, OS_DRIVER_STATE_ACTIVE);
DP(NETIF_MSG_IFUP, "Ending successfully NIC load\n");
return 0;
#ifndef BNX2X_STOP_ON_ERROR
load_error3: if (IS_PF(bp)) {
bnx2x_int_disable_sync(bp, 1);
/* clear pf_load status, as it was already set */ if (IS_PF(bp))
bnx2x_clear_pf_load(bp);
load_error0:
bnx2x_free_fw_stats_mem(bp);
bnx2x_free_fp_mem(bp);
bnx2x_free_mem(bp);
/**
 * bnx2x_nic_unload - bring the NIC down and release its resources.
 *
 * @bp:		driver handle
 * @unload_mode:	unload flavour; UNLOAD_RECOVERY skips the Tx drain and
 *			the regular chip cleanup
 * @keep_link:	forwarded as-is to bnx2x_chip_cleanup()
 *
 * Must be called with rtnl_lock held.
 *
 * Returns 0 on success (also when there is nothing to unload), or -EINVAL
 * when a PF is found closed/in error while a recovery is still pending.
 */
int bnx2x_nic_unload(struct bnx2x *bp, int unload_mode, bool keep_link)
{
	bool global = false;
	int idx;

	DP(NETIF_MSG_IFUP, "Starting NIC unload\n");

	if (!IS_MF_SD_STORAGE_PERSONALITY_ONLY(bp))
		bnx2x_set_os_driver_state(bp, OS_DRIVER_STATE_DISABLED);

	/* Drop the "L2 loaded" capability bit published in shmem2 */
	if (IS_PF(bp) && SHMEM2_HAS(bp, drv_capabilities_flag)) {
		u32 caps = SHMEM2_RD(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)]);

		caps &= ~DRV_FLAGS_CAPABILITIES_LOADED_L2;
		SHMEM2_WR(bp, drv_capabilities_flag[BP_FW_MB_IDX(bp)], caps);
	}

	if (IS_PF(bp) && bp->recovery_state != BNX2X_RECOVERY_DONE &&
	    (bp->state == BNX2X_STATE_CLOSED ||
	     bp->state == BNX2X_STATE_ERROR)) {
		/* The driver was unloaded in the middle of a parity error
		 * recovery (waiting for a leader, or for other functions to
		 * unload) and an ifdown arrived afterwards. Give up our part
		 * of the recovery so the other functions can finish it.
		 */
		bp->recovery_state = BNX2X_RECOVERY_DONE;
		bp->is_leader = 0;
		bnx2x_release_leader_lock(bp);
		smp_mb();

		DP(NETIF_MSG_IFDOWN, "Releasing a leadership...\n");
		BNX2X_ERR("Can't unload in closed or error state\n");
		return -EINVAL;
	}

	/* A previous bnx2x_nic_load() that failed has already released every
	 * resource, so there is nothing left to undo. This path is reached
	 * only after an unsuccessful ndo_* callback, while dev->IFF_UP is
	 * still set.
	 */
	if (bp->state == BNX2X_STATE_CLOSED || bp->state == BNX2X_STATE_ERROR)
		return 0;

	/* Leave BNX2X_STATE_OPEN *before* Tx is stopped; otherwise
	 * bnx2x_tx_int() could restart Tx from NAPI context.
	 */
	bp->state = BNX2X_STATE_CLOSING_WAIT4_HALT;
	smp_mb();

	/* Tell the VFs that the PF is going down */
	bnx2x_iov_channel_down(bp);

	if (CNIC_LOADED(bp))
		bnx2x_cnic_notify(bp, CNIC_CTL_STOP_CMD);

	if (IS_PF(bp) && !BP_NOMCP(bp)) {
		/* Set the ALWAYS_ALIVE bit in shmem */
		bp->fw_drv_pulse_wr_seq |= DRV_PULSE_ALWAYS_ALIVE;
		bnx2x_drv_pulse(bp);
		bnx2x_stats_handle(bp, STATS_EVENT_STOP);
		bnx2x_save_statistics(bp);
	}

	/* Let the consumers catch up with the producers on every queue.
	 * During recovery the FW cannot write to the host, so waiting for
	 * Tx completion would be pointless.
	 */
	if (unload_mode != UNLOAD_RECOVERY)
		bnx2x_drain_tx_queues(bp);

	if (IS_VF(bp)) {
		/* A VF notifies its PF, which deletes the sp elements and
		 * clears the initializations on its behalf.
		 */
		bnx2x_clear_vlan_info(bp);
		bnx2x_vfpf_close_vf(bp);
	} else if (unload_mode != UNLOAD_RECOVERY) {
		/* Normal/close unload: the chip itself has to be cleaned up */
		bnx2x_chip_cleanup(bp, unload_mode, keep_link);
	} else {
		/* Send the UNLOAD_REQUEST to the MCP */
		bnx2x_send_unload_req(bp, unload_mode);

		/* Block transactions to the host from functions on the engine
		 * that does not reset global blocks on a global attention,
		 * once global blocks are reset and the gates are opened (the
		 * engine whose leader performs the recovery last).
		 */
		if (!CHIP_IS_E1x(bp))
			bnx2x_pf_disable(bp);

		if (!bp->nic_stopped) {
			/* Disable HW interrupts and NAPI */
			bnx2x_netif_stop(bp, 1);

			/* Delete all NAPI objects */
			bnx2x_del_all_napi(bp);
			if (CNIC_LOADED(bp))
				bnx2x_del_all_napi_cnic(bp);

			/* Release IRQs */
			bnx2x_free_irq(bp);
			bp->nic_stopped = true;
		}

		/* Report UNLOAD_DONE to the MCP */
		bnx2x_send_unload_done(bp, false);
	}

	/* No more interrupts can arrive from here on, so queueable objects
	 * that were not cleaned so far can safely be cleaned now.
	 */
	if (IS_PF(bp))
		bnx2x_squeeze_objects(bp);

	/* No SP command should still be pending at this stage */
	bp->sp_state = 0;

	bp->port.pmf = 0;

	/* Forget any work still queued for the rtnl task */
	bp->sp_rtnl_state = 0;
	smp_mb();

	/* Free SKBs, SGEs, TPA pool and driver internals */
	bnx2x_free_skbs(bp);
	if (CNIC_LOADED(bp))
		bnx2x_free_skbs_cnic(bp);

	for_each_rx_queue(bp, idx)
		bnx2x_free_rx_sge_range(bp, &bp->fp[idx], NUM_RX_SGE);

	bnx2x_free_fp_mem(bp);
	if (CNIC_LOADED(bp))
		bnx2x_free_fp_mem_cnic(bp);

	if (IS_PF(bp) && CNIC_LOADED(bp))
		bnx2x_free_mem_cnic(bp);

	bnx2x_free_mem(bp);

	/* Clear the driver version indication in shmem */
	if (IS_PF(bp) && !BP_NOMCP(bp))
		bnx2x_update_mng_version(bp);

	/* Pending parity attentions mean a reset is in progress; flag it,
	 * and flag it as global when the attention is a global one.
	 */
	if (IS_PF(bp) && bnx2x_chk_parity_attn(bp, &global, false)) {
		bnx2x_set_reset_in_progress(bp);

		if (global)
			bnx2x_set_reset_global(bp);
	}

	/* The last driver to go must disable "close the gate" when neither a
	 * parity attention nor a "process kill" is pending.
	 */
	if (IS_PF(bp) &&
	    !bnx2x_clear_pf_load(bp) &&
	    bnx2x_reset_is_done(bp, BP_PATH(bp)))
		bnx2x_disable_close_the_gate(bp);

	DP(NETIF_MSG_IFUP, "Ending NIC unload\n");

	return 0;
}
int bnx2x_set_power_state(struct bnx2x *bp, pci_power_t state)
{
u16 pmcsr;
/* If there is no power capability, silently succeed */ if (!bp->pdev->pm_cap) {
BNX2X_DEV_INFO("No power capability. Breaking.\n"); return 0;
}
if (pmcsr & PCI_PM_CTRL_STATE_MASK) /* delay required during transition out of D3hot */
msleep(20); break;
case PCI_D3hot: /* If there are other clients above don't
shut down the power */ if (atomic_read(&bp->pdev->enable_cnt) != 1) return 0; /* Don't shut down the power for emulation and FPGA */ if (CHIP_REV_IS_SLOW(bp)) return 0;
if (rx_work_done < budget) { /* No need to update SB for FCoE L2 ring as long as * it's connected to the default SB and the SB * has been updated when NAPI was scheduled.
*/ if (IS_FCOE_FP(fp)) {
napi_complete_done(napi, rx_work_done);
} else {
bnx2x_update_fpsb_idx(fp); /* bnx2x_has_rx_work() reads the status block, * thus we need to ensure that status block indices * have been actually read (bnx2x_update_fpsb_idx) * prior to this check (bnx2x_has_rx_work) so that * we won't write the "newer" value of the status block * to IGU (if there was a DMA right after * bnx2x_has_rx_work and if there is no rmb, the memory * reading (bnx2x_update_fpsb_idx) may be postponed * to right before bnx2x_ack_sb). In this case there * will never be another interrupt until there is * another update of the status block, while there * is still unhandled work.
*/
rmb();
if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) { if (napi_complete_done(napi, rx_work_done)) { /* Re-enable interrupts */
DP(NETIF_MSG_RX_STATUS, "Update index to %d\n", fp->fp_hc_idx);
bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
le16_to_cpu(fp->fp_hc_idx),
IGU_INT_ENABLE, 1);
}
} else {
rx_work_done = budget;
}
}
}
return rx_work_done;
}
/* we split the first BD into headers and data BDs * to ease the pain of our fellow microcode engineers * we use one mapping for both BDs
*/ static u16 bnx2x_tx_split(struct bnx2x *bp, struct bnx2x_fp_txdata *txdata, struct sw_tx_bd *tx_buf, struct eth_tx_start_bd **tx_bd, u16 hlen,
u16 bd_prod)
{ struct eth_tx_start_bd *h_tx_bd = *tx_bd; struct eth_tx_bd *d_tx_bd;
dma_addr_t mapping; int old_len = le16_to_cpu(h_tx_bd->nbytes);
/* first fix first BD */
h_tx_bd->nbytes = cpu_to_le16(hlen);
if (!CHIP_IS_E1x(bp) && skb->encapsulation) { if (inner_ip_hdr(skb)->version == 6) {
rc |= XMIT_CSUM_ENC_V6; if (inner_ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
rc |= XMIT_CSUM_TCP;
} else {
rc |= XMIT_CSUM_ENC_V4; if (inner_ip_hdr(skb)->protocol == IPPROTO_TCP)
rc |= XMIT_CSUM_TCP;
}
} if (prot == IPPROTO_TCP)
rc |= XMIT_CSUM_TCP;
if (skb_is_gso(skb)) { if (skb_is_gso_v6(skb)) {
rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP); if (rc & XMIT_CSUM_ENC)
rc |= XMIT_GSO_ENC_V6;
} else {
rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP); if (rc & XMIT_CSUM_ENC)
rc |= XMIT_GSO_ENC_V4;
}
}
return rc;
}
/* VXLAN: 4 = 1 (for linear data BD) + 3 (2 for PBD and last BD) */ #define BNX2X_NUM_VXLAN_TSO_WIN_SUB_BDS 4
/* Regular: 3 = 1 (for linear data BD) + 2 (for PBD and last BD) */ #define BNX2X_NUM_TSO_WIN_SUB_BDS 3
#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - BDS_PER_TX_PKT) /* check if packet requires linearization (packet is too fragmented) no need to check fragmentation if page size > 8K (there will be no
violation to FW restrictions) */ staticint bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb,
u32 xmit_type)
{ int first_bd_sz = 0, num_tso_win_sub = BNX2X_NUM_TSO_WIN_SUB_BDS; int to_copy = 0, hlen = 0;
if (xmit_type & XMIT_GSO_ENC)
num_tso_win_sub = BNX2X_NUM_VXLAN_TSO_WIN_SUB_BDS;
if (skb_shinfo(skb)->nr_frags >= (MAX_FETCH_BD - num_tso_win_sub)) { if (xmit_type & XMIT_GSO) { unsignedshort lso_mss = skb_shinfo(skb)->gso_size; int wnd_size = MAX_FETCH_BD - num_tso_win_sub; /* Number of windows to check */ int num_wnds = skb_shinfo(skb)->nr_frags - wnd_size; int wnd_idx = 0; int frag_idx = 0;
u32 wnd_sum = 0;
/* Amount of data (w/o headers) on linear part of SKB*/
first_bd_sz = skb_headlen(skb) - hlen;
wnd_sum = first_bd_sz;
/* Calculate the first sum - it's special */ for (frag_idx = 0; frag_idx < wnd_size - 1; frag_idx++)
wnd_sum +=
skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]);
/* If there was data on linear skb data - check it */ if (first_bd_sz > 0) { if (unlikely(wnd_sum < lso_mss)) {
to_copy = 1; goto exit_lbl;
}
wnd_sum -= first_bd_sz;
}
/* Others are easier: run through the frag list and
check all windows */ for (wnd_idx = 0; wnd_idx <= num_wnds; wnd_idx++) {
wnd_sum +=
skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1]);
if (unlikely(wnd_sum < lso_mss)) {
to_copy = 1; break;
}
wnd_sum -=
skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx]);
}
} else { /* in non-LSO too fragmented packet should always
be linearized */
to_copy = 1;
}
}
exit_lbl: if (unlikely(to_copy))
DP(NETIF_MSG_TX_QUEUED, "Linearization IS REQUIRED for %s packet. num_frags %d hlen %d first_bd_sz %d\n",
(xmit_type & XMIT_GSO) ? "LSO" : "non-LSO",
skb_shinfo(skb)->nr_frags, hlen, first_bd_sz);
/* We support checksum offload for TCP and UDP only. * No need to pass the UDP header length - it's a constant.
*/ return skb_inner_transport_offset(skb) + sizeof(struct udphdr);
}
/** * bnx2x_set_pbd_csum_e2 - update PBD with checksum and return header length * * @bp: driver handle * @skb: packet skb * @parsing_data: data to be updated * @xmit_type: xmit flags * * 57712/578xx related
*/ static u8 bnx2x_set_pbd_csum_e2(struct bnx2x *bp, struct sk_buff *skb,
u32 *parsing_data, u32 xmit_type)
{
*parsing_data |=
((skb_transport_offset(skb) >> 1) <<
ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W_SHIFT) &
ETH_TX_PARSE_BD_E2_L4_HDR_START_OFFSET_W;
return skb_tcp_all_headers(skb);
} /* We support checksum offload for TCP and UDP only. * No need to pass the UDP header length - it's a constant.
*/ return skb_transport_offset(skb) + sizeof(struct udphdr);
}
/**
 * bnx2x_set_sbd_csum - set the checksum-related flags of the start BD.
 *
 * @bp:		driver handle (not used here)
 * @skb:	packet skb (not used here)
 * @tx_start_bd:	start BD whose bd_flags are updated
 * @xmit_type:	xmit flags
 *
 * The FW indication follows the inner or the outer protocol when the packet
 * is tunneled: L4 checksum is always requested, the IPv6 flag is added for
 * XMIT_CSUM_ENC_V6/XMIT_CSUM_V6, and the UDP flag is added whenever
 * XMIT_CSUM_TCP is not set.
 */
static void bnx2x_set_sbd_csum(struct bnx2x *bp, struct sk_buff *skb,
			       struct eth_tx_start_bd *tx_start_bd,
			       u32 xmit_type)
{
	u32 csum_flags = ETH_TX_BD_FLAGS_L4_CSUM;

	if (xmit_type & (XMIT_CSUM_ENC_V6 | XMIT_CSUM_V6))
		csum_flags |= ETH_TX_BD_FLAGS_IPV6;

	/* anything that is not TCP is treated as UDP */
	if ((xmit_type & XMIT_CSUM_TCP) == 0)
		csum_flags |= ETH_TX_BD_FLAGS_IS_UDP;

	tx_start_bd->bd_flags.as_bitfield |= csum_flags;
}
/* for now NS flag is not used in Linux */
pbd->global_data =
cpu_to_le16(hlen |
((skb->protocol == cpu_to_be16(ETH_P_8021Q)) <<
ETH_TX_PARSE_BD_E1X_LLC_SNAP_EN_SHIFT));
/* We support checksum offload for TCP and UDP only */ if (xmit_type & XMIT_CSUM_TCP)
hlen += tcp_hdrlen(skb) / 2; else
hlen += sizeof(struct udphdr) / 2;
/* enable this debug print to view the transmission queue being used DP(NETIF_MSG_TX_QUEUED, "indices: txq %d, fp %d, txdata %d\n",
txq_index, fp_index, txdata_index); */
/* enable this debug print to view the transmission details DP(NETIF_MSG_TX_QUEUED, "transmitting packet cid %d fp index %d txdata_index %d tx_data ptr %p fp pointer %p\n",
txdata->cid, fp_index, txdata_index, txdata, fp); */
if (unlikely(bnx2x_tx_avail(bp, txdata) <
skb_shinfo(skb)->nr_frags +
BDS_PER_TX_PKT +
NEXT_CNT_PER_TX_PKT(MAX_BDS_PER_TX_PKT))) { /* Handle special storage cases separately */ if (txdata->tx_ring_size == 0) { struct bnx2x_eth_q_stats *q_stats =
bnx2x_fp_qstats(bp, txdata->parent_fp);
q_stats->driver_filtered_tx_pkt++;
dev_kfree_skb(skb); return NETDEV_TX_OK;
}
bnx2x_fp_qstats(bp, txdata->parent_fp)->driver_xoff++;
netif_tx_stop_queue(txq);
BNX2X_ERR("BUG! Tx ring full when queue awake!\n");
/* set flag according to packet type (UNICAST_ADDRESS is default)*/ if (unlikely(is_multicast_ether_addr(eth->h_dest))) { if (is_broadcast_ether_addr(eth->h_dest))
mac_type = BROADCAST_ADDRESS; else
mac_type = MULTICAST_ADDRESS;
}
#if (MAX_SKB_FRAGS >= MAX_FETCH_BD - BDS_PER_TX_PKT) /* First, check if we need to linearize the skb (due to FW restrictions). No need to check fragmentation if page size > 8K
(there will be no violation to FW restrictions) */ if (bnx2x_pkt_req_lin(bp, skb, xmit_type)) { /* Statistics of linearization */
bp->lin_cnt++; if (skb_linearize(skb) != 0) {
DP(NETIF_MSG_TX_QUEUED, "SKB linearization failed - silently dropping this SKB\n");
dev_kfree_skb_any(skb); return NETDEV_TX_OK;
}
} #endif /* Map skb linear data for DMA */
mapping = dma_map_single(&bp->pdev->dev, skb->data,
skb_headlen(skb), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) {
DP(NETIF_MSG_TX_QUEUED, "SKB mapping failed - silently dropping this SKB\n");
dev_kfree_skb_any(skb); return NETDEV_TX_OK;
} /* Please read carefully. First we use one BD which we mark as start, then we have a parsing info BD (used for TSO or xsum), and only then we have the rest of the TSO BDs. (don't forget to mark the last one as last, and to unmap only AFTER you write to the BD ...) And above all, all pdb sizes are in words - NOT DWORDS!
*/
/* get current pkt produced now - advance it just before sending packet * since mapping of pages may fail and cause packet to be dropped
*/
pkt_prod = txdata->tx_pkt_prod;
bd_prod = TX_BD(txdata->tx_bd_prod);
/* get a tx_buf and first BD * tx_start_bd may be changed during SPLIT, * but first_bd will always stay first
*/
tx_buf = &txdata->tx_buf_ring[TX_BD(pkt_prod)];
tx_start_bd = &txdata->tx_desc_ring[bd_prod].start_bd;
first_bd = tx_start_bd;
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { if (!(bp->flags & TX_TIMESTAMPING_EN)) {
bp->eth_stats.ptp_skip_tx_ts++;
BNX2X_ERR("Tx timestamping was not enabled, this packet will not be timestamped\n");
} elseif (bp->ptp_tx_skb) {
bp->eth_stats.ptp_skip_tx_ts++;
netdev_err_once(bp->dev, "Device supports only a single outstanding packet to timestamp, this packet won't be timestamped\n");
} else {
skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; /* schedule check for Tx timestamp */
bp->ptp_tx_skb = skb_get(skb);
bp->ptp_tx_start = jiffies;
schedule_work(&bp->ptp_task);
}
}
/* header nbd: indirectly zero other flags! */
tx_start_bd->general_data = 1 << ETH_TX_START_BD_HDR_NBDS_SHIFT;
/* remember the first BD of the packet */
tx_buf->first_bd = txdata->tx_bd_prod;
tx_buf->skb = skb;
tx_buf->flags = 0;
if (skb_vlan_tag_present(skb)) {
tx_start_bd->vlan_or_ethertype =
cpu_to_le16(skb_vlan_tag_get(skb));
tx_start_bd->bd_flags.as_bitfield |=
(X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
} else { /* when transmitting in a vf, start bd must hold the ethertype * for fw to enforce it
*/
u16 vlan_tci = 0; #ifndef BNX2X_STOP_ON_ERROR if (IS_VF(bp)) { #endif /* Still need to consider inband vlan for enforced */ if (__vlan_get_tag(skb, &vlan_tci)) {
tx_start_bd->vlan_or_ethertype =
cpu_to_le16(ntohs(eth->h_proto));
} else {
tx_start_bd->bd_flags.as_bitfield |=
(X_ETH_INBAND_VLAN <<
ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT);
tx_start_bd->vlan_or_ethertype =
cpu_to_le16(vlan_tci);
} #ifndef BNX2X_STOP_ON_ERROR
} else { /* used by FW for packet accounting */
tx_start_bd->vlan_or_ethertype = cpu_to_le16(pkt_prod);
} #endif
}
nbd = 2; /* start_bd + pbd + frags (updated when pages are mapped) */
/* turn on parsing and get a BD */
bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
if (xmit_type & XMIT_CSUM)
bnx2x_set_sbd_csum(bp, skb, tx_start_bd, xmit_type);
if (!CHIP_IS_E1x(bp)) {
pbd_e2 = &txdata->tx_desc_ring[bd_prod].parse_bd_e2;
memset(pbd_e2, 0, sizeof(struct eth_tx_parse_bd_e2));
if (xmit_type & XMIT_CSUM_ENC) {
u16 global_data = 0;
/* Set PBD in enc checksum offload case */
hlen = bnx2x_set_pbd_csum_enc(bp, skb,
&pbd_e2_parsing_data,
xmit_type);
/* turn on 2nd parsing and get a BD */
bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
if (xmit_type & XMIT_GSO_ENC)
bnx2x_update_pbds_gso_enc(skb, pbd_e2, pbd2,
&global_data,
xmit_type);
pbd2->global_data = cpu_to_le16(global_data);
/* add addition parse BD indication to start BD */
SET_FLAG(tx_start_bd->general_data,
ETH_TX_START_BD_PARSE_NBDS, 1); /* set encapsulation flag in start BD */
SET_FLAG(tx_start_bd->general_data,
ETH_TX_START_BD_TUNNEL_EXIST, 1);
tx_buf->flags |= BNX2X_HAS_SECOND_PBD;
nbd++;
} elseif (xmit_type & XMIT_CSUM) { /* Set PBD in checksum offload case w/o encapsulation */
hlen = bnx2x_set_pbd_csum_e2(bp, skb,
&pbd_e2_parsing_data,
xmit_type);
}
bnx2x_set_ipv6_ext_e2(skb, &pbd_e2_parsing_data, xmit_type); /* Add the macs to the parsing BD if this is a vf or if * Tx Switching is enabled.
*/ if (IS_VF(bp)) { /* override GRE parameters in BD */
bnx2x_set_fw_mac_addr(&pbd_e2->data.mac_addr.src_hi,
&pbd_e2->data.mac_addr.src_mid,
&pbd_e2->data.mac_addr.src_lo,
eth->h_source);
bnx2x_set_fw_mac_addr(&pbd_e2->data.mac_addr.dst_hi,
&pbd_e2->data.mac_addr.dst_mid,
&pbd_e2->data.mac_addr.dst_lo,
eth->h_dest);
} else { if (bp->flags & TX_SWITCHING)
bnx2x_set_fw_mac_addr(
&pbd_e2->data.mac_addr.dst_hi,
&pbd_e2->data.mac_addr.dst_mid,
&pbd_e2->data.mac_addr.dst_lo,
eth->h_dest); #ifdef BNX2X_STOP_ON_ERROR /* Enforce security is always set in Stop on Error - * source mac should be present in the parsing BD
*/
bnx2x_set_fw_mac_addr(&pbd_e2->data.mac_addr.src_hi,
&pbd_e2->data.mac_addr.src_mid,
&pbd_e2->data.mac_addr.src_lo,
eth->h_source); #endif
}
SET_FLAG(pbd_e2_parsing_data,
ETH_TX_PARSE_BD_E2_ETH_ADDR_TYPE, mac_type);
} else {
u16 global_data = 0;
pbd_e1x = &txdata->tx_desc_ring[bd_prod].parse_bd_e1x;
memset(pbd_e1x, 0, sizeof(struct eth_tx_parse_bd_e1x)); /* Set PBD in checksum offload case */ if (xmit_type & XMIT_CSUM)
hlen = bnx2x_set_pbd_csum(bp, skb, pbd_e1x, xmit_type);
/* Setup the data pointer of the first BD of the packet */
tx_start_bd->addr_hi = cpu_to_le32(U64_HI(mapping));
tx_start_bd->addr_lo = cpu_to_le32(U64_LO(mapping));
tx_start_bd->nbytes = cpu_to_le16(skb_headlen(skb));
pkt_size = tx_start_bd->nbytes;
/* Set the PBD's parsing_data field if not zero * (for the chips newer than 57711).
*/ if (pbd_e2_parsing_data)
pbd_e2->parsing_data = cpu_to_le32(pbd_e2_parsing_data);
tx_data_bd = (struct eth_tx_bd *)tx_start_bd;
/* Handle fragmented skb */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
DP(NETIF_MSG_TX_QUEUED, "Unable to map page - dropping packet...\n");
/* we need unmap all buffers already mapped * for this SKB; * first_bd->nbd need to be properly updated * before call to bnx2x_free_tx_pkt
*/
first_bd->nbd = cpu_to_le16(nbd);
bnx2x_free_tx_pkt(bp, txdata,
TX_BD(txdata->tx_pkt_prod),
&pkts_compl, &bytes_compl); return NETDEV_TX_OK;
}
/* update with actual num BDs */
first_bd->nbd = cpu_to_le16(nbd);
bd_prod = TX_BD(NEXT_TX_IDX(bd_prod));
/* now send a tx doorbell, counting the next BD * if the packet contains or ends with it
*/ if (TX_BD_POFF(bd_prod) < nbd)
nbd++;
/* total_pkt_bytes should be set on the first data BD if * it's not an LSO packet and there is more than one * data BD. In this case pkt_size is limited by an MTU value. * However we prefer to set it for an LSO packet (while we don't * have to) in order to save some CPU cycles in a none-LSO * case, when we much more care about them.
*/ if (total_pkt_bd != NULL)
total_pkt_bd->total_pkt_bytes = pkt_size;
txdata->tx_pkt_prod++; /* * Make sure that the BD data is updated before updating the producer * since FW might read the BD right after the producer is updated. * This is only applicable for weak-ordered memory model archs such * as IA-64. The following barrier is also mandatory since FW will * assumes packets must have BDs.
*/
wmb();
txdata->tx_db.data.prod += nbd; /* make sure descriptor update is observed by HW */
wmb();
if (unlikely(bnx2x_tx_avail(bp, txdata) < MAX_DESC_PER_TX_PKT)) {
netif_tx_stop_queue(txq);
/* paired memory barrier is in bnx2x_tx_int(), we have to keep * ordering of set_bit() in netif_tx_stop_queue() and read of
* fp->bd_tx_cons */
smp_mb();
bnx2x_fp_qstats(bp, txdata->parent_fp)->driver_xoff++; if (bnx2x_tx_avail(bp, txdata) >= MAX_DESC_PER_TX_PKT)
netif_tx_wake_queue(txq);
}
txdata->tx_pkt++;
/** * bnx2x_setup_tc - routine to configure net_device for multi tc * * @dev: net device to configure * @num_tc: number of traffic classes to enable * * callback connected to the ndo_setup_tc function pointer
*/ int bnx2x_setup_tc(struct net_device *dev, u8 num_tc)
{ struct bnx2x *bp = netdev_priv(dev);
u8 c2s_map[BNX2X_MAX_PRIORITY], c2s_def; int cos, prio, count, offset;
/* setup tc must be called under rtnl lock */
ASSERT_RTNL();
/* no traffic classes requested. Aborting */ if (!num_tc) {
netdev_reset_tc(dev); return 0;
}
/* requested to support too many traffic classes */ if (num_tc > bp->max_cos) {
BNX2X_ERR("support for too many traffic classes requested: %d. Max supported is %d\n",
num_tc, bp->max_cos); return -EINVAL;
}
/* declare amount of supported traffic classes */ if (netdev_set_num_tc(dev, num_tc)) {
BNX2X_ERR("failed to declare %d traffic classes\n", num_tc); return -EINVAL;
}
bnx2x_get_c2s_mapping(bp, c2s_map, &c2s_def);
/* configure priority to traffic class mapping */ for (prio = 0; prio < BNX2X_MAX_PRIORITY; prio++) { int outer_prio = c2s_map[prio];
/* Use this configuration to differentiate tc0 from other COSes This can be used for ets or pfc, and save the effort of setting up a multio class queue disc or negotiating DCBX with a switch netdev_set_prio_tc_map(dev, 0, 0); DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", 0, 0); for (prio = 1; prio < 16; prio++) { netdev_set_prio_tc_map(dev, prio, 1); DP(BNX2X_MSG_SP, "mapping priority %d to tc %d\n", prio, 1);
} */
/* configure traffic class to transmission queue mapping */ for (cos = 0; cos < bp->max_cos; cos++) {
count = BNX2X_NUM_ETH_QUEUES(bp);
offset = cos * BNX2X_NUM_NON_CNIC_QUEUES(bp);
netdev_set_tc_queue(dev, cos, count, offset);
DP(BNX2X_MSG_SP | NETIF_MSG_IFUP, "mapping tc %d to offset %d count %d\n",
cos, offset, count);
}
/* Returns the number of actually allocated BDs */ staticint bnx2x_alloc_rx_bds(struct bnx2x_fastpath *fp, int rx_ring_size)
{ struct bnx2x *bp = fp->bp;
u16 ring_prod, cqe_ring_prod; int i, failure_cnt = 0;
/* This routine is called only during fo init so * fp->eth_q_stats.rx_skb_alloc_failed = 0
*/ for (i = 0; i < rx_ring_size; i++) { if (bnx2x_alloc_rx_data(bp, fp, ring_prod, GFP_KERNEL) < 0) {
failure_cnt++; continue;
}
ring_prod = NEXT_RX_IDX(ring_prod);
cqe_ring_prod = NEXT_RCQ_IDX(cqe_ring_prod);
WARN_ON(ring_prod <= (i - failure_cnt));
}
if (failure_cnt)
BNX2X_ERR("was only able to allocate %d rx skbs on queue[%d]\n",
i - failure_cnt, fp->index);
fp->rx_bd_prod = ring_prod; /* Limit the CQE producer by the CQE ring size */
fp->rx_comp_prod = min_t(u16, NUM_RCQ_RINGS*RCQ_DESC_CNT,
cqe_ring_prod);
if (CHIP_IS_E3(bp)) {
u32 cfg = SHMEM_RD(bp,
dev_info.port_hw_config[BP_PORT(bp)].
default_cfg);
/* Decrease ring size for 1G functions */ if ((cfg & PORT_HW_CFG_NET_SERDES_IF_MASK) ==
PORT_HW_CFG_NET_SERDES_IF_SGMII)
rx_ring_size /= 10;
}
/* allocate at least number of buffers required by FW */
rx_ring_size = max_t(int, bp->disable_tpa ? MIN_RX_SIZE_NONTPA :
MIN_RX_SIZE_TPA, rx_ring_size);
bp->rx_ring_size = rx_ring_size;
} else/* if rx_ring_size specified - use it */
rx_ring_size = bp->rx_ring_size;
/* handles low memory cases */
alloc_mem_err:
BNX2X_ERR("Unable to allocate full memory for queue %d (size %d)\n",
index, ring_size); /* FW will drop all packets if queue is not big enough, * In these cases we disable the queue * Min size is different for OOO, TPA and non-TPA queues
*/ if (ring_size < (fp->mode == TPA_MODE_DISABLED ?
MIN_RX_SIZE_NONTPA : MIN_RX_SIZE_TPA)) { /* release memory allocated for this queue */
bnx2x_free_fp_mem_at(bp, index); return -ENOMEM;
} return 0;
}
/**
 * bnx2x_alloc_fp_mem_cnic - allocate fastpath memory for the CNIC queues.
 *
 * @bp:		driver handle
 *
 * Only the FCoE fastpath is handled, and only when FCoE is not disabled.
 *
 * Returns 0 on success or when there is nothing to allocate, -ENOMEM when
 * the FCoE fastpath allocation fails.
 */
static int bnx2x_alloc_fp_mem_cnic(struct bnx2x *bp)
{
	/* No FCoE - nothing to allocate */
	if (NO_FCOE(bp))
		return 0;

	/* A failure here fails the load process rather than setting
	 * NO_FCOE_FLAG.
	 */
	if (bnx2x_alloc_fp_mem_at(bp, FCOE_IDX(bp)))
		return -ENOMEM;

	return 0;
}
staticint bnx2x_alloc_fp_mem(struct bnx2x *bp)
{ int i;
/* 1. Allocate FP for leading - fatal if error * 2. Allocate RSS - fix number of queues if error
*/
/* leading */ if (bnx2x_alloc_fp_mem_at(bp, 0)) return -ENOMEM;
/* RSS */
for_each_nondefault_eth_queue(bp, i) if (bnx2x_alloc_fp_mem_at(bp, i)) break;
/* handle memory failures */ if (i != BNX2X_NUM_ETH_QUEUES(bp)) { int delta = BNX2X_NUM_ETH_QUEUES(bp) - i;
WARN_ON(delta < 0);
bnx2x_shrink_eth_fp(bp, delta); if (CNIC_SUPPORT(bp)) /* move non eth FPs next to last eth FP * must be done in that order * FCOE_IDX < FWD_IDX < OOO_IDX
*/
/* move FCoE fp even NO_FCOE_FLAG is on */
bnx2x_move_fp(bp, FCOE_IDX(bp), FCOE_IDX(bp) - delta);
bp->num_ethernet_queues -= delta;
bp->num_queues = bp->num_ethernet_queues +
bp->num_cnic_queues;
BNX2X_ERR("Adjusted num of queues from %d to %d\n",
bp->num_queues + delta, bp->num_queues);
}
return 0;
}
/**
 * bnx2x_free_mem_bp - free the arrays hanging off the driver handle.
 *
 * @bp:		driver handle
 *
 * Frees the per-fastpath tpa_info buffers, then the fp, sp_objs, fp_stats,
 * bnx2x_txq, msix_table and ilt allocations. Every pointer may be NULL:
 * kfree() ignores NULL, and the tpa_info walk is skipped when there is no
 * fastpath array to index.
 */
void bnx2x_free_mem_bp(struct bnx2x *bp)
{
	int i;

	/* bp->fp[i] dereferences bp->fp, so unlike the plain kfree() calls
	 * below this loop is not NULL-safe on its own.
	 * NOTE(review): presumably reachable with bp->fp == NULL and a
	 * non-zero fp_array_size when the fp array allocation failed -
	 * confirm against bnx2x_alloc_mem_bp().
	 */
	if (bp->fp) {
		for (i = 0; i < bp->fp_array_size; i++)
			kfree(bp->fp[i].tpa_info);
	}

	kfree(bp->fp);
	kfree(bp->sp_objs);
	kfree(bp->fp_stats);
	kfree(bp->bnx2x_txq);
	kfree(bp->msix_table);
	kfree(bp->ilt);
}
int bnx2x_alloc_mem_bp(struct bnx2x *bp)
{ struct bnx2x_fastpath *fp; struct msix_entry *tbl; struct bnx2x_ilt *ilt; int msix_table_size = 0; int fp_array_size, txq_array_size; int i;
/* * The biggest MSI-X table we might need is as a maximum number of fast * path IGU SBs plus default SB (for PF only).
*/
msix_table_size = bp->igu_sb_cnt; if (IS_PF(bp))
msix_table_size++;
BNX2X_DEV_INFO("msix_table_size %d\n", msix_table_size);
/**
 * bnx2x_get_cur_phy_idx - pick the index of the PHY currently in use.
 *
 * @bp:		driver handle
 *
 * With at most one PHY configured, INT_PHY is returned. Otherwise:
 *  - link up: EXT_PHY1, unless the link status reports a SERDES link and
 *    EXT_PHY2 supports fibre, in which case EXT_PHY2;
 *  - link down: derived from bnx2x_phy_selection(); a selection value not
 *    listed in the switch leaves the initial index 0.
 */
int bnx2x_get_cur_phy_idx(struct bnx2x *bp)
{
u32 sel_phy_idx = 0; if (bp->link_params.num_phys <= 1) return INT_PHY;
if (bp->link_vars.link_up) {
sel_phy_idx = EXT_PHY1; /* In case link is SERDES, check if the EXT_PHY2 is the one */ if ((bp->link_vars.link_status & LINK_STATUS_SERDES_LINK) &&
(bp->link_params.phy[EXT_PHY2].supported & SUPPORTED_FIBRE))
sel_phy_idx = EXT_PHY2;
} else {
/* Link is down: fall back to the configured PHY selection policy */
switch (bnx2x_phy_selection(&bp->link_params)) { case PORT_HW_CFG_PHY_SELECTION_HARDWARE_DEFAULT: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY: case PORT_HW_CFG_PHY_SELECTION_FIRST_PHY_PRIORITY:
sel_phy_idx = EXT_PHY1; break; case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY: case PORT_HW_CFG_PHY_SELECTION_SECOND_PHY_PRIORITY:
sel_phy_idx = EXT_PHY2; break;
}
}
return sel_phy_idx;
} int bnx2x_get_link_cfg_idx(struct bnx2x *bp)
{
u32 sel_phy_idx = bnx2x_get_cur_phy_idx(bp); /* * The selected activated PHY is always after swapping (in case PHY * swapping is enabled). So when swapping is enabled, we need to reverse * the configuration
*/
if (pci_num_vf(bp->pdev)) {
netdev_features_t changed = dev->features ^ features;
/* Revert the requested changes in features if they * would require internal reload of PF in bnx2x_set_features().
*/ if (!(features & NETIF_F_RXCSUM) && !bp->disable_tpa) {
features &= ~NETIF_F_RXCSUM;
features |= dev->features & NETIF_F_RXCSUM;
}
if (changed & NETIF_F_LOOPBACK) {
features &= ~NETIF_F_LOOPBACK;
features |= dev->features & NETIF_F_LOOPBACK;
}
}
/* TPA requires Rx CSUM offloading */ if (!(features & NETIF_F_RXCSUM))
features &= ~NETIF_F_LRO;
if (!(features & NETIF_F_GRO) || !bnx2x_mtu_allows_gro(dev->mtu))
features &= ~NETIF_F_GRO_HW; if (features & NETIF_F_GRO_HW)
features &= ~NETIF_F_LRO;
return features;
}
int bnx2x_set_features(struct net_device *dev, netdev_features_t features)
{ struct bnx2x *bp = netdev_priv(dev);
netdev_features_t changes = features ^ dev->features; bool bnx2x_reload = false; int rc;
/* VFs or non SRIOV PFs should be able to change loopback feature */ if (!pci_num_vf(bp->pdev)) { if (features & NETIF_F_LOOPBACK) { if (bp->link_params.loopback_mode != LOOPBACK_BMAC) {
bp->link_params.loopback_mode = LOOPBACK_BMAC;
bnx2x_reload = true;
}
} else { if (bp->link_params.loopback_mode != LOOPBACK_NONE) {
bp->link_params.loopback_mode = LOOPBACK_NONE;
bnx2x_reload = true;
}
}
}
/* Don't care about GRO changes */
changes &= ~NETIF_F_GRO;
if (changes)
bnx2x_reload = true;
if (bnx2x_reload) { if (bp->recovery_state == BNX2X_RECOVERY_DONE) {
dev->features = features;
rc = bnx2x_reload_if_running(dev); return rc ? rc : 1;
} /* else: bnx2x_nic_load() will be called at end of recovery */
}
/* We want the information of the dump logged, * but calling bnx2x_panic() would kill all chances of recovery.
*/ if (!bp->panic) #ifndef BNX2X_STOP_ON_ERROR
bnx2x_panic_dump(bp, false); #else
bnx2x_panic(); #endif
/* This allows the netif to be shutdown gracefully before resetting */
bnx2x_schedule_sp_rtnl(bp, BNX2X_SP_RTNL_TX_TIMEOUT, 0);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.