/* User-facing text reporting that the adapter was stopped after overheating. */
static const char ixgbe_overheat_msg[] =
	"Network adapter has been stopped because it has over heated. Restart the computer. If the problem persists, power off the system and replace the adapter";
#ifdef CONFIG_PCI_IOV
/*
 * Deprecated module parameter: number of virtual functions to allocate per
 * physical function. Registered with permission 0, so it can only be given
 * at module load time.
 */
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs,
		 "Maximum number of virtual functions to allocate per physical function - default is zero and maximum value is 63. (Deprecated)");
#endif /* CONFIG_PCI_IOV */
staticbool allow_unsupported_sfp;
module_param(allow_unsupported_sfp, bool, 0444);
MODULE_PARM_DESC(allow_unsupported_sfp, "Allow unsupported and untested SFP+ modules on 82599-based adapters");
/* Get the negotiated link width and speed from PCI config space of the * parent, as this device is behind a switch
*/
err = ixgbe_read_pci_cfg_word_parent(adapter, 18, &link_status);
/* assume caller will handle error case */ if (err) return err;
/** * ixgbe_pcie_from_parent - Determine whether PCIe info should come from parent * @hw: hw specific details * * This function is used by probe to determine whether a device's PCI-Express * bandwidth details should be gathered from the parent bus instead of from the * device. Used to ensure that various locations all have the correct device ID * checks. * * Return: true if information should be collected from the parent bus, false * otherwise
*/ staticbool ixgbe_pcie_from_parent(struct ixgbe_hw *hw)
{ switch (hw->device_id) { case IXGBE_DEV_ID_82599_SFP_SF_QP: case IXGBE_DEV_ID_82599_QSFP_SF_QP: returntrue; default: returnfalse;
}
}
/* Some devices are not connected over PCIe and thus do not negotiate * speed. These devices do not have valid bus info, and thus any report * we generate may not be correct.
*/ if (hw->bus.type == ixgbe_bus_type_internal) return;
/* determine whether to use the parent device */ if (ixgbe_pcie_from_parent(&adapter->hw))
pdev = adapter->pdev->bus->parent->self; else
pdev = adapter->pdev;
reg_addr = READ_ONCE(hw->hw_addr); if (ixgbe_removed(reg_addr)) return IXGBE_FAILED_READ_REG;
/* Register read of 0xFFFFFFFF can indicate the adapter has been removed, * so perform several status register reads to determine if the adapter * has been removed.
*/ for (i = 0; i < IXGBE_FAILED_READ_RETRIES; i++) {
value = readl(reg_addr + IXGBE_STATUS); if (value != IXGBE_FAILED_READ_REG) break;
mdelay(3);
}
if (value == IXGBE_FAILED_READ_REG)
ixgbe_remove_adapter(hw); else
value = readl(reg_addr + reg); return value;
}
/** * ixgbe_read_reg - Read from device register * @hw: hw specific details * @reg: offset of register to read * * Returns : value read or IXGBE_FAILED_READ_REG if removed * * This function is used to read device registers. It checks for device * removal by confirming any read that returns all ones by checking the * status register value for all ones. This function avoids reading from * the hardware if a removal was previously detected in which case it * returns IXGBE_FAILED_READ_REG (all ones).
*/
u32 ixgbe_read_reg(struct ixgbe_hw *hw, u32 reg)
{
u8 __iomem *reg_addr = READ_ONCE(hw->hw_addr);
u32 value;
if (ixgbe_removed(reg_addr)) return IXGBE_FAILED_READ_REG; if (unlikely(hw->phy.nw_mng_if_sel &
IXGBE_NW_MNG_IF_SEL_SGMII_ENABLE)) { struct ixgbe_adapter *adapter; int i;
for (i = 0; i < 200; ++i) {
value = readl(reg_addr + IXGBE_MAC_SGMII_BUSY); if (likely(!value)) goto writes_completed; if (value == IXGBE_FAILED_READ_REG) {
ixgbe_remove_adapter(hw); return IXGBE_FAILED_READ_REG;
}
udelay(5);
}
switch (reginfo->ofs) { case IXGBE_SRRCTL(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); break; case IXGBE_DCA_RXCTRL(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); break; case IXGBE_RDLEN(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i)); break; case IXGBE_RDH(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_RDH(i)); break; case IXGBE_RDT(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_RDT(i)); break; case IXGBE_RXDCTL(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); break; case IXGBE_RDBAL(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i)); break; case IXGBE_RDBAH(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i)); break; case IXGBE_TDBAL(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i)); break; case IXGBE_TDBAH(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i)); break; case IXGBE_TDLEN(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i)); break; case IXGBE_TDH(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_TDH(i)); break; case IXGBE_TDT(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_TDT(i)); break; case IXGBE_TXDCTL(0): for (i = 0; i < 64; i++)
regs[i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i)); break; default:
pr_info("%-15s %08x\n",
reginfo->name, IXGBE_READ_REG(hw, reginfo->ofs)); return;
}
i = 0; while (i < 64) { int j; char buf[9 * 8 + 1]; char *p = buf;
snprintf(rname, 16, "%s[%d-%d]", reginfo->name, i, i + 7); for (j = 0; j < 8; j++)
p += sprintf(p, " %08x", regs[i++]);
pr_err("%-15s%s\n", rname, buf);
}
}
staticvoid ixgbe_print_buffer(struct ixgbe_ring *ring, int n)
{ struct ixgbe_tx_buffer *tx_buffer;
/* Let firmware take over control of h/w */
ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD);
}
/* Let firmware know the driver has taken over */
ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT,
ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD);
}
/**
 * ixgbe_set_ivar - set the IVAR registers, mapping interrupt causes to vectors
 * @adapter: pointer to adapter struct
 * @direction: 0 for Rx, 1 for Tx, -1 for other causes
 * @queue: queue to map the corresponding interrupt to
 * @msix_vector: the vector to map to the corresponding queue
 *
 * Performs a read-modify-write of the IVAR (or IVAR_MISC) register that holds
 * the 8-bit entry for the given cause, replacing that entry with
 * @msix_vector ORed with IXGBE_IVAR_ALLOC_VAL. MAC types not listed in the
 * switch are left untouched.
 */
static void ixgbe_set_ivar(struct ixgbe_adapter *adapter, s8 direction,
			   u8 queue, u8 msix_vector)
{
	struct ixgbe_hw *hw = &adapter->hw;
	u32 ivar, index;

	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		msix_vector |= IXGBE_IVAR_ALLOC_VAL;
		if (direction == -1)
			direction = 0;
		/* four 8-bit entries per register; Tx entries start at 64 */
		index = (((direction * 64) + queue) >> 2) & 0x1F;
		ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(index));
		/*
		 * Shift unsigned operands: the byte lane can sit at bit 24,
		 * which would push a signed int into its sign bit.
		 */
		ivar &= ~(0xFFU << (8 * (queue & 0x3)));
		ivar |= ((u32)msix_vector << (8 * (queue & 0x3)));
		IXGBE_WRITE_REG(hw, IXGBE_IVAR(index), ivar);
		break;
	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
	case ixgbe_mac_x550em_a:
	case ixgbe_mac_e610:
		msix_vector |= IXGBE_IVAR_ALLOC_VAL;
		if (direction == -1) {
			/* other causes: two entries live in IVAR_MISC */
			index = ((queue & 1) * 8);
			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR_MISC);
			ivar &= ~(0xFFU << index);
			ivar |= ((u32)msix_vector << index);
			IXGBE_WRITE_REG(hw, IXGBE_IVAR_MISC, ivar);
		} else {
			/* tx or rx causes: one register per queue pair */
			index = ((16 * (queue & 1)) + (8 * direction));
			ivar = IXGBE_READ_REG(hw, IXGBE_IVAR(queue >> 1));
			ivar &= ~(0xFFU << index);
			ivar |= ((u32)msix_vector << index);
			IXGBE_WRITE_REG(hw, IXGBE_IVAR(queue >> 1), ivar);
		}
		break;
	default:
		break;
	}
}
/** * ixgbe_get_vf_idx - provide VF index number based on queue index * @adapter: pointer to the adapter struct * @queue: Tx queue identifier * @vf: output VF index * * Provide VF index number associated to the input queue. * * Returns: 0 if VF provided or error number.
*/ staticint ixgbe_get_vf_idx(struct ixgbe_adapter *adapter, u16 queue, u16 *vf)
{ struct ixgbe_hw *hw = &adapter->hw;
u8 queue_count;
u32 reg;
if (queue >= adapter->num_tx_queues) return -EINVAL;
/* Determine number of queues by checking * number of virtual functions
*/
reg = IXGBE_READ_REG(hw, IXGBE_GCR_EXT); switch (reg & IXGBE_GCR_EXT_VT_MODE_MASK) { case IXGBE_GCR_EXT_VT_MODE_64:
queue_count = IXGBE_64VFS_QUEUES; break; case IXGBE_GCR_EXT_VT_MODE_32:
queue_count = IXGBE_32VFS_QUEUES; break; case IXGBE_GCR_EXT_VT_MODE_16:
queue_count = IXGBE_16VFS_QUEUES; break; default: return -EINVAL;
}
/* * Check for a hung queue, but be thorough. This verifies * that a transmit has been completed since the previous * check AND there is at least one packet pending. The * ARMED bit is set to indicate a potential hang. The * bit is cleared if a pause frame is received to remove * false hang detection due to PFC or 802.3x frames. By * requiring this to fail twice we avoid races with * pfc clearing the ARMED bit and conditions where we * run the check_tx_hang logic with a transmit completion * pending but without time to complete it yet.
*/ if (tx_done_old == tx_done && tx_pending) /* make sure it is true for two checks in a row */ return test_and_set_bit(__IXGBE_HANG_CHECK_ARMED,
&tx_ring->state); /* update completed stats and continue */
tx_ring->tx_stats.tx_done_old = tx_done; /* reset the countdown */
clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
/* Do the reset outside of interrupt context */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) {
set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
e_warn(drv, "initiating reset due to tx timeout\n");
ixgbe_service_event_schedule(adapter);
}
}
/** * ixgbe_tx_maxrate - callback to set the maximum per-queue bitrate * @netdev: network interface device structure * @queue_index: Tx queue to set * @maxrate: desired maximum transmit bitrate
**/ staticint ixgbe_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
{ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw;
u32 bcnrc_val = ixgbe_link_mbps(adapter);
if (!maxrate) return 0;
/* Calculate the rate factor values to set */
bcnrc_val <<= IXGBE_RTTBCNRC_RF_INT_SHIFT;
bcnrc_val /= maxrate;
/* clear everything but the rate factor */
bcnrc_val &= IXGBE_RTTBCNRC_RF_INT_MASK |
IXGBE_RTTBCNRC_RF_DEC_MASK;
/* enable the rate scheduler */
bcnrc_val |= IXGBE_RTTBCNRC_RS_ENA;
/**
 * ixgbe_update_tx_ring_stats - Update Tx ring specific counters
 * @tx_ring: ring to update
 * @q_vector: queue vector ring belongs to
 * @pkts: number of processed packets
 * @bytes: number of processed bytes
 *
 * Adds @pkts and @bytes to both the queue vector's Tx totals and the ring's
 * own statistics.
 */
void ixgbe_update_tx_ring_stats(struct ixgbe_ring *tx_ring,
				struct ixgbe_q_vector *q_vector, u64 pkts,
				u64 bytes)
{
	/* running totals kept on the queue vector */
	q_vector->tx.total_packets += pkts;
	q_vector->tx.total_bytes += bytes;

	/* ring counters are modified inside the u64_stats update section */
	u64_stats_update_begin(&tx_ring->syncp);
	tx_ring->stats.packets += pkts;
	tx_ring->stats.bytes += bytes;
	u64_stats_update_end(&tx_ring->syncp);
}
/**
 * ixgbe_update_rx_ring_stats - Update Rx ring specific counters
 * @rx_ring: ring to update
 * @q_vector: queue vector ring belongs to
 * @pkts: number of processed packets
 * @bytes: number of processed bytes
 *
 * Adds @pkts and @bytes to both the queue vector's Rx totals and the ring's
 * own statistics.
 */
void ixgbe_update_rx_ring_stats(struct ixgbe_ring *rx_ring,
				struct ixgbe_q_vector *q_vector, u64 pkts,
				u64 bytes)
{
	/* running totals kept on the queue vector */
	q_vector->rx.total_packets += pkts;
	q_vector->rx.total_bytes += bytes;

	/* ring counters are modified inside the u64_stats update section */
	u64_stats_update_begin(&rx_ring->syncp);
	rx_ring->stats.packets += pkts;
	rx_ring->stats.bytes += bytes;
	u64_stats_update_end(&rx_ring->syncp);
}
/** * ixgbe_pf_handle_tx_hang - handle Tx hang on PF * @tx_ring: tx ring number * @next: next ring * * Prints a message containing details about the tx hang.
*/ staticvoid ixgbe_pf_handle_tx_hang(struct ixgbe_ring *tx_ring, unsignedint next)
{ struct ixgbe_adapter *adapter = netdev_priv(tx_ring->netdev); struct ixgbe_hw *hw = &adapter->hw;
/** * ixgbe_vf_handle_tx_hang - handle Tx hang on VF * @adapter: structure containing ring specific data * @vf: VF index * * Print a message containing details about malicious driver detection. * Set malicious VF link down if the detection happened several times.
*/ staticvoid ixgbe_vf_handle_tx_hang(struct ixgbe_adapter *adapter, u16 vf)
{ struct ixgbe_hw *hw = &adapter->hw;
if (adapter->hw.mac.type != ixgbe_mac_e610) return;
e_warn(drv, "Malicious Driver Detection tx hang detected on PF %d VF %d MAC: %pM",
hw->bus.func, vf, adapter->vfinfo[vf].vf_mac_addresses);
/** * ixgbe_check_illegal_queue - search for queue with illegal packet * @adapter: structure containing ring specific data * @queue: queue index * * Check whether a Tx descriptor associated with the input queue * contains an illegal packet. * * Returns: true if the queue contains an illegal packet.
*/ staticbool ixgbe_check_illegal_queue(struct ixgbe_adapter *adapter,
u16 queue)
{
u32 hdr_len_reg, mss_len_reg, type_reg; struct ixgbe_hw *hw = &adapter->hw;
u32 mss_len, header_len, reg;
for (u16 i = 0; i < IXGBE_MAX_TX_DESCRIPTORS; i++) { /* HW will clear bit IXGBE_TXDESCIC_READY when address * is written to address field. HW will set this bit * when iCache read is done, and data is ready at TIC_DWx. * Set descriptor address.
*/
read_poll_timeout(ixgbe_poll_tx_icache, reg,
!(reg & IXGBE_TXDESCIC_READY), 0, 0, false,
hw, queue, i);
/* update the statistics for this packet */
total_bytes += tx_buffer->bytecount;
total_packets += tx_buffer->gso_segs; if (tx_buffer->tx_flags & IXGBE_TX_FLAGS_IPSEC)
total_ipsec++;
/* free the skb */ if (ring_is_xdp(tx_ring))
xdp_return_frame(tx_buffer->xdpf); else
napi_consume_skb(tx_buffer->skb, napi_budget);
/* clear tx_buffer data */
dma_unmap_len_set(tx_buffer, len, 0);
/* unmap remaining buffers */ while (tx_desc != eop_desc) {
tx_buffer++;
tx_desc++;
i++; if (unlikely(!i)) {
i -= tx_ring->count;
tx_buffer = tx_ring->tx_buffer_info;
tx_desc = IXGBE_TX_DESC(tx_ring, 0);
}
/* unmap any remaining paged data */ if (dma_unmap_len(tx_buffer, len)) {
dma_unmap_page(tx_ring->dev,
dma_unmap_addr(tx_buffer, dma),
dma_unmap_len(tx_buffer, len),
DMA_TO_DEVICE);
dma_unmap_len_set(tx_buffer, len, 0);
}
}
/* move us one more past the eop_desc for start of next pkt */
tx_buffer++;
tx_desc++;
i++; if (unlikely(!i)) {
i -= tx_ring->count;
tx_buffer = tx_ring->tx_buffer_info;
tx_desc = IXGBE_TX_DESC(tx_ring, 0);
}
/* issue prefetch for next Tx descriptor */
prefetch(tx_desc);
/* update budget accounting */
budget--;
} while (likely(budget));
if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
txctrl = dca3_get_tag(tx_ring->dev, cpu);
switch (hw->mac.type) { case ixgbe_mac_82598EB:
reg_offset = IXGBE_DCA_TXCTRL(tx_ring->reg_idx); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540:
reg_offset = IXGBE_DCA_TXCTRL_82599(tx_ring->reg_idx);
txctrl <<= IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599; break; default: /* for unknown hardware do not write register */ return;
}
/* * We can enable relaxed ordering for reads, but not writes when * DCA is enabled. This is due to a known issue in some chipsets * which will cause the DCA tag to be cleared.
*/
txctrl |= IXGBE_DCA_TXCTRL_DESC_RRO_EN |
IXGBE_DCA_TXCTRL_DATA_RRO_EN |
IXGBE_DCA_TXCTRL_DESC_DCA_EN;
if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
rxctrl = dca3_get_tag(rx_ring->dev, cpu);
switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540:
rxctrl <<= IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599; break; default: break;
}
/* * We can enable relaxed ordering for reads, but not writes when * DCA is enabled. This is due to a known issue in some chipsets * which will cause the DCA tag to be cleared.
*/
rxctrl |= IXGBE_DCA_RXCTRL_DESC_RRO_EN |
IXGBE_DCA_RXCTRL_DATA_DCA_EN |
IXGBE_DCA_RXCTRL_DESC_DCA_EN;
/**
 * ixgbe_setup_dca - program the DCA control register and refresh all vectors
 * @adapter: pointer to adapter struct
 *
 * Writes IXGBE_DCA_CTRL according to whether IXGBE_FLAG_DCA_ENABLED is set,
 * then resets the cached cpu of every queue vector to -1 and calls
 * ixgbe_update_dca() on it.
 */
static void ixgbe_setup_dca(struct ixgbe_adapter *adapter)
{
	int i;

	/* always use CB2 mode, difference is masked in the CB driver */
	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
		IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
				IXGBE_DCA_CTRL_DCA_MODE_CB2);
	else
		IXGBE_WRITE_REG(&adapter->hw, IXGBE_DCA_CTRL,
				IXGBE_DCA_CTRL_DCA_DISABLE);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		/* NOTE(review): -1 presumably forces a refresh - confirm */
		adapter->q_vector[i]->cpu = -1;
		ixgbe_update_dca(adapter->q_vector[i]);
	}
}
#ifdef IXGBE_FCOE /** * ixgbe_rx_is_fcoe - check the rx desc for incoming pkt type * @ring: structure containing ring specific data * @rx_desc: advanced rx descriptor * * Returns : true if it is FCoE pkt
*/ staticinlinebool ixgbe_rx_is_fcoe(struct ixgbe_ring *ring, union ixgbe_adv_rx_desc *rx_desc)
{
__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info;
#endif/* IXGBE_FCOE */ /** * ixgbe_rx_checksum - indicate in skb if hw indicated a good cksum * @ring: structure containing ring specific data * @rx_desc: current Rx descriptor being processed * @skb: skb currently being received and modified
**/ staticinlinevoid ixgbe_rx_checksum(struct ixgbe_ring *ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb)
{
__le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; bool encap_pkt = false;
skb_checksum_none_assert(skb);
/* Rx csum disabled */ if (!(ring->netdev->features & NETIF_F_RXCSUM)) return;
/* check for VXLAN and Geneve packets */ if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) {
encap_pkt = true;
skb->encapsulation = 1;
}
/* if IP and error */ if (ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) &&
ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) {
ring->rx_stats.csum_err++; return;
}
if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) return;
if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { /* * 82599 errata, UDP frames with a 0 checksum can be marked as * checksum errors.
*/ if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_UDP)) &&
test_bit(__IXGBE_RX_CSUM_UDP_ZERO_ERR, &ring->state)) return;
ring->rx_stats.csum_err++; return;
}
/* It must be a TCP or UDP packet with a valid checksum */
skb->ip_summed = CHECKSUM_UNNECESSARY; if (encap_pkt) { if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_OUTERIPCS)) return;
if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_ERR_OUTERIPER)) {
skb->ip_summed = CHECKSUM_NONE; return;
} /* If we checked the outer header let the stack know */
skb->csum_level = 1;
}
}
/* since we are recycling buffers we should seldom need to alloc */ if (likely(page)) returntrue;
/* alloc new page for storage */
page = dev_alloc_pages(ixgbe_rx_pg_order(rx_ring)); if (unlikely(!page)) {
rx_ring->rx_stats.alloc_rx_page_failed++; returnfalse;
}
/* map page for use */
dma = dma_map_page_attrs(rx_ring->dev, page, 0,
ixgbe_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
IXGBE_RX_DMA_ATTR);
/* * if mapping failed free memory back to system since * there isn't much point in holding memory we can't use
*/ if (dma_mapping_error(rx_ring->dev, dma)) {
__free_pages(page, ixgbe_rx_pg_order(rx_ring));
/**
 * ixgbe_alloc_rx_buffers - Replace used receive buffers
 * @rx_ring: ring to place buffers on
 * @cleaned_count: number of buffers to replace
 *
 * Starting at next_to_use, gives up to @cleaned_count descriptors a mapped
 * page, stopping early if ixgbe_alloc_mapped_page() fails. If at least one
 * descriptor was refilled, next_to_use/next_to_alloc are advanced and the new
 * index is written to the ring tail.
 **/
void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count)
{
	union ixgbe_adv_rx_desc *desc;
	struct ixgbe_rx_buffer *buf;
	u16 ntu = rx_ring->next_to_use;
	u16 buf_len;

	/* nothing to do */
	if (cleaned_count == 0)
		return;

	desc = IXGBE_RX_DESC(rx_ring, ntu);
	buf = &rx_ring->rx_buffer_info[ntu];

	/* bias the index by -count so a ring wrap shows up as ntu == 0 */
	ntu -= rx_ring->count;

	buf_len = ixgbe_rx_bufsz(rx_ring);

	for (; cleaned_count; cleaned_count--) {
		if (!ixgbe_alloc_mapped_page(rx_ring, buf))
			break;

		/* sync the buffer for use by the device */
		dma_sync_single_range_for_device(rx_ring->dev, buf->dma,
						 buf->page_offset, buf_len,
						 DMA_FROM_DEVICE);

		/*
		 * Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		desc->read.pkt_addr = cpu_to_le64(buf->dma + buf->page_offset);

		desc++;
		buf++;
		ntu++;
		if (unlikely(ntu == 0)) {
			/* wrapped: restart from the first ring entry */
			desc = IXGBE_RX_DESC(rx_ring, 0);
			buf = rx_ring->rx_buffer_info;
			ntu -= rx_ring->count;
		}

		/* clear the length for the next_to_use descriptor */
		desc->wb.upper.length = 0;
	}

	/* remove the bias to get back a real ring index */
	ntu += rx_ring->count;

	if (rx_ring->next_to_use == ntu)
		return;

	rx_ring->next_to_use = ntu;

	/* update next to alloc since we have filled the ring */
	rx_ring->next_to_alloc = ntu;

	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64).
	 */
	wmb();
	writel(ntu, rx_ring->tail);
}
/* set gso_size to avoid messing up TCP MSS */
skb_shinfo(skb)->gso_size = DIV_ROUND_UP((skb->len - hdr_len),
IXGBE_CB(skb)->append_cnt);
skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
}
/**
 * ixgbe_update_rsc_stats - reset the RSC append count stored in the skb
 * @rx_ring: rx descriptor ring the packet was received on
 * @skb: buffer whose IXGBE_CB() append count is examined
 *
 * Does nothing when the append count is zero (frame is not RSC); otherwise
 * clears it.
 *
 * NOTE(review): @rx_ring is not referenced in the body visible here, and the
 * gso_size remark below refers to a computation that is not shown - confirm
 * against the complete file.
 */
static void ixgbe_update_rsc_stats(struct ixgbe_ring *rx_ring,
				   struct sk_buff *skb)
{
	/* if append_cnt is 0 then frame is not RSC */
	if (!IXGBE_CB(skb)->append_cnt)
		return;

	/* gso_size is computed using append_cnt so always clear it last */
	IXGBE_CB(skb)->append_cnt = 0;
}
/** * ixgbe_process_skb_fields - Populate skb header fields from Rx descriptor * @rx_ring: rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor * @skb: pointer to current skb being populated * * This function checks the ring, descriptor, and packet information in * order to populate the hash, checksum, VLAN, timestamp, protocol, and * other fields within the skb.
**/ void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb)
{ struct net_device *dev = rx_ring->netdev;
u32 flags = rx_ring->q_vector->adapter->flags;
ixgbe_update_rsc_stats(rx_ring, skb);
ixgbe_rx_hash(rx_ring, rx_desc, skb);
ixgbe_rx_checksum(rx_ring, rx_desc, skb);
if (unlikely(flags & IXGBE_FLAG_RX_HWTSTAMP_ENABLED))
ixgbe_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) {
u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan);
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
}
if (ixgbe_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP))
ixgbe_ipsec_rx(rx_ring, rx_desc, skb);
/* record Rx queue, or update MACVLAN statistics */ if (netif_is_ixgbe(dev))
skb_record_rx_queue(skb, rx_ring->queue_index); else
macvlan_count_rx(netdev_priv(dev), skb->len + ETH_HLEN, true, false);
/** * ixgbe_is_non_eop - process handling of non-EOP buffers * @rx_ring: Rx ring being processed * @rx_desc: Rx descriptor for current buffer * @skb: Current socket buffer containing buffer in progress * * This function updates next to clean. If the buffer is an EOP buffer * this function exits returning false, otherwise it will place the * sk_buff in the next buffer to be chained and return true indicating * that this is in fact a non-EOP buffer.
**/ staticbool ixgbe_is_non_eop(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb)
{
u32 ntc = rx_ring->next_to_clean + 1;
/* fetch, update, and store next to clean */
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
prefetch(IXGBE_RX_DESC(rx_ring, ntc));
/* update RSC append count if present */ if (ring_is_rsc_enabled(rx_ring)) {
__le32 rsc_enabled = rx_desc->wb.lower.lo_dword.data &
cpu_to_le32(IXGBE_RXDADV_RSCCNT_MASK);
if (unlikely(rsc_enabled)) {
u32 rsc_cnt = le32_to_cpu(rsc_enabled);
/* update ntc based on RSC value */
ntc = le32_to_cpu(rx_desc->wb.upper.status_error);
ntc &= IXGBE_RXDADV_NEXTP_MASK;
ntc >>= IXGBE_RXDADV_NEXTP_SHIFT;
}
}
/* if we are the last buffer then there is nothing else to do */ if (likely(ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) returnfalse;
/* place skb in next buffer to be received */
rx_ring->rx_buffer_info[ntc].skb = skb;
rx_ring->rx_stats.non_eop_descs++;
returntrue;
}
/**
 * ixgbe_pull_tail - ixgbe specific version of skb_pull_tail
 * @rx_ring: rx descriptor ring packet is being transacted on
 * @skb: pointer to current skb being adjusted
 *
 * This function is an ixgbe specific version of __pskb_pull_tail.  The
 * main difference between this version and the original function is that
 * this function can make several assumptions about the state of things
 * that allow for significant optimizations versus the standard function.
 * As a result we can do things like drop a frag and maintain an accurate
 * truesize for the skb.
 */
static void ixgbe_pull_tail(struct ixgbe_ring *rx_ring,
			    struct sk_buff *skb)
{
	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
	unsigned char *va;
	unsigned int pull_len;

	/*
	 * it is valid to use page_address instead of kmap since we are
	 * working with pages allocated out of the lomem pool per
	 * alloc_page(GFP_ATOMIC)
	 */
	va = skb_frag_address(frag);

	/*
	 * we need the header to contain the greater of either ETH_HLEN or
	 * 60 bytes if the skb->len is less than 60 for skb_pad.
	 */
	pull_len = eth_get_headlen(skb->dev, va, IXGBE_RX_HDR_SIZE);

	/* align pull length to size of long to optimize memcpy performance */
	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));

	/* update all of the pointers: only pull_len bytes change owner */
	skb_frag_size_sub(frag, pull_len);
	skb_frag_off_add(frag, pull_len);
	skb->data_len -= pull_len;
	skb->tail += pull_len;
}
/** * ixgbe_dma_sync_frag - perform DMA sync for first frag of SKB * @rx_ring: rx descriptor ring packet is being transacted on * @skb: pointer to current skb being updated * * This function provides a basic DMA sync up for the first fragment of an * skb. The reason for doing this is that the first fragment cannot be * unmapped until we have reached the end of packet descriptor for a buffer * chain.
*/ staticvoid ixgbe_dma_sync_frag(struct ixgbe_ring *rx_ring, struct sk_buff *skb)
{ if (ring_uses_build_skb(rx_ring)) { unsignedlong mask = (unsignedlong)ixgbe_rx_pg_size(rx_ring) - 1; unsignedlong offset = (unsignedlong)(skb->data) & mask;
/* If the page was released, just unmap it. */ if (unlikely(IXGBE_CB(skb)->page_released)) {
dma_unmap_page_attrs(rx_ring->dev, IXGBE_CB(skb)->dma,
ixgbe_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
IXGBE_RX_DMA_ATTR);
}
}
/** * ixgbe_cleanup_headers - Correct corrupted or empty headers * @rx_ring: rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor * @skb: pointer to current skb being fixed * * Check if the skb is valid in the XDP case it will be an error pointer. * Return true in this case to abort processing and advance to next * descriptor. * * Check for corrupted packet headers caused by senders on the local L2 * embedded NIC switch not setting up their Tx Descriptors right. These * should be very rare. * * Also address the case where we are pulling data in on pages only * and as such no data is present in the skb header. * * In addition if skb is not at least 60 bytes we need to pad it so that * it is large enough to qualify as a valid Ethernet frame. * * Returns true if an error was encountered and skb was freed.
**/ bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff *skb)
{ struct net_device *netdev = rx_ring->netdev;
/* Verify netdev is present, and that packet does not have any * errors that would be unacceptable to the netdev.
*/ if (!netdev ||
(unlikely(ixgbe_test_staterr(rx_desc,
IXGBE_RXDADV_ERR_FRAME_ERR_MASK) &&
!(netdev->features & NETIF_F_RXALL)))) {
dev_kfree_skb_any(skb); returntrue;
}
/* place header in linear portion of buffer */ if (!skb_headlen(skb))
ixgbe_pull_tail(rx_ring, skb);
#ifdef IXGBE_FCOE /* do not attempt to pad FCoE Frames as this will disrupt DDP */ if (ixgbe_rx_is_fcoe(rx_ring, rx_desc)) returnfalse;
#endif /* if eth_skb_pad returns an error the skb was freed */ if (eth_skb_pad(skb)) returntrue;
returnfalse;
}
/**
 * ixgbe_reuse_rx_page - page flip buffer and store it back on the ring
 * @rx_ring: rx descriptor ring to store buffers on
 * @old_buff: donor buffer to have page reused
 *
 * Copies the page bookkeeping of @old_buff into the ring slot at
 * next_to_alloc and advances next_to_alloc, wrapping to 0 at the ring size.
 **/
static void ixgbe_reuse_rx_page(struct ixgbe_ring *rx_ring,
				struct ixgbe_rx_buffer *old_buff)
{
	struct ixgbe_rx_buffer *new_buff;
	u16 nta = rx_ring->next_to_alloc;

	new_buff = &rx_ring->rx_buffer_info[nta];

	/* update, and store next to alloc */
	nta++;
	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;

	/* Transfer page from old buffer to new buffer.
	 * Move each member individually to avoid possible store
	 * forwarding stalls and unnecessary copy of skb.
	 */
	new_buff->dma = old_buff->dma;
	new_buff->page = old_buff->page;
	new_buff->page_offset = old_buff->page_offset;
	new_buff->pagecnt_bias = old_buff->pagecnt_bias;
}
/* avoid re-using remote and pfmemalloc pages */ if (!dev_page_is_reusable(page)) returnfalse;
#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) returnfalse; #else /* The last offset is a bit aggressive in that we assume the * worst case of FCoE being enabled and using a 3K buffer. * However this should have minimal impact as the 1K extra is * still less than one buffer in size.
*/ #define IXGBE_LAST_OFFSET \
(SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBE_RXBUFFER_3K) if (rx_buffer->page_offset > IXGBE_LAST_OFFSET) returnfalse; #endif
/* If we have drained the page fragment pool we need to update * the pagecnt_bias and page count so that we fully restock the * number of references the driver holds.
*/ if (unlikely(pagecnt_bias == 1)) {
page_ref_add(page, USHRT_MAX - 1);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
returntrue;
}
/** * ixgbe_add_rx_frag - Add contents of Rx buffer to sk_buff * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: buffer containing page to add * @skb: sk_buff to place the data into * @size: size of data in rx_buffer * * This function will add the data contained in rx_buffer->page to the skb. * This is done either through a direct copy if the data in the buffer is * less than the skb header size, otherwise it will just attach the page as * a frag to the skb. * * The function will then update the page offset if necessary and return * true if the buffer can be reused by the adapter.
**/ staticvoid ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, struct sk_buff *skb, unsignedint size)
{ #if (PAGE_SIZE < 8192) unsignedint truesize = ixgbe_rx_pg_size(rx_ring) / 2; #else unsignedint truesize = rx_ring->rx_offset ?
SKB_DATA_ALIGN(rx_ring->rx_offset + size) :
SKB_DATA_ALIGN(size); #endif
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
rx_buffer->page_offset, size, truesize); #if (PAGE_SIZE < 8192)
rx_buffer->page_offset ^= truesize; #else
rx_buffer->page_offset += truesize; #endif
}
staticstruct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff **skb, constunsignedint size, int *rx_buffer_pgcnt)
{ struct ixgbe_rx_buffer *rx_buffer;
/* Delay unmapping of the first packet. It carries the header * information, HW may still access the header after the writeback. * Only unmap it when EOP is reached
*/ if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP)) { if (!*skb) goto skip_sync;
} else { if (*skb)
ixgbe_dma_sync_frag(rx_ring, *skb);
}
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_buffer->dma,
rx_buffer->page_offset,
size,
DMA_FROM_DEVICE);
skip_sync:
rx_buffer->pagecnt_bias--;
return rx_buffer;
}
staticvoid ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, struct sk_buff *skb, int rx_buffer_pgcnt)
{ if (ixgbe_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */
ixgbe_reuse_rx_page(rx_ring, rx_buffer);
} else { if (skb && IXGBE_CB(skb)->dma == rx_buffer->dma) { /* the page has been released from the ring */
IXGBE_CB(skb)->page_released = true;
} else { /* we are not reusing the buffer so unmap it */
dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
ixgbe_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
IXGBE_RX_DMA_ATTR);
}
__page_frag_cache_drain(rx_buffer->page,
rx_buffer->pagecnt_bias);
}
/* prefetch first cache line of first page */
net_prefetch(xdp->data);
/* Note, we get here by enabling legacy-rx via: * * ethtool --set-priv-flags <dev> legacy-rx on * * In this mode, we currently get 0 extra XDP headroom as * opposed to having legacy-rx off, where we process XDP * packets going to stack via ixgbe_build_skb(). The latter * provides us currently with 192 bytes of headroom. * * For ixgbe_construct_skb() mode it means that the * xdp->data_meta will always point to xdp->data, since * the helper cannot expand the head. Should this ever * change in future for legacy-rx mode on, then lets also * add xdp->data_meta handling here.
*/
/* allocate a skb to store the frags */
skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBE_RX_HDR_SIZE); if (unlikely(!skb)) return NULL;
if (size > IXGBE_RX_HDR_SIZE) { if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
IXGBE_CB(skb)->dma = rx_buffer->dma;
/* Prefetch first cache line of first page. If xdp->data_meta * is unused, this points exactly as xdp->data, otherwise we * likely have a consumer accessing first few bytes of meta * data, and then actual data.
*/
net_prefetch(xdp->data_meta);
/* build an skb to around the page buffer */
skb = napi_build_skb(xdp->data_hard_start, truesize); if (unlikely(!skb)) return NULL;
/* update pointers within the skb to store the data */
skb_reserve(skb, xdp->data - xdp->data_hard_start);
__skb_put(skb, xdp->data_end - xdp->data); if (metasize)
skb_metadata_set(skb, metasize);
/* record DMA address if this is the start of a chain of buffers */ if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
IXGBE_CB(skb)->dma = rx_buffer->dma;
/** * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @q_vector: structure containing interrupt and ring information * @rx_ring: rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process * * This function provides a "bounce buffer" approach to Rx interrupt * processing. The advantage to this is that on systems that have * expensive overhead for IOMMU access this provides a means of avoiding * it by maintaining the mapping of the page to the system. * * Returns amount of work completed
**/ staticint ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, struct ixgbe_ring *rx_ring, constint budget)
{ unsignedint total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; struct ixgbe_adapter *adapter = q_vector->adapter; #ifdef IXGBE_FCOE int ddp_bytes; unsignedint mss = 0; #endif/* IXGBE_FCOE */
u16 cleaned_count = ixgbe_desc_unused(rx_ring); unsignedint offset = rx_ring->rx_offset; unsignedint xdp_xmit = 0; struct xdp_buff xdp; int xdp_res = 0;
while (likely(total_rx_packets < budget)) { union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *rx_buffer; struct sk_buff *skb; int rx_buffer_pgcnt; unsignedint size;
/* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
ixgbe_alloc_rx_buffers(rx_ring, cleaned_count);
cleaned_count = 0;
}
rx_desc = IXGBE_RX_DESC(rx_ring, rx_ring->next_to_clean);
size = le16_to_cpu(rx_desc->wb.upper.length); if (!size) break;
/* This memory barrier is needed to keep us from reading * any other fields out of the rx_desc until we know the * descriptor has been written back
*/
dma_rmb();
/* * Populate the IVAR table and set the ITR values to the * corresponding register.
*/ for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) { struct ixgbe_ring *ring;
q_vector = adapter->q_vector[v_idx];
switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB:
ixgbe_set_ivar(adapter, -1, IXGBE_IVAR_OTHER_CAUSES_INDEX,
v_idx); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610:
ixgbe_set_ivar(adapter, -1, 1, v_idx); break; default: break;
}
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITR(v_idx), 1950);
/* set up to autoclear timer, and the vectors */
mask = IXGBE_EIMS_ENABLE_MASK;
mask &= ~(IXGBE_EIMS_OTHER |
IXGBE_EIMS_MAILBOX |
IXGBE_EIMS_LSC);
if (adapter->hw.mac.type == ixgbe_mac_e610)
mask &= ~IXGBE_EIMS_FW_EVENT;
/** * ixgbe_update_itr - update the dynamic ITR value based on statistics * @q_vector: structure containing interrupt and ring information * @ring_container: structure containing ring performance data * * Stores a new ITR value based on packets and byte * counts during the last interrupt. The advantage of per interrupt * computation is faster updates and more accurate ITR for the current * traffic pattern. Constants in this function were computed * based on theoretical maximum wire speed and thresholds were set based * on testing data as well as attempting to minimize response time * while increasing bulk throughput.
**/ staticvoid ixgbe_update_itr(struct ixgbe_q_vector *q_vector, struct ixgbe_ring_container *ring_container)
{ unsignedint itr = IXGBE_ITR_ADAPTIVE_MIN_USECS |
IXGBE_ITR_ADAPTIVE_LATENCY; unsignedint avg_wire_size, packets, bytes; unsignedlong next_update = jiffies;
/* If we don't have any rings just leave ourselves set for maximum * possible latency so we take ourselves out of the equation.
*/ if (!ring_container->ring) return;
/* If we didn't update within up to 1 - 2 jiffies we can assume * that either packets are coming in so slow there hasn't been * any work, or that there is so much work that NAPI is dealing * with interrupt moderation and we don't need to do anything.
*/ if (time_after(next_update, ring_container->next_update)) goto clear_counts;
packets = ring_container->total_packets;
/* We have no packets to actually measure against. This means * either one of the other queues on this vector is active or * we are a Tx queue doing TSO with too high of an interrupt rate. * * When this occurs just tick up our delay by the minimum value * and hope that this extra delay will prevent us from being called * without any work on our queue.
*/ if (!packets) {
itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC; if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
itr = IXGBE_ITR_ADAPTIVE_MAX_USECS;
itr += ring_container->itr & IXGBE_ITR_ADAPTIVE_LATENCY; goto clear_counts;
}
bytes = ring_container->total_bytes;
/* If packets are less than 4 and bytes are less than 9000 assume
 * insufficient data to use bulk rate limiting approach. We are
 * likely latency driven.
*/ if (packets < 4 && bytes < 9000) {
itr = IXGBE_ITR_ADAPTIVE_LATENCY; goto adjust_by_size;
}
/* Between 4 and 48 we can assume that our current interrupt delay * is only slightly too low. As such we should increase it by a small * fixed amount.
*/ if (packets < 48) {
itr = (q_vector->itr >> 2) + IXGBE_ITR_ADAPTIVE_MIN_INC; if (itr > IXGBE_ITR_ADAPTIVE_MAX_USECS)
itr = IXGBE_ITR_ADAPTIVE_MAX_USECS; goto clear_counts;
}
/* Between 48 and 96 is our "goldilocks" zone where we are working * out "just right". Just report that our current ITR is good for us.
*/ if (packets < 96) {
itr = q_vector->itr >> 2; goto clear_counts;
}
/* If packet count is 96 or greater we are likely looking at a slight * overrun of the delay we want. Try halving our delay to see if that * will cut the number of packets in half per interrupt.
*/ if (packets < 256) {
itr = q_vector->itr >> 3; if (itr < IXGBE_ITR_ADAPTIVE_MIN_USECS)
itr = IXGBE_ITR_ADAPTIVE_MIN_USECS; goto clear_counts;
}
/* The paths below assume we are dealing with a bulk ITR since number * of packets is 256 or greater. We are just going to have to compute * a value and try to bring the count under control, though for smaller * packet sizes there isn't much we can do as NAPI polling will likely * be kicking in sooner rather than later.
*/
itr = IXGBE_ITR_ADAPTIVE_BULK;
adjust_by_size: /* If packet counts are 256 or greater we can assume we have a gross * overestimation of what the rate should be. Instead of trying to fine * tune it just use the formula below to try and dial in an exact value * give the current packet size of the frame.
*/
avg_wire_size = bytes / packets;
/* The following is a crude approximation of: * wmem_default / (size + overhead) = desired_pkts_per_int * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value * * Assuming wmem_default is 212992 and overhead is 640 bytes per * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the * formula down to * * (170 * (size + 24)) / (size + 640) = ITR * * We first do some math on the packet size and then finally bitshift * by 8 after rounding up. We also have to account for PCIe link speed * difference as ITR scales based on this.
*/ if (avg_wire_size <= 60) { /* Start at 50k ints/sec */
avg_wire_size = 5120;
} elseif (avg_wire_size <= 316) { /* 50K ints/sec to 16K ints/sec */
avg_wire_size *= 40;
avg_wire_size += 2720;
} elseif (avg_wire_size <= 1084) { /* 16K ints/sec to 9.2K ints/sec */
avg_wire_size *= 15;
avg_wire_size += 11452;
} elseif (avg_wire_size < 1968) { /* 9.2K ints/sec to 8K ints/sec */
avg_wire_size *= 5;
avg_wire_size += 22420;
} else { /* plateau at a limit of 8K ints/sec */
avg_wire_size = 32256;
}
/* If we are in low latency mode half our delay which doubles the rate * to somewhere between 100K to 16K ints/sec
*/ if (itr & IXGBE_ITR_ADAPTIVE_LATENCY)
avg_wire_size >>= 1;
/* Resultant value is 256 times larger than it needs to be. This * gives us room to adjust the value as needed to either increase * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc. * * Use addition as we have already recorded the new latency flag * for the ITR value.
*/ switch (q_vector->adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: case IXGBE_LINK_SPEED_100_FULL: default:
itr += DIV_ROUND_UP(avg_wire_size,
IXGBE_ITR_ADAPTIVE_MIN_INC * 256) *
IXGBE_ITR_ADAPTIVE_MIN_INC; break; case IXGBE_LINK_SPEED_2_5GB_FULL: case IXGBE_LINK_SPEED_1GB_FULL: case IXGBE_LINK_SPEED_10_FULL: if (avg_wire_size > 8064)
avg_wire_size = 8064;
itr += DIV_ROUND_UP(avg_wire_size,
IXGBE_ITR_ADAPTIVE_MIN_INC * 64) *
IXGBE_ITR_ADAPTIVE_MIN_INC; break;
}
clear_counts: /* write back value */
ring_container->itr = itr;
/* next update should occur within next jiffy */
ring_container->next_update = next_update + 1;
/** * ixgbe_write_eitr - write EITR register in hardware specific way * @q_vector: structure containing interrupt and ring information * * This function is made to be called by ethtool and by the driver * when it needs to update EITR registers at runtime. Hardware * specific quirks/differences are taken care of here.
*/ void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector)
{ struct ixgbe_adapter *adapter = q_vector->adapter; struct ixgbe_hw *hw = &adapter->hw; int v_idx = q_vector->v_idx;
u32 itr_reg = q_vector->itr & IXGBE_MAX_EITR;
switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB: /* must write high and low 16 bits to reset counter */
itr_reg |= (itr_reg << 16); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610: /* * set the WDIS bit to not clear the timer bits and cause an * immediate assertion of the interrupt
*/
itr_reg |= IXGBE_EITR_CNT_WDIS; break; default: break;
}
IXGBE_WRITE_REG(hw, IXGBE_EITR(v_idx), itr_reg);
}
switch (hw->device_id) { case IXGBE_DEV_ID_82599_T3_LOM: /* * Since the warning interrupt is for both ports * we don't have to check if: * - This interrupt wasn't for our port. * - We may have missed the interrupt so always have to * check if we got a LSC
*/ if (!(eicr & IXGBE_EICR_GPI_SDP0_8259X) &&
!(eicr & IXGBE_EICR_LSC)) return;
/* Check if this is not due to overtemp */ if (!hw->phy.ops.check_overtemp(hw)) return;
break; case IXGBE_DEV_ID_X550EM_A_1G_T: case IXGBE_DEV_ID_X550EM_A_1G_T_L: if (!hw->phy.ops.check_overtemp(hw)) return; break; default: if (adapter->hw.mac.type >= ixgbe_mac_X540) return; if (!(eicr & IXGBE_EICR_GPI_SDP0(hw))) return; break;
}
e_crit(drv, "%s\n", ixgbe_overheat_msg);
/**
 * ixgbe_check_phy_fw_load - check if PHY FW load failed
 * @adapter: pointer to adapter structure
 * @link_cfg_err: bitmap from the link info structure
 *
 * Logs an error the first time the external PHY firmware load failure bit
 * is seen in @link_cfg_err and remembers that in
 * IXGBE_FLAG2_PHY_FW_LOAD_FAILED so the message is not repeated. The flag
 * is cleared again once the failure bit is no longer reported.
 */
static void ixgbe_check_phy_fw_load(struct ixgbe_adapter *adapter,
				    u8 link_cfg_err)
{
	bool load_failed =
		link_cfg_err & IXGBE_ACI_LINK_EXTERNAL_PHY_LOAD_FAILURE;

	if (!load_failed) {
		/* failure no longer reported: re-arm the message */
		adapter->flags2 &= ~IXGBE_FLAG2_PHY_FW_LOAD_FAILED;
		return;
	}

	/* failure was already reported, stay quiet */
	if (adapter->flags2 & IXGBE_FLAG2_PHY_FW_LOAD_FAILED)
		return;

	netdev_err(adapter->netdev, "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n");
	adapter->flags2 |= IXGBE_FLAG2_PHY_FW_LOAD_FAILED;
}
/**
 * ixgbe_check_module_power - check module power level
 * @adapter: pointer to adapter structure
 * @link_cfg_err: bitmap from the link info structure
 *
 * Inspects the module power bits of @link_cfg_err. When neither bit is
 * set, IXGBE_FLAG2_MOD_POWER_UNSUPPORTED is cleared. Otherwise an error is
 * logged once and the flag is set so the message is not repeated while the
 * condition persists.
 */
static void ixgbe_check_module_power(struct ixgbe_adapter *adapter,
				     u8 link_cfg_err)
{
	/* If module power level is supported, clear the flag. */
	if (!(link_cfg_err & (IXGBE_ACI_LINK_INVAL_MAX_POWER_LIMIT |
			      IXGBE_ACI_LINK_MODULE_POWER_UNSUPPORTED))) {
		adapter->flags2 &= ~IXGBE_FLAG2_MOD_POWER_UNSUPPORTED;
		return;
	}

	/* The problem was reported earlier and is still present. */
	if (adapter->flags2 & IXGBE_FLAG2_MOD_POWER_UNSUPPORTED)
		return;

	/* At least one of the two bits is set here; the max power limit
	 * error takes precedence when both are reported.
	 */
	if (link_cfg_err & IXGBE_ACI_LINK_INVAL_MAX_POWER_LIMIT)
		netdev_err(adapter->netdev, "The installed module is incompatible with the device's NVM image. Cannot start link.\n");
	else
		netdev_err(adapter->netdev, "The module's power requirements exceed the device's power supply. Cannot start link.\n");

	adapter->flags2 |= IXGBE_FLAG2_MOD_POWER_UNSUPPORTED;
}
/**
 * ixgbe_check_link_cfg_err - check if link configuration failed
 * @adapter: pointer to adapter structure
 * @link_cfg_err: bitmap from the link info structure
 *
 * Runs the individual link configuration checks on @link_cfg_err; each one
 * prints its own message when the corresponding failure is reported.
 */
static void ixgbe_check_link_cfg_err(struct ixgbe_adapter *adapter,
				     u8 link_cfg_err)
{
	/* module power is checked first, then the external PHY FW load */
	ixgbe_check_module_power(adapter, link_cfg_err);
	ixgbe_check_phy_fw_load(adapter, link_cfg_err);
}
/** * ixgbe_process_link_status_event - process the link event * @adapter: pointer to adapter structure * @link_up: true if the physical link is up and false if it is down * @link_speed: current link speed received from the link event * * Return: 0 on success or negative value on failure.
*/ staticint
ixgbe_process_link_status_event(struct ixgbe_adapter *adapter, bool link_up,
u16 link_speed)
{ struct ixgbe_hw *hw = &adapter->hw; int status;
/* Update the link info structures and re-enable link events, * don't bail on failure due to other book keeping needed.
*/
status = ixgbe_update_link_info(hw); if (status)
e_dev_err("Failed to update link status, err %d aq_err %d\n",
status, hw->aci.last_status);
/* Check if the link state is up after updating link info, and treat * this event as an UP event since the link is actually UP now.
*/ if (hw->link.link_info.link_info & IXGBE_ACI_LINK_UP)
link_up = true;
/* Turn off PHY if media was removed. */ if (!(adapter->flags2 & IXGBE_FLAG2_NO_MEDIA) &&
!(hw->link.link_info.link_info & IXGBE_ACI_MEDIA_AVAILABLE))
adapter->flags2 |= IXGBE_FLAG2_NO_MEDIA;
if (ixgbe_process_link_status_event(adapter, link_up, link_speed))
e_dev_warn("Could not process link status event");
}
/**
 * ixgbe_schedule_fw_event - schedule Firmware event
 * @adapter: pointer to the adapter structure
 *
 * Marks a firmware async event as pending and schedules the service task,
 * unless the adapter is down, being removed or resetting.
 */
static void ixgbe_schedule_fw_event(struct ixgbe_adapter *adapter)
{
	/* nothing to schedule while the adapter is not operational */
	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
	    test_bit(__IXGBE_REMOVING, &adapter->state) ||
	    test_bit(__IXGBE_RESETTING, &adapter->state))
		return;

	adapter->flags2 |= IXGBE_FLAG2_FW_ASYNC_EVENT;
	ixgbe_service_event_schedule(adapter);
}
/**
 * ixgbe_aci_event_cleanup - release msg_buf memory
 * @event: pointer to the event holding msg_buf to be released
 *
 * Frees the message buffer owned by @event. Intended to be used as a
 * __cleanup() handler so the buffer is released automatically.
 */
static void ixgbe_aci_event_cleanup(struct ixgbe_aci_event *event)
{
	void *buf = event->msg_buf;

	kfree(buf);
}
/**
 * ixgbe_handle_fw_event - handle Firmware event
 * @adapter: pointer to the adapter structure
 *
 * Obtain events from the ACI and process each one according to its opcode,
 * for as long as the ACI reports that more events are pending.
 */
static void ixgbe_handle_fw_event(struct ixgbe_adapter *adapter)
{
	struct ixgbe_aci_event event __cleanup(ixgbe_aci_event_cleanup);
	struct ixgbe_hw *hw = &adapter->hw;
	bool pending = false;

	/* acknowledge the request that got us scheduled */
	if (adapter->flags2 & IXGBE_FLAG2_FW_ASYNC_EVENT)
		adapter->flags2 &= ~IXGBE_FLAG2_FW_ASYNC_EVENT;

	/* msg_buf is released by the __cleanup handler on every exit path */
	event.buf_len = IXGBE_ACI_MAX_BUFFER_SIZE;
	event.msg_buf = kzalloc(event.buf_len, GFP_KERNEL);
	if (!event.msg_buf)
		return;

	for (;;) {
		/* stop on the first failure to fetch an event */
		if (ixgbe_aci_get_event(hw, &event, &pending))
			break;

		switch (le16_to_cpu(event.desc.opcode)) {
		case ixgbe_aci_opc_get_link_status:
			ixgbe_handle_link_status_event(adapter, &event);
			break;
		case ixgbe_aci_opc_temp_tca_event:
			e_crit(drv, "%s\n", ixgbe_overheat_msg);
			ixgbe_down(adapter);
			break;
		default:
			e_warn(hw, "unknown FW async event captured\n");
			break;
		}

		if (!pending)
			break;
	}
}
/*
 * Workaround for Silicon errata. Use clear-by-write instead
 * of clear-by-read. Reading with EICS will return the
 * interrupt causes without clearing, which will later be done
 * with the write to EICR.
*/
eicr = IXGBE_READ_REG(hw, IXGBE_EICS);
/* The lower 16bits of the EICR register are for the queue interrupts * which should be masked here in order to not accidentally clear them if * the bits are high when ixgbe_msix_other is called. There is a race * condition otherwise which results in possible performance loss * especially if the ixgbe_msix_other interrupt is triggering * consistently (as it would when PPS is turned on for the X540 device)
*/
eicr &= 0xFFFF0000;
IXGBE_WRITE_REG(hw, IXGBE_EICR, eicr);
if (eicr & IXGBE_EICR_LSC)
ixgbe_check_lsc(adapter);
if (eicr & IXGBE_EICR_MAILBOX)
ixgbe_msg_task(adapter);
if (eicr & IXGBE_EICR_FW_EVENT)
ixgbe_schedule_fw_event(adapter);
switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610: if (hw->phy.type == ixgbe_phy_x550em_ext_t &&
(eicr & IXGBE_EICR_GPI_SDP0_X540)) {
adapter->flags2 |= IXGBE_FLAG2_PHY_INTERRUPT;
ixgbe_service_event_schedule(adapter);
IXGBE_WRITE_REG(hw, IXGBE_EICR,
IXGBE_EICR_GPI_SDP0_X540);
} if (eicr & IXGBE_EICR_ECC) {
e_info(link, "Received ECC Err, initiating reset\n");
set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
ixgbe_service_event_schedule(adapter);
IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
} /* Handle Flow Director Full threshold interrupt */ if (eicr & IXGBE_EICR_FLOW_DIR) { int reinit_count = 0; int i; for (i = 0; i < adapter->num_tx_queues; i++) { struct ixgbe_ring *ring = adapter->tx_ring[i]; if (test_and_clear_bit(__IXGBE_TX_FDIR_INIT_DONE,
&ring->state))
reinit_count++;
} if (reinit_count) { /* no more flow director interrupts until after init */
IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_FLOW_DIR);
adapter->flags2 |= IXGBE_FLAG2_FDIR_REQUIRES_REINIT;
ixgbe_service_event_schedule(adapter);
}
}
ixgbe_check_sfp_event(adapter, eicr);
ixgbe_check_overtemp_event(adapter, eicr); break; default: break;
}
ixgbe_check_fan_failure(adapter, eicr);
if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
ixgbe_ptp_check_pps_event(adapter);
/* re-enable the original interrupt state, no lsc, no queues */ if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable(adapter, false, false);
/* EIAM disabled interrupts (on this vector) for us */
if (q_vector->rx.ring || q_vector->tx.ring)
napi_schedule_irqoff(&q_vector->napi);
return IRQ_HANDLED;
}
/**
 * ixgbe_poll - NAPI Rx polling callback
 * @napi: structure for representing this polling device
 * @budget: how many packets driver is allowed to clean
 *
 * This function is used for legacy and MSI, NAPI mode
 *
 * Return: @budget when called with a non-positive budget or when cleaning
 * did not complete; otherwise the work done, capped at @budget - 1.
 *
 * NOTE(review): in this view `ring` and `per_ring_budget` are never used
 * after being declared/assigned, and nothing modifies `clean_complete` or
 * `work_done`, so the per-ring cleaning loops appear to be missing here.
 * Confirm against the full file.
**/ int ixgbe_poll(struct napi_struct *napi, int budget)
{ struct ixgbe_q_vector *q_vector =
container_of(napi, struct ixgbe_q_vector, napi); struct ixgbe_adapter *adapter = q_vector->adapter; struct ixgbe_ring *ring; int per_ring_budget, work_done = 0; bool clean_complete = true;
#ifdef CONFIG_IXGBE_DCA if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
ixgbe_update_dca(q_vector); #endif
/* Exit if we are called by netpoll */ if (budget <= 0) return budget;
/* attempt to distribute budget to each queue fairly, but don't allow
* the budget to go below 1 because we'll exit polling */ if (q_vector->rx.count > 1)
per_ring_budget = max(budget/q_vector->rx.count, 1); else
per_ring_budget = budget;
/* If all work not completed, return budget and keep polling */ if (!clean_complete) return budget;
/* all work done, exit the polling mode; when polling really completed, * optionally refresh the ITR (bit 0 of rx_itr_setting) and re-enable * this vector's queue interrupt unless the adapter is down */ if (likely(napi_complete_done(napi, work_done))) { if (adapter->rx_itr_setting & 1)
ixgbe_set_itr(q_vector); if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable_queues(adapter,
BIT_ULL(q_vector->v_idx));
}
/* never report the full budget once polling has been completed */
return min(work_done, budget - 1);
}
/** * ixgbe_request_msix_irqs - Initialize MSI-X interrupts * @adapter: board private structure * * ixgbe_request_msix_irqs allocates MSI-X vectors and requests * interrupts from the kernel.
**/ staticint ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
{ struct net_device *netdev = adapter->netdev; unsignedint ri = 0, ti = 0; int vector, err;
/* * Workaround for silicon errata #26 on 82598. Mask the interrupt * before the read of EICR.
*/
IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK);
/* for NAPI, using EIAM to auto-mask tx/rx interrupt bits on read
* therefore no explicit interrupt disable is necessary */
eicr = IXGBE_READ_REG(hw, IXGBE_EICR); if (!eicr) { /* * shared interrupt alert! * make sure interrupts are enabled because the read will * have disabled interrupts due to EIAM * finish the workaround of silicon errata on 82598. Unmask * the interrupt that we masked before the EICR read.
*/ if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable(adapter, true, true); return IRQ_NONE; /* Not our interrupt */
}
if (eicr & IXGBE_EICR_LSC)
ixgbe_check_lsc(adapter);
if (eicr & IXGBE_EICR_FW_EVENT)
ixgbe_schedule_fw_event(adapter);
switch (hw->mac.type) { case ixgbe_mac_82599EB:
ixgbe_check_sfp_event(adapter, eicr);
fallthrough; case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610: if (eicr & IXGBE_EICR_ECC) {
e_info(link, "Received ECC Err, initiating reset\n");
set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
ixgbe_service_event_schedule(adapter);
IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC);
}
ixgbe_check_overtemp_event(adapter, eicr); break; default: break;
}
ixgbe_check_fan_failure(adapter, eicr); if (unlikely(eicr & IXGBE_EICR_TIMESYNC))
ixgbe_ptp_check_pps_event(adapter);
/* would disable interrupts here but EIAM disabled it */
napi_schedule_irqoff(&q_vector->napi);
/* * re-enable link(maybe) and non-queue interrupts, no flush. * ixgbe_poll will re-enable the queue interrupts
*/ if (!test_bit(__IXGBE_DOWN, &adapter->state))
ixgbe_irq_enable(adapter, false, false);
return IRQ_HANDLED;
}
/** * ixgbe_request_irq - initialize interrupts * @adapter: board private structure * * Attempts to configure interrupts using the best available * capabilities of the hardware and kernel.
**/ staticint ixgbe_request_irq(struct ixgbe_adapter *adapter)
{ struct net_device *netdev = adapter->netdev; int err;
/** * ixgbe_irq_disable - Mask off interrupt generation on the NIC * @adapter: board private structure
**/ staticinlinevoid ixgbe_irq_disable(struct ixgbe_adapter *adapter)
{ switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB:
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, ~0); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610:
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFF0000);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(0), ~0);
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC_EX(1), ~0); break; default: break;
}
IXGBE_WRITE_FLUSH(&adapter->hw); if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) { int vector;
for (vector = 0; vector < adapter->num_q_vectors; vector++)
synchronize_irq(adapter->msix_entries[vector].vector);
/* * set WTHRESH to encourage burst writeback, it should not be set * higher than 1 when: * - ITR is 0 as it could cause false TX hangs * - ITR is set to > 100k int/sec and BQL is enabled * * In order to avoid issues WTHRESH + PTHRESH should always be equal * to or less than the number of on chip descriptors, which is * currently 40.
*/ if (!ring->q_vector || (ring->q_vector->itr < IXGBE_100K_ITR))
txdctl |= 1u << 16; /* WTHRESH = 1 */ else
txdctl |= 8u << 16; /* WTHRESH = 8 */
/* * Setting PTHRESH to 32 both improves performance * and avoids a TX hang with DFP enabled
*/
txdctl |= (1u << 8) | /* HTHRESH = 1 */
32; /* PTHRESH = 32 */
/* TXDCTL.EN will return 0 on 82598 if link is down, so skip it */ if (hw->mac.type == ixgbe_mac_82598EB &&
!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP)) return;
/* poll to verify queue is enabled */ do {
usleep_range(1000, 2000);
txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
} while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); if (!wait_loop)
hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx);
}
/**
 * ixgbe_configure_tx - Configure 8259x Transmit Unit after Reset
 * @adapter: board private structure
 *
 * Sets up MTQC, turns on the Tx DMA engine on every MAC other than 82598EB,
 * and then configures each regular Tx ring followed by each XDP Tx ring.
 **/
static void ixgbe_configure_tx(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	u32 idx;

	ixgbe_setup_mtqc(adapter);

	if (hw->mac.type != ixgbe_mac_82598EB) {
		/* DMATXCTL.EN must be before Tx queues are enabled */
		u32 dmatxctl = IXGBE_READ_REG(hw, IXGBE_DMATXCTL);

		dmatxctl |= IXGBE_DMATXCTL_TE;
		IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, dmatxctl);
	}

	/* Setup the HW Tx Head and Tail descriptor pointers */
	for (idx = 0; idx < adapter->num_tx_queues; idx++)
		ixgbe_configure_tx_ring(adapter, adapter->tx_ring[idx]);

	for (idx = 0; idx < adapter->num_xdp_queues; idx++)
		ixgbe_configure_tx_ring(adapter, adapter->xdp_ring[idx]);
}
if (hw->mac.ops.disable_mdd)
hw->mac.ops.disable_mdd(hw);
if (adapter->ixgbe_ieee_pfc)
pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
/* * We should set the drop enable bit if: * SR-IOV is enabled * or * Number of Rx queues > 1 and flow control is disabled * * This allows us to avoid head of line blocking for security * and performance reasons.
*/ if (adapter->num_vfs || (adapter->num_rx_queues > 1 &&
!(adapter->hw.fc.current_mode & ixgbe_fc_tx_pause) && !pfc_en)) { for (i = 0; i < adapter->num_rx_queues; i++)
ixgbe_enable_rx_drop(adapter, adapter->rx_ring[i]);
} else { for (i = 0; i < adapter->num_rx_queues; i++)
ixgbe_disable_rx_drop(adapter, adapter->rx_ring[i]);
}
if (hw->mac.ops.enable_mdd)
hw->mac.ops.enable_mdd(hw);
}
/* configure the packet buffer length */ if (rx_ring->xsk_pool) {
u32 xsk_buf_len = xsk_pool_get_rx_frame_size(rx_ring->xsk_pool);
/* If the MAC supports setting RXDCTL.RLPML, the
 * SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and
 * RXDCTL.RLPML is set to the actual UMEM buffer
 * size. If not, then we are stuck with a 1k buffer
 * size resolution. In this case frames larger than
 * the UMEM buffer size viewed in a 1k resolution will
 * be dropped.
*/ if (hw->mac.type != ixgbe_mac_82599EB)
srrctl |= PAGE_SIZE >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; else
srrctl |= xsk_buf_len >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
} elseif (test_bit(__IXGBE_RX_3K_BUFFER, &rx_ring->state)) {
srrctl |= IXGBE_RXBUFFER_3K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
} else {
srrctl |= IXGBE_RXBUFFER_2K >> IXGBE_SRRCTL_BSIZEPKT_SHIFT;
}
/* configure descriptor type */
srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF;
/** * ixgbe_store_reta - Write the RETA table to HW * @adapter: device handle * * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW.
*/ void ixgbe_store_reta(struct ixgbe_adapter *adapter)
{
u32 i, reta_entries = ixgbe_rss_indir_tbl_entries(adapter); struct ixgbe_hw *hw = &adapter->hw;
u32 reta = 0;
u32 indices_multi;
u8 *indir_tbl = adapter->rss_indir_tbl;
/* Fill out the redirection table as follows: * - 82598: 8 bit wide entries containing pair of 4 bit RSS * indices. * - 82599/X540: 8 bit wide entries containing 4 bit RSS index * - X550: 8 bit wide entries containing 6 bit RSS index
*/ if (adapter->hw.mac.type == ixgbe_mac_82598EB)
indices_multi = 0x11; else
indices_multi = 0x1;
/* Write redirection table to HW */ for (i = 0; i < reta_entries; i++) {
reta |= indices_multi * indir_tbl[i] << (i & 0x3) * 8; if ((i & 3) == 3) { if (i < 128)
IXGBE_WRITE_REG(hw, IXGBE_RETA(i >> 2), reta); else
IXGBE_WRITE_REG(hw, IXGBE_ERETA((i >> 2) - 32),
reta);
reta = 0;
}
}
}
/** * ixgbe_store_vfreta - Write the RETA table to HW (x550 devices in SRIOV mode) * @adapter: device handle * * Write the RSS redirection table stored in adapter.rss_indir_tbl[] to HW.
*/ staticvoid ixgbe_store_vfreta(struct ixgbe_adapter *adapter)
{
u32 i, reta_entries = ixgbe_rss_indir_tbl_entries(adapter); struct ixgbe_hw *hw = &adapter->hw;
u32 vfreta = 0;
/* Write redirection table to HW */ for (i = 0; i < reta_entries; i++) {
u16 pool = adapter->num_rx_pools;
vfreta |= (u32)adapter->rss_indir_tbl[i] << (i & 0x3) * 8; if ((i & 3) != 3) continue;
/* Program table for at least 4 queues w/ SR-IOV so that VFs can * make full use of any rings they may have. We will use the * PSRTYPE register to control how many rings we use within the PF.
*/ if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 4))
rss_i = 4;
/* Fill out hash function seeds */
ixgbe_store_key(adapter);
/* Fill out redirection table */
memset(adapter->rss_indir_tbl, 0, sizeof(adapter->rss_indir_tbl));
for (i = 0, j = 0; i < reta_entries; i++, j++) { if (j == rss_i)
j = 0;
adapter->rss_indir_tbl[i] = j;
}
ixgbe_store_reta(adapter);
}
staticvoid ixgbe_setup_vfreta(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw;
u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; int i, j;
/* Fill out hash function seeds */ for (i = 0; i < 10; i++) {
u16 pool = adapter->num_rx_pools;
while (pool--)
IXGBE_WRITE_REG(hw,
IXGBE_PFVFRSSRK(i, VMDQ_P(pool)),
*(adapter->rss_key + i));
}
/* Fill out the redirection table */ for (i = 0, j = 0; i < 64; i++, j++) { if (j == rss_i)
j = 0;
/**
 * ixgbe_configure_rscctl - enable RSC for the indicated ring
 * @adapter: address of board private structure
 * @ring: structure containing ring specific data
 *
 * Returns without touching hardware when ring_is_rsc_enabled() is false for
 * @ring. Otherwise performs a read-modify-write of the ring's RSCCTL
 * register, OR-ing in IXGBE_RSCCTL_RSCEN and IXGBE_RSCCTL_MAXDESC_16.
 **/
static void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter,
				   struct ixgbe_ring *ring)
{
	struct ixgbe_hw *hw = &adapter->hw;
	u32 rscctrl;
	u8 reg_idx = ring->reg_idx;

	if (!ring_is_rsc_enabled(ring))
		return;

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(reg_idx));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	 * we must limit the number of descriptors so that the
	 * total size of max desc * buf_len is not greater
	 * than 65536
	 */
	rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(reg_idx), rscctrl);
}
if (ixgbe_removed(hw->hw_addr)) return; /* RXDCTL.EN will return 0 on 82598 if link is down, so skip it */ if (hw->mac.type == ixgbe_mac_82598EB &&
!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP)) return;
do {
usleep_range(1000, 2000);
rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
} while (--wait_loop && !(rxdctl & IXGBE_RXDCTL_ENABLE));
if (!wait_loop) {
e_err(drv, "RXDCTL.ENABLE on Rx queue %d not set within " "the polling period\n", reg_idx);
}
}
/* disable queue to avoid use of these values while updating state */
rxdctl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx));
rxdctl &= ~IXGBE_RXDCTL_ENABLE;
/* write value back with RXDCTL.ENABLE bit cleared */
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
IXGBE_WRITE_FLUSH(hw);
IXGBE_WRITE_REG(hw, IXGBE_RDBAL(reg_idx), (rdba & DMA_BIT_MASK(32)));
IXGBE_WRITE_REG(hw, IXGBE_RDBAH(reg_idx), (rdba >> 32));
IXGBE_WRITE_REG(hw, IXGBE_RDLEN(reg_idx),
ring->count * sizeof(union ixgbe_adv_rx_desc)); /* Force flushing of IXGBE_RDLEN to prevent MDD */
IXGBE_WRITE_FLUSH(hw);
if (hw->mac.type == ixgbe_mac_82598EB) { /* * enable cache line friendly hardware writes: * PTHRESH=32 descriptors (half the internal cache), * this also removes ugly rx_no_buffer_count increment * HTHRESH=4 descriptors (to minimize latency on fetch) * WTHRESH=8 burst writeback up to two cache lines
*/
rxdctl &= ~0x3FFFFF;
rxdctl |= 0x080420; #if (PAGE_SIZE < 8192) /* RXDCTL.RLPML does not work on 82599 */
} elseif (hw->mac.type != ixgbe_mac_82599EB) {
rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
IXGBE_RXDCTL_RLPML_EN);
/* Limit the maximum frame size so we don't overrun the skb. * This can happen in SRIOV mode when the MTU of the VF is * higher than the MTU of the PF.
*/ if (ring_uses_build_skb(ring) &&
!test_bit(__IXGBE_RX_3K_BUFFER, &ring->state))
rxdctl |= IXGBE_MAX_2K_FRAME_BUILD_SKB |
IXGBE_RXDCTL_RLPML_EN; #endif
}
/* accept untagged packets until a vlan tag is * specifically set for the VMDQ queue/pool
*/
vmolr = IXGBE_VMOLR_AUPE; while (pool--)
IXGBE_WRITE_REG(hw, IXGBE_VMOLR(VMDQ_P(pool)), vmolr);
/* Enable only the PF's pool for Tx/Rx */
IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), GENMASK(31, vf_shift));
IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset ^ 1), reg_offset - 1);
IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), GENMASK(31, vf_shift));
IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset ^ 1), reg_offset - 1); if (adapter->bridge_mode == BRIDGE_MODE_VEB)
IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, IXGBE_PFDTXGSWC_VT_LBEN);
/* Map PF MAC address in RAR Entry 0 to first pool following VFs */
hw->mac.ops.set_vmdq(hw, 0, VMDQ_P(0));
/* clear VLAN promisc flag so VFTA will be updated if necessary */
adapter->flags2 &= ~IXGBE_FLAG2_VLAN_PROMISC;
/* * Set up VF register offsets for selected VT Mode, * i.e. 32 or 64 VFs for SR-IOV
*/ switch (adapter->ring_feature[RING_F_VMDQ].mask) { case IXGBE_82599_VMDQ_8Q_MASK:
gcr_ext = IXGBE_GCR_EXT_VT_MODE_16; break; case IXGBE_82599_VMDQ_4Q_MASK:
gcr_ext = IXGBE_GCR_EXT_VT_MODE_32; break; default:
gcr_ext = IXGBE_GCR_EXT_VT_MODE_64; break;
}
IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext);
for (i = 0; i < adapter->num_vfs; i++) { /* configure spoof checking */
ixgbe_ndo_set_vf_spoofchk(adapter->netdev, i,
adapter->vfinfo[i].spoofchk_enabled);
#ifdef IXGBE_FCOE /* adjust max frame to be able to do baby jumbo for FCoE */ if ((adapter->flags & IXGBE_FLAG_FCOE_ENABLED) &&
(max_frame < IXGBE_FCOE_JUMBO_FRAME_SIZE))
max_frame = IXGBE_FCOE_JUMBO_FRAME_SIZE;
#endif/* IXGBE_FCOE */
/* adjust max frame to be at least the size of a standard frame */ if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN);
hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); /* set jumbo enable since MHADD.MFS is keeping size locked at max_frame */
hlreg0 |= IXGBE_HLREG0_JUMBOEN;
IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0);
/* * Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring
*/ for (i = 0; i < adapter->num_rx_queues; i++) {
rx_ring = adapter->rx_ring[i];
switch (hw->mac.type) { case ixgbe_mac_82598EB: /* * For VMDq support of different descriptor types or * buffer sizes through the use of multiple SRRCTL * registers, RDRXCTL.MVMEN must be set to 1 * * also, the manual doesn't mention it clearly but DCA hints * will only use queue 0's tags unless this bit is set. Side * effects of setting this bit are only that SRRCTL must be * fully programmed [0..15]
*/
rdrxctl |= IXGBE_RDRXCTL_MVMEN; break; case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610: if (adapter->num_vfs)
rdrxctl |= IXGBE_RDRXCTL_PSP;
fallthrough; case ixgbe_mac_82599EB: case ixgbe_mac_X540: /* Disable RSC for ACK packets */
IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
(IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE; /* hardware requires some bits to be set by default */
rdrxctl |= (IXGBE_RDRXCTL_RSCACKC | IXGBE_RDRXCTL_FCOE_WRFIX);
rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP; break; default: /* We should do nothing since we don't know this hardware */ return;
}
IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
}
/** * ixgbe_configure_rx - Configure 8259x Receive Unit after Reset * @adapter: board private structure * * Configure the Rx unit of the MAC after a reset.
**/ staticvoid ixgbe_configure_rx(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw; int i;
u32 rxctrl, rfctl;
/* disable receives while setting up the descriptors */
hw->mac.ops.disable_rx(hw);
/* Program registers for the distribution of queues */
ixgbe_setup_mrqc(adapter);
/* set_rx_buffer_len must be called before ring initialization */
ixgbe_set_rx_buffer_len(adapter);
/* * Setup the HW Rx Head and Tail Descriptor Pointers and * the Base and Length of the Rx Descriptor Ring
*/ for (i = 0; i < adapter->num_rx_queues; i++)
ixgbe_configure_rx_ring(adapter, adapter->rx_ring[i]);
rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); /* disable drop enable for 82598 parts */ if (hw->mac.type == ixgbe_mac_82598EB)
rxctrl |= IXGBE_RXCTRL_DMBYPS;
/* short cut the special case */ if (vlan == 0) return 0;
/* Search for the vlan id in the VLVF entries */ for (idx = IXGBE_VLVF_ENTRIES; --idx;) {
vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(idx)); if ((vlvf & VLAN_VID_MASK) == vlan) break;
}
idx = ixgbe_find_vlvf_entry(hw, vid); if (!idx) return;
/* See if any other pools are set for this VLAN filter * entry other than the PF.
*/
word = idx * 2 + (VMDQ_P(0) / 32);
bits = ~BIT(VMDQ_P(0) % 32);
bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
/* Disable the filter so this falls into the default pool. */ if (!bits && !IXGBE_READ_REG(hw, IXGBE_VLVFB(word ^ 1))) { if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), 0);
IXGBE_WRITE_REG(hw, IXGBE_VLVF(idx), 0);
}
}
/* remove VID from filter table */ if (vid && !(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC))
hw->mac.ops.set_vfta(hw, vid, VMDQ_P(0), false, true);
clear_bit(vid, adapter->active_vlans);
return 0;
}
/** * ixgbe_vlan_strip_disable - helper to disable hw vlan stripping * @adapter: driver data
*/ staticvoid ixgbe_vlan_strip_disable(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw;
u32 vlnctrl; int i, j;
switch (hw->mac.type) { case ixgbe_mac_82598EB:
vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
vlnctrl &= ~IXGBE_VLNCTRL_VME;
IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610: for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *ring = adapter->rx_ring[i];
/** * ixgbe_vlan_strip_enable - helper to enable hw vlan stripping * @adapter: driver data
*/ staticvoid ixgbe_vlan_strip_enable(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw;
u32 vlnctrl; int i, j;
switch (hw->mac.type) { case ixgbe_mac_82598EB:
vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL);
vlnctrl |= IXGBE_VLNCTRL_VME;
IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610: for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *ring = adapter->rx_ring[i];
for (i = IXGBE_VLVF_ENTRIES; --i;) {
u32 vlvf = IXGBE_READ_REG(hw, IXGBE_VLVF(i));
/* pull VLAN ID from VLVF */
vid = vlvf & VLAN_VID_MASK;
/* only concern ourselves with a certain range */ if (vid < vid_start || vid >= vid_end) continue;
if (vlvf) { /* record VLAN ID in VFTA */
vfta[(vid - vid_start) / 32] |= BIT(vid % 32);
/* if PF is part of this then continue */ if (test_bit(vid, adapter->active_vlans)) continue;
}
/* remove PF from the pool */
word = i * 2 + VMDQ_P(0) / 32;
bits = ~BIT(VMDQ_P(0) % 32);
bits &= IXGBE_READ_REG(hw, IXGBE_VLVFB(word));
IXGBE_WRITE_REG(hw, IXGBE_VLVFB(word), bits);
}
/* extract values from active_vlans and write back to VFTA */ for (i = VFTA_BLOCK_SIZE; i--;) {
vid = (vfta_offset + i) * 32;
word = vid / BITS_PER_LONG;
bits = vid % BITS_PER_LONG;
/** * ixgbe_write_mc_addr_list - write multicast addresses to MTA * @netdev: network interface device structure * * Writes multicast address list to the MTA hash table. * Returns: -ENOMEM on failure * 0 on no addresses written * X on writing X addresses to MTA
**/ staticint ixgbe_write_mc_addr_list(struct net_device *netdev)
{ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw;
if (!netif_running(netdev)) return 0;
if (hw->mac.ops.update_mc_addr_list)
hw->mac.ops.update_mc_addr_list(hw, netdev); else return -ENOMEM;
for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { /* do not count default RAR as available */ if (mac_table->state & IXGBE_MAC_STATE_DEFAULT) continue;
/* only count unused and addresses that belong to us */ if (mac_table->state & IXGBE_MAC_STATE_IN_USE) { if (mac_table->pool != pool) continue;
}
count++;
}
return count;
}
/* this function destroys the first RAR entry */ staticvoid ixgbe_mac_set_default_filter(struct ixgbe_adapter *adapter)
{ struct ixgbe_mac_addr *mac_table = &adapter->mac_table[0]; struct ixgbe_hw *hw = &adapter->hw;
/* search table for addr, if found clear IN_USE flag and sync */ for (i = 0; i < hw->mac.num_rar_entries; i++, mac_table++) { /* we can only delete an entry if it is in use */ if (!(mac_table->state & IXGBE_MAC_STATE_IN_USE)) continue; /* we only care about entries that belong to the given pool */ if (mac_table->pool != pool) continue; /* we only care about a specific MAC address */ if (!ether_addr_equal(addr, mac_table->addr)) continue;
/** * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set * @netdev: network interface device structure * * The set_rx_method entry point is called whenever the unicast/multicast * address list or the network interface flags are updated. This routine is * responsible for configuring the hardware for proper unicast, multicast and * promiscuous mode.
**/ void ixgbe_set_rx_mode(struct net_device *netdev)
{ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw;
u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE;
netdev_features_t features = netdev->features; int count;
/* Check for Promiscuous and All Multicast modes */
fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL);
/* set all bits that we expect to always be set */
fctrl &= ~IXGBE_FCTRL_SBP; /* disable store-bad-packets */
fctrl |= IXGBE_FCTRL_BAM;
fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */
fctrl |= IXGBE_FCTRL_PMCF;
/* clear the bits we are changing the status of */
fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); if (netdev->flags & IFF_PROMISC) {
hw->addr_ctrl.user_set_promisc = true;
fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE);
vmolr |= IXGBE_VMOLR_MPE;
features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
} else { if (netdev->flags & IFF_ALLMULTI) {
fctrl |= IXGBE_FCTRL_MPE;
vmolr |= IXGBE_VMOLR_MPE;
}
hw->addr_ctrl.user_set_promisc = false;
}
/* * Write addresses to available RAR registers, if there is not * sufficient space to store all the addresses then enable * unicast promiscuous mode
*/ if (__dev_uc_sync(netdev, ixgbe_uc_sync, ixgbe_uc_unsync)) {
fctrl |= IXGBE_FCTRL_UPE;
vmolr |= IXGBE_VMOLR_ROPE;
}
/* Write addresses to the MTA, if the attempt fails * then we should just turn on promiscuous mode so * that we can at least receive multicast traffic
*/
count = ixgbe_write_mc_addr_list(netdev); if (count < 0) {
fctrl |= IXGBE_FCTRL_MPE;
vmolr |= IXGBE_VMOLR_MPE;
} elseif (count) {
vmolr |= IXGBE_VMOLR_ROMPE;
}
/* This is useful for sniffing bad packets. */ if (features & NETIF_F_RXALL) { /* UPE and MPE will be handled by normal PROMISC logic
* in e1000e_set_rx_mode */
fctrl |= (IXGBE_FCTRL_SBP | /* Receive bad packets */
IXGBE_FCTRL_BAM | /* RX All Bcast Pkts */
IXGBE_FCTRL_PMCF); /* RX All MAC Ctrl Pkts */
fctrl &= ~(IXGBE_FCTRL_DPF); /* NOTE: VLAN filtering is disabled by setting PROMISC */
}
IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl);
if (features & NETIF_F_HW_VLAN_CTAG_RX)
ixgbe_vlan_strip_enable(adapter); else
ixgbe_vlan_strip_disable(adapter);
if (features & NETIF_F_HW_VLAN_CTAG_FILTER)
ixgbe_vlan_promisc_disable(adapter); else
ixgbe_vlan_promisc_enable(adapter);
}
/**
 * ixgbe_napi_enable_all - call napi_enable() on every queue vector
 * @adapter: board private structure
 *
 * Walks adapter->q_vector[0 .. num_q_vectors - 1] and enables the NAPI
 * context embedded in each vector.
 */
static void ixgbe_napi_enable_all(struct ixgbe_adapter *adapter)
{
	int q_idx;

	for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++)
		napi_enable(&adapter->q_vector[q_idx]->napi);
}
/**
 * ixgbe_napi_disable_all - call napi_disable() on every queue vector
 * @adapter: board private structure
 *
 * Walks adapter->q_vector[0 .. num_q_vectors - 1] and disables the NAPI
 * context embedded in each vector.
 */
static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter)
{
	int q_idx;

	for (q_idx = 0; q_idx < adapter->num_q_vectors; q_idx++)
		napi_disable(&adapter->q_vector[q_idx]->napi);
}
#ifdef CONFIG_IXGBE_DCB /** * ixgbe_configure_dcb - Configure DCB hardware * @adapter: ixgbe adapter struct * * This is called by the driver on open to configure the DCB hardware. * This is also called by the gennetlink interface when reconfiguring * the DCB state.
*/ staticvoid ixgbe_configure_dcb(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw; int max_frame = adapter->netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) { if (hw->mac.type == ixgbe_mac_82598EB)
netif_set_tso_max_size(adapter->netdev, 65536); return;
}
if (hw->mac.type == ixgbe_mac_82598EB)
netif_set_tso_max_size(adapter->netdev, 32768);
#ifdef IXGBE_FCOE if (adapter->netdev->fcoe_mtu)
max_frame = max(max_frame, IXGBE_FCOE_JUMBO_FRAME_SIZE); #endif
/* Calculate delay value for device */ switch (hw->mac.type) { case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610:
dv_id = IXGBE_DV_X540(link, tc); break; default:
dv_id = IXGBE_DV(link, tc); break;
}
/* Delay value is calculated in bit times convert to KB */
kb = IXGBE_BT2KB(dv_id);
rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10;
marker = rx_pba - kb;
/* It is possible that the packet buffer is not large enough * to provide the required headroom. In this case report an error * to the user and do the best we can.
*/ if (marker < 0) {
e_warn(drv, "Packet Buffer(%i) can not provide enough" "headroom to support flow control." "Decrease MTU or number of traffic classes\n", pb);
marker = tc + 1;
}
return marker;
}
/**
 * ixgbe_lpbthresh - calculate low water mark for flow control
 * @adapter: board private structure to calculate for
 * @pb: packet buffer to calculate
 *
 * Derives the low water mark from the maximum LAN frame size
 * (MTU + Ethernet header + FCS) using the MAC-specific low delay-value macro.
 *
 * NOTE(review): @pb is not referenced by the body as it appears here; confirm
 * whether a per-packet-buffer adjustment is expected.
 *
 * Return: the delay value converted from bit times to KB
 */
static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb)
{
	struct ixgbe_hw *hw = &adapter->hw;
	struct net_device *dev = adapter->netdev;
	int tc;
	u32 dv_id;

	/* Calculate max LAN frame size */
	tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN;

	/* Calculate delay value for device */
	switch (hw->mac.type) {
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
	case ixgbe_mac_x550em_a:
	case ixgbe_mac_e610:
		dv_id = IXGBE_LOW_DV_X540(tc);
		break;
	default:
		dv_id = IXGBE_LOW_DV(tc);
		break;
	}

	/* Delay value is calculated in bit times convert to KB */
	return IXGBE_BT2KB(dv_id);
}
/**
 * ixgbe_pbthresh_setup - calculate and setup high low water marks
 * @adapter: board private structure
 *
 * Fills hw->fc.high_water[] and hw->fc.low_water[] for each traffic class in
 * use (at least one, even when adapter->hw_tcs is zero). The high_water
 * entries of the remaining classes up to MAX_TRAFFIC_CLASS are zeroed; their
 * low_water entries are left untouched.
 */
static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	int num_tc = adapter->hw_tcs;
	int i;

	/* always configure at least one packet buffer */
	if (!num_tc)
		num_tc = 1;

	for (i = 0; i < num_tc; i++) {
		hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i);
		hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i);

		/* Low water marks must not be larger than high water marks */
		if (hw->fc.low_water[i] > hw->fc.high_water[i])
			hw->fc.low_water[i] = 0;
	}

	/* i continues from num_tc: clear the unused classes */
	for (; i < MAX_TRAFFIC_CLASS; i++)
		hw->fc.high_water[i] = 0;
}
/** * ixgbe_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from
**/ staticvoid ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring)
{
u16 i = rx_ring->next_to_clean; struct ixgbe_rx_buffer *rx_buffer = &rx_ring->rx_buffer_info[i];
if (rx_ring->xsk_pool) {
ixgbe_xsk_clean_rx_ring(rx_ring); goto skip_free;
}
/* Free all the Rx ring sk_buffs */ while (i != rx_ring->next_to_alloc) { if (rx_buffer->skb) { struct sk_buff *skb = rx_buffer->skb; if (IXGBE_CB(skb)->page_released)
dma_unmap_page_attrs(rx_ring->dev,
IXGBE_CB(skb)->dma,
ixgbe_rx_pg_size(rx_ring),
DMA_FROM_DEVICE,
IXGBE_RX_DMA_ATTR);
dev_kfree_skb(skb);
}
/* Invalidate cache lines that may have been written to by * device so that we avoid corrupting memory.
*/
dma_sync_single_range_for_cpu(rx_ring->dev,
rx_buffer->dma,
rx_buffer->page_offset,
ixgbe_rx_bufsz(rx_ring),
DMA_FROM_DEVICE);
/* record configuration for macvlan interface in vdev */ for (i = 0; i < num_tc; i++)
netdev_bind_sb_channel_queue(adapter->netdev, vdev,
i, rss_i, baseq + (rss_i * i));
for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
adapter->rx_ring[baseq + i]->netdev = vdev;
/* Guarantee all rings are updated before we update the * MAC address filter.
*/
wmb();
/* ixgbe_add_mac_filter will return an index if it succeeds, so we * need to only treat it as an error value if it is negative.
*/
err = ixgbe_add_mac_filter(adapter, vdev->dev_addr,
VMDQ_P(accel->pool)); if (err >= 0) return 0;
/* if we cannot add the MAC rule then disable the offload */
macvlan_release_l2fw_offload(vdev);
for (i = 0; i < adapter->num_rx_queues_per_pool; i++)
adapter->rx_ring[baseq + i]->netdev = NULL;
netdev_err(vdev, "L2FW offload disabled due to L2 filter error\n");
/* unbind the queues and drop the subordinate channel config */
netdev_unbind_sb_channel(adapter->netdev, vdev);
netdev_set_sb_channel(vdev, 0);
ixgbe_configure_pb(adapter); #ifdef CONFIG_IXGBE_DCB
ixgbe_configure_dcb(adapter); #endif /* * We must restore virtualization before VLANs or else * the VLVF registers will not be populated
*/
ixgbe_configure_virtualization(adapter);
/**
 * ixgbe_enable_link_status_events - enable link status events
 * @adapter: pointer to the adapter structure
 * @mask: event mask to be set
 *
 * Enables link status events by invoking ixgbe_configure_lse(). On success
 * @mask is remembered in adapter->lse_mask; on failure lse_mask is left
 * unchanged.
 *
 * Return: the exit code of the operation.
 */
static int ixgbe_enable_link_status_events(struct ixgbe_adapter *adapter,
					   u16 mask)
{
	int err;

	err = ixgbe_configure_lse(&adapter->hw, true, mask);
	if (err)
		return err;

	adapter->lse_mask = mask;
	return 0;
}
/**
 * ixgbe_disable_link_status_events - disable link status events
 * @adapter: pointer to the adapter structure
 *
 * Disables link status events by invoking ixgbe_configure_lse() with the
 * mask currently stored in adapter->lse_mask. On success the stored mask is
 * cleared; on failure it is left unchanged.
 *
 * Return: the exit code of the operation.
 */
static int ixgbe_disable_link_status_events(struct ixgbe_adapter *adapter)
{
	int err;

	err = ixgbe_configure_lse(&adapter->hw, false, adapter->lse_mask);
	if (err)
		return err;

	adapter->lse_mask = 0;
	return 0;
}
/** * ixgbe_sfp_link_config - set up SFP+ link * @adapter: pointer to private adapter struct
**/ staticvoid ixgbe_sfp_link_config(struct ixgbe_adapter *adapter)
{ /* * We are assuming the worst case scenario here, and that * is that an SFP was inserted/removed after the reset * but before SFP detection was enabled. As such the best * solution is to just start searching as soon as we start
*/ if (adapter->hw.mac.type == ixgbe_mac_82598EB)
adapter->flags2 |= IXGBE_FLAG2_SEARCH_FOR_SFP;
/**
 * ixgbe_non_sfp_link_config - set up non-SFP+ link
 * @hw: pointer to private hardware struct
 *
 * Configure non-SFP link: query the current link state, pick the speeds to
 * advertise and hand them to the MAC's setup_link operation. On e610 MACs
 * link status events are enabled before the link is set up.
 *
 * Return: 0 on success, negative on failure. -EIO is returned when the MAC
 * provides no check_link operation.
 **/
static int ixgbe_non_sfp_link_config(struct ixgbe_hw *hw)
{
	struct ixgbe_adapter *adapter = container_of(hw, struct ixgbe_adapter,
						     hw);
	/* every event bit set except the four link events named here */
	u16 mask = ~((u16)(IXGBE_ACI_LINK_EVENT_UPDOWN |
			   IXGBE_ACI_LINK_EVENT_MEDIA_NA |
			   IXGBE_ACI_LINK_EVENT_MODULE_QUAL_FAIL |
			   IXGBE_ACI_LINK_EVENT_PHY_FW_LOAD_FAIL));
	bool autoneg, link_up = false;
	int ret = -EIO;
	u32 speed;

	if (hw->mac.ops.check_link)
		ret = hw->mac.ops.check_link(hw, &speed, &link_up, false);

	if (ret)
		return ret;

	/* prefer the user-advertised speeds; fall back to the capabilities */
	speed = hw->phy.autoneg_advertised;
	if (!speed && hw->mac.ops.get_link_capabilities) {
		ret = hw->mac.ops.get_link_capabilities(hw, &speed,
							&autoneg);
		/* remove NBASE-T speeds from default autonegotiation
		 * to accommodate broken network switches in the field
		 * which cannot cope with advertised NBASE-T speeds
		 */
		speed &= ~(IXGBE_LINK_SPEED_5GB_FULL |
			   IXGBE_LINK_SPEED_2_5GB_FULL);
	}

	if (ret)
		return ret;

	if (hw->mac.ops.setup_link) {
		if (adapter->hw.mac.type == ixgbe_mac_e610) {
			ret = ixgbe_enable_link_status_events(adapter, mask);
			if (ret)
				return ret;
		}
		ret = hw->mac.ops.setup_link(hw, speed, link_up);
	}

	return ret;
}
/**
 * ixgbe_check_media_subtask - check for media
 * @adapter: pointer to adapter structure
 *
 * Only acts while IXGBE_FLAG2_NO_MEDIA is set. Refreshes the link info and,
 * if media is now reported as available, reconfigures the link through
 * ixgbe_non_sfp_link_config(). The NO_MEDIA flag is cleared only when that
 * reconfiguration succeeds.
 */
static void ixgbe_check_media_subtask(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;

	/* No need to check for media if it's already present */
	if (!(adapter->flags2 & IXGBE_FLAG2_NO_MEDIA))
		return;

	/* Refresh link info and check if media is present */
	if (ixgbe_update_link_info(hw))
		return;

	if (hw->link.link_info.link_info & IXGBE_ACI_MEDIA_AVAILABLE) {
		/* PHY settings are reset on media insertion, reconfigure
		 * PHY to preserve settings.
		 */
		if (!(ixgbe_non_sfp_link_config(&adapter->hw)))
			adapter->flags2 &= ~IXGBE_FLAG2_NO_MEDIA;

		/* A Link Status Event will be generated; the event handler
		 * will complete bringing the interface up
		 */
	}
}
/** * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset * @adapter: board private structure * * On a reset we need to clear out the VF stats or accounting gets * messed up because they're not clear on read.
**/ staticvoid ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw; int i;
if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) {
gpie = IXGBE_GPIE_MSIX_MODE | IXGBE_GPIE_PBA_SUPPORT |
IXGBE_GPIE_OCD;
gpie |= IXGBE_GPIE_EIAME; /* * use EIAM to auto-mask when MSI-X interrupt is asserted * this saves a register write for every interrupt
*/ switch (hw->mac.type) { case ixgbe_mac_82598EB:
IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610: default:
IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(0), 0xFFFFFFFF);
IXGBE_WRITE_REG(hw, IXGBE_EIAM_EX(1), 0xFFFFFFFF); break;
}
} else { /* legacy interrupts, use EIAM to auto-mask when reading EICR,
* specifically only auto mask tx and rx interrupts */
IXGBE_WRITE_REG(hw, IXGBE_EIAM, IXGBE_EICS_RTX_QUEUE);
}
/* XXX: to interrupt immediately for EICS writes, enable this */ /* gpie |= IXGBE_GPIE_EIMEN; */
if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
gpie &= ~IXGBE_GPIE_VTMODE_MASK;
if (ixgbe_is_sfp(hw)) {
ixgbe_sfp_link_config(adapter);
} else {
err = ixgbe_non_sfp_link_config(hw); if (err)
e_err(probe, "link_config FAILED %d\n", err);
}
/* clear any pending interrupts, may auto mask */
IXGBE_READ_REG(hw, IXGBE_EICR);
ixgbe_irq_enable(adapter, true, true);
/* * If this adapter has a fan, check to see if we had a failure * before we enabled the interrupt.
*/ if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); if (esdp & IXGBE_ESDP_SDP1)
e_crit(drv, "Fan has stopped, replace the adapter\n");
}
/* bring the link up in the watchdog, this could race with our first
* link up interrupt but shouldn't be a problem */
adapter->flags |= IXGBE_FLAG_NEED_LINK_UPDATE;
adapter->link_check_timeout = jiffies;
mod_timer(&adapter->service_timer, jiffies);
ixgbe_clear_vf_stats_counters(adapter); /* Set PF Reset Done bit so PF/VF Mail Ops can work */
ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT);
ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD;
IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
/* update setting rx tx for all active vfs */
ixgbe_set_all_vfs(adapter);
}
/**
 * ixgbe_reinit_locked - take the adapter down and bring it back up
 * @adapter: board private structure
 *
 * Waits until it owns the __IXGBE_RESETTING state bit, then runs
 * ixgbe_down() followed by ixgbe_up() and releases the bit again.
 */
void ixgbe_reinit_locked(struct ixgbe_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;

	/* put off any impending NetWatchDogTimeout */
	netif_trans_update(netdev);

	/* sleep 1-2 ms between attempts until the RESETTING bit is ours */
	for (;;) {
		if (!test_and_set_bit(__IXGBE_RESETTING, &adapter->state))
			break;
		usleep_range(1000, 2000);
	}

	if (adapter->hw.phy.type == ixgbe_phy_fw)
		ixgbe_watchdog_link_is_down(adapter);

	ixgbe_down(adapter);

	/*
	 * If SR-IOV enabled then wait a bit before bringing the adapter
	 * back up to give the VFs time to respond to the reset. The
	 * two second wait is based upon the watchdog timer cycle in
	 * the VF driver.
	 */
	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
		msleep(2000);

	ixgbe_up(adapter);

	clear_bit(__IXGBE_RESETTING, &adapter->state);
}
void ixgbe_up(struct ixgbe_adapter *adapter)
{ /* hardware has been reset, we need to reload some things */
ixgbe_configure(adapter);
switch (devctl2 & IXGBE_PCIDEVCTRL2_TIMEO_MASK) { case IXGBE_PCIDEVCTRL2_17_34s: case IXGBE_PCIDEVCTRL2_4_8s: /* For now we cap the upper limit on delay to 2 seconds * as we end up going up to 34 seconds of delay in worst * case timeout value.
*/ case IXGBE_PCIDEVCTRL2_1_2s: return 2000000ul; /* 2.0 s */ case IXGBE_PCIDEVCTRL2_260_520ms: return 520000ul; /* 520 ms */ case IXGBE_PCIDEVCTRL2_65_130ms: return 130000ul; /* 130 ms */ case IXGBE_PCIDEVCTRL2_16_32ms: return 32000ul; /* 32 ms */ case IXGBE_PCIDEVCTRL2_1_2ms: return 2000ul; /* 2 ms */ case IXGBE_PCIDEVCTRL2_50_100us: return 100ul; /* 100 us */ case IXGBE_PCIDEVCTRL2_16_32ms_def: return 32000ul; /* 32 ms */ default: break;
}
/* We shouldn't need to hit this path, but just in case default as * though completion timeout is not supported and support 32ms.
*/ return 32000ul;
}
void ixgbe_disable_rx(struct ixgbe_adapter *adapter)
{ unsignedlong wait_delay, delay_interval; struct ixgbe_hw *hw = &adapter->hw; int i, wait_loop;
u32 rxdctl;
/* write value back with RXDCTL.ENABLE bit cleared */
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
}
/* RXDCTL.EN may not change on 82598 if link is down, so skip it */ if (hw->mac.type == ixgbe_mac_82598EB &&
!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP)) return;
/* Determine our minimum delay interval. We will increase this value * with each subsequent test. This way if the device returns quickly * we should spend as little time as possible waiting, however as * the time increases we will wait for larger periods of time. * * The trick here is that we increase the interval using the * following pattern: 1x 3x 5x 7x 9x 11x 13x 15x 17x 19x. The result * of that wait is that it totals up to 100x whatever interval we * choose. Since our minimum wait is 100us we can just divide the * total timeout by 100 to get our minimum delay interval.
*/
delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
/* OR together the reading of all the active RXDCTL registers, * and then test the result. We need the disable to complete * before we start freeing the memory and invalidating the * DMA mappings.
*/ for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *ring = adapter->rx_ring[i];
u8 reg_idx = ring->reg_idx;
/* If the link is not up there shouldn't be much in the way of * pending transactions. Those that are left will be flushed out * when the reset logic goes through the flush sequence to clean out * the pending Tx transactions.
*/ if (!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP)) goto dma_engine_disable;
/* Determine our minimum delay interval. We will increase this value * with each subsequent test. This way if the device returns quickly * we should spend as little time as possible waiting, however as * the time increases we will wait for larger periods of time. * * The trick here is that we increase the interval using the * following pattern: 1x 3x 5x 7x 9x 11x 13x 15x 17x 19x. The result * of that wait is that it totals up to 100x whatever interval we * choose. Since our minimum wait is 100us we can just divide the * total timeout by 100 to get our minimum delay interval.
*/
delay_interval = ixgbe_get_completion_timeout(adapter) / 100;
/* OR together the reading of all the active TXDCTL registers, * and then test the result. We need the disable to complete * before we start freeing the memory and invalidating the * DMA mappings.
*/ for (i = 0; i < adapter->num_tx_queues; i++) { struct ixgbe_ring *ring = adapter->tx_ring[i];
u8 reg_idx = ring->reg_idx;
txdctl |= IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx));
} for (i = 0; i < adapter->num_xdp_queues; i++) { struct ixgbe_ring *ring = adapter->xdp_ring[i];
u8 reg_idx = ring->reg_idx;
if (!(txdctl & IXGBE_TXDCTL_ENABLE)) goto dma_engine_disable;
}
e_err(drv, "TXDCTL.ENABLE for one or more queues not cleared within the polling period\n");
dma_engine_disable: /* Disable the Tx DMA engine on 82599 and later MAC */ switch (hw->mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610:
IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL,
(IXGBE_READ_REG(hw, IXGBE_DMATXCTL) &
~IXGBE_DMATXCTL_TE));
fallthrough; default: break;
}
}
if (ixgbe_removed(hw->hw_addr)) return; /* lock SFP init bit to prevent race conditions with the watchdog */ while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state))
usleep_range(1000, 2000);
/* clear all SFP and link config related flags while holding SFP_INIT */
adapter->flags2 &= ~(IXGBE_FLAG2_SEARCH_FOR_SFP |
IXGBE_FLAG2_SFP_NEEDS_RESET);
adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG;
err = hw->mac.ops.init_hw(hw); switch (err) { case 0: case -ENOENT: case -EOPNOTSUPP: break; case -EALREADY:
e_dev_err("primary disable timed out\n"); break; case -EACCES: /* We are running on a pre-production device, log a warning */
e_dev_warn("This device is a pre-production adapter/LOM. " "Please be aware there may be issues associated with " "your hardware. If you are experiencing problems " "please contact your Intel or hardware " "representative who provided you with this " "hardware.\n"); break; default:
e_dev_err("Hardware Error: %d\n", err);
}
clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state);
/* flush entries out of MAC table */
ixgbe_flush_sw_mac_table(adapter);
__dev_uc_unsync(netdev, NULL);
/* do not flush user set addresses */
ixgbe_mac_set_default_filter(adapter);
/* update SAN MAC vmdq pool selection */ if (hw->mac.san_mac_rar_index)
hw->mac.ops.set_vmdq_san_mac(hw, VMDQ_P(0));
if (test_bit(__IXGBE_PTP_RUNNING, &adapter->state))
ixgbe_ptp_reset(adapter);
if (hw->phy.ops.set_phy_power) { if (!netif_running(adapter->netdev) && !adapter->wol)
hw->phy.ops.set_phy_power(hw, false); else
hw->phy.ops.set_phy_power(hw, true);
}
}
/** * ixgbe_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned
**/ staticvoid ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
{
u16 i = tx_ring->next_to_clean; struct ixgbe_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
if (tx_ring->xsk_pool) {
ixgbe_xsk_clean_tx_ring(tx_ring); goto out;
}
while (i != tx_ring->next_to_use) { union ixgbe_adv_tx_desc *eop_desc, *tx_desc;
/* Free all the Tx ring sk_buffs */ if (ring_is_xdp(tx_ring))
xdp_return_frame(tx_buffer->xdpf); else
dev_kfree_skb_any(tx_buffer->skb);
/* check for eop_desc to determine the end of the packet */
eop_desc = tx_buffer->next_to_watch;
tx_desc = IXGBE_TX_DESC(tx_ring, i);
/* unmap remaining buffers */ while (tx_desc != eop_desc) {
tx_buffer++;
tx_desc++;
i++; if (unlikely(i == tx_ring->count)) {
i = 0;
tx_buffer = tx_ring->tx_buffer_info;
tx_desc = IXGBE_TX_DESC(tx_ring, 0);
}
/* unmap any remaining paged data */ if (dma_unmap_len(tx_buffer, len))
dma_unmap_page(tx_ring->dev,
dma_unmap_addr(tx_buffer, dma),
dma_unmap_len(tx_buffer, len),
DMA_TO_DEVICE);
}
/* move us one more past the eop_desc for start of next pkt */
tx_buffer++;
i++; if (unlikely(i == tx_ring->count)) {
i = 0;
tx_buffer = tx_ring->tx_buffer_info;
}
}
/* reset BQL for queue */ if (!ring_is_xdp(tx_ring))
netdev_tx_reset_queue(txring_txq(tx_ring));
/**
 * ixgbe_clean_all_rx_rings - Free Rx Buffers for all queues
 * @adapter: board private structure
 *
 * Runs ixgbe_clean_rx_ring() on each of the adapter's num_rx_queues
 * Rx rings, in index order.
 **/
static void ixgbe_clean_all_rx_rings(struct ixgbe_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_rx_queues; i++)
		ixgbe_clean_rx_ring(adapter->rx_ring[i]);
}
/**
 * ixgbe_clean_all_tx_rings - Free Tx Buffers for all queues
 * @adapter: board private structure
 *
 * Runs ixgbe_clean_tx_ring() on every regular Tx ring and then on every
 * XDP ring owned by the adapter.
 **/
static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++)
		ixgbe_clean_tx_ring(adapter->tx_ring[i]);

	/* XDP rings go through the same Tx cleaning routine */
	for (i = 0; i < adapter->num_xdp_queues; i++)
		ixgbe_clean_tx_ring(adapter->xdp_ring[i]);
}
#ifdef CONFIG_PCI_IOV if (max_vfs > 0)
e_dev_warn("Enabling SR-IOV VFs using the max_vfs module parameter is deprecated - please use the pci sysfs interface instead.\n");
/* assign number of SR-IOV VFs */ if (hw->mac.type != ixgbe_mac_82598EB) { if (max_vfs > IXGBE_MAX_VFS_DRV_LIMIT) {
max_vfs = 0;
e_dev_warn("max_vfs parameter out of range. Not assigning any SR-IOV VFs\n");
}
} #endif/* CONFIG_PCI_IOV */
/* enable itr by default in dynamic mode */
adapter->rx_itr_setting = 1;
adapter->tx_itr_setting = 1;
/* set default ring sizes */
adapter->tx_ring_count = IXGBE_DEFAULT_TXD;
adapter->rx_ring_count = IXGBE_DEFAULT_RXD;
/* set default work limits */
adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK;
/* PF holds first pool slot */
set_bit(0, adapter->fwd_bitmask);
set_bit(__IXGBE_DOWN, &adapter->state);
/* enable locking for XDP_TX if we have more CPUs than queues */ if (nr_cpu_ids > IXGBE_MAX_XDP_QS)
static_branch_enable(&ixgbe_xdp_locking_key);
return 0;
}
/** * ixgbe_setup_tx_resources - allocate Tx resources (Descriptors) * @tx_ring: tx descriptor ring (for a specific queue) to setup * * Return 0 on success, negative on failure
**/ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
{ struct device *dev = tx_ring->dev; int orig_node = dev_to_node(dev); int ring_node = NUMA_NO_NODE; int size;
err:
vfree(tx_ring->tx_buffer_info);
tx_ring->tx_buffer_info = NULL;
dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); return -ENOMEM;
}
/** * ixgbe_setup_all_tx_resources - allocate all queues Tx resources * @adapter: board private structure * * If this function returns with an error, then it's possible one or * more of the rings is populated (while the rest are not). It is the * caller's duty to clean those orphaned rings. * * Return 0 on success, negative on failure
**/ staticint ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
{ int i, j = 0, err = 0;
for (i = 0; i < adapter->num_tx_queues; i++) {
err = ixgbe_setup_tx_resources(adapter->tx_ring[i]); if (!err) continue;
e_err(probe, "Allocation for Tx Queue %u failed\n", i); goto err_setup_tx;
} for (j = 0; j < adapter->num_xdp_queues; j++) {
err = ixgbe_setup_tx_resources(adapter->xdp_ring[j]); if (!err) continue;
return 0;
err_setup_tx: /* rewind the index freeing the rings as we go */ while (j--)
ixgbe_free_tx_resources(adapter->xdp_ring[j]); while (i--)
ixgbe_free_tx_resources(adapter->tx_ring[i]); return err;
}
/* XDP RX-queue info */ if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
rx_ring->queue_index, ixgbe_rx_napi_id(rx_ring)) < 0) goto err;
WRITE_ONCE(rx_ring->xdp_prog, adapter->xdp_prog);
return 0;
err:
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); return -ENOMEM;
}
/** * ixgbe_setup_all_rx_resources - allocate all queues Rx resources * @adapter: board private structure * * If this function returns with an error, then it's possible one or * more of the rings is populated (while the rest are not). It is the * caller's duty to clean those orphaned rings. * * Return 0 on success, negative on failure
**/ staticint ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
{ int i, err = 0;
for (i = 0; i < adapter->num_rx_queues; i++) {
err = ixgbe_setup_rx_resources(adapter, adapter->rx_ring[i]); if (!err) continue;
#ifdef IXGBE_FCOE
err = ixgbe_setup_fcoe_ddp_resources(adapter); if (!err) #endif return 0;
err_setup_rx: /* rewind the index freeing the rings as we go */ while (i--)
ixgbe_free_rx_resources(adapter->rx_ring[i]); return err;
}
/** * ixgbe_free_tx_resources - Free Tx Resources per Queue * @tx_ring: Tx descriptor ring for a specific queue * * Free all transmit software resources
**/ void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring)
{
ixgbe_clean_tx_ring(tx_ring);
/**
 * ixgbe_free_all_tx_resources - Free Tx Resources for All Queues
 * @adapter: board private structure
 *
 * Free all transmit software resources.  Covers both the regular Tx rings
 * and the XDP rings; a ring whose desc pointer is NULL is skipped.
 **/
static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter)
{
	int i;

	for (i = 0; i < adapter->num_tx_queues; i++) {
		if (adapter->tx_ring[i]->desc)
			ixgbe_free_tx_resources(adapter->tx_ring[i]);
	}

	for (i = 0; i < adapter->num_xdp_queues; i++) {
		if (adapter->xdp_ring[i]->desc)
			ixgbe_free_tx_resources(adapter->xdp_ring[i]);
	}
}
/** * ixgbe_free_rx_resources - Free Rx Resources * @rx_ring: ring to clean the resources from * * Free all receive software resources
**/ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
{
ixgbe_clean_rx_ring(rx_ring);
#endif for (i = 0; i < adapter->num_rx_queues; i++) if (adapter->rx_ring[i]->desc)
ixgbe_free_rx_resources(adapter->rx_ring[i]);
}
/**
 * ixgbe_max_xdp_frame_size - returns the maximum allowed frame size for XDP
 * @adapter: device handle, pointer to adapter
 *
 * Return: IXGBE_RXBUFFER_2K when PAGE_SIZE is at least 8192 or the adapter
 * has IXGBE_FLAG2_RX_LEGACY set in flags2, IXGBE_RXBUFFER_3K otherwise.
 */
static int ixgbe_max_xdp_frame_size(struct ixgbe_adapter *adapter)
{
	/* parenthesised so the '&' vs '||' grouping is explicit */
	if (PAGE_SIZE >= 8192 || (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY))
		return IXGBE_RXBUFFER_2K;

	return IXGBE_RXBUFFER_3K;
}
/**
 * ixgbe_change_mtu - Change the Maximum Transfer Unit
 * @netdev: network interface device structure
 * @new_mtu: new value for maximum frame size
 *
 * Stores @new_mtu in @netdev and, if the interface is running, reinitializes
 * the adapter via ixgbe_reinit_locked() so the new size takes effect.
 *
 * Return: 0 on success, -EINVAL when ixgbe_enabled_xdp_adapter() is true and
 * @new_mtu plus IXGBE_PKT_HDR_PAD exceeds ixgbe_max_xdp_frame_size().
 **/
static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);

	if (ixgbe_enabled_xdp_adapter(adapter)) {
		int new_frame_size = new_mtu + IXGBE_PKT_HDR_PAD;

		if (new_frame_size > ixgbe_max_xdp_frame_size(adapter)) {
			e_warn(probe, "Requested MTU size is not supported with XDP\n");
			return -EINVAL;
		}
	}

	/*
	 * For 82599EB we cannot allow legacy VFs to enable their receive
	 * paths when MTU greater than 1500 is configured.  So display a
	 * warning that legacy VFs will be disabled.
	 */
	if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) &&
	    (adapter->hw.mac.type == ixgbe_mac_82599EB) &&
	    (new_mtu > ETH_DATA_LEN))
		e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n");

	netdev_dbg(netdev, "changing MTU from %d to %d\n",
		   netdev->mtu, new_mtu);

	/* must set new MTU before calling down or up */
	WRITE_ONCE(netdev->mtu, new_mtu);

	if (netif_running(netdev))
		ixgbe_reinit_locked(adapter);

	return 0;
}
/** * ixgbe_open - Called when a network interface is made active * @netdev: network interface device structure * * Returns 0 on success, negative value on failure * * The open entry point is called when a network interface is made * active by the system (IFF_UP). At this point all resources needed * for transmit and receive operations are allocated, the interrupt * handler is registered with the OS, the watchdog timer is started, * and the stack is notified that the interface is ready.
**/ int ixgbe_open(struct net_device *netdev)
{ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev); struct ixgbe_hw *hw = &adapter->hw; int err, queues;
/* disallow open during test */ if (test_bit(__IXGBE_TESTING, &adapter->state)) return -EBUSY;
err = ixgbe_request_irq(adapter); if (err) goto err_req_irq;
/* Notify the stack of the actual queue counts. */
queues = adapter->num_tx_queues;
err = netif_set_real_num_tx_queues(netdev, queues); if (err) goto err_set_queues;
/** * ixgbe_close - Disables a network interface * @netdev: network interface device structure * * Returns 0, this is not allowed to fail * * The close entry point is called when an interface is de-activated * by the OS. The hardware is still under the drivers control, but * needs to be disabled. A global MAC reset is issued to stop the * hardware, and all transmit and receive resources are freed.
**/ int ixgbe_close(struct net_device *netdev)
{ struct ixgbe_adapter *adapter = ixgbe_from_netdev(netdev);
ixgbe_ptp_stop(adapter);
if (netif_device_present(netdev))
ixgbe_close_suspend(adapter);
switch (hw->mac.type) { case ixgbe_mac_82598EB:
pci_wake_from_d3(pdev, false); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610:
pci_wake_from_d3(pdev, !!wufc); break; default: break;
}
*enable_wake = !!wufc; if (hw->phy.ops.set_phy_power && !*enable_wake)
hw->phy.ops.set_phy_power(hw, false);
ixgbe_release_hw_control(adapter);
if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state))
pci_disable_device(pdev);
bytes = 0;
packets = 0; /* gather some stats to the adapter struct that are per queue */ for (i = 0; i < adapter->num_tx_queues; i++) { struct ixgbe_ring *tx_ring = READ_ONCE(adapter->tx_ring[i]);
if (!tx_ring) continue;
restart_queue += tx_ring->tx_stats.restart_queue;
tx_busy += tx_ring->tx_stats.tx_busy;
bytes += tx_ring->stats.bytes;
packets += tx_ring->stats.packets;
} for (i = 0; i < adapter->num_xdp_queues; i++) { struct ixgbe_ring *xdp_ring = READ_ONCE(adapter->xdp_ring[i]);
/* VF Stats Collection - skip while resetting because these * are not clear on read and otherwise you'll sometimes get * crazy values.
*/ if (!test_bit(__IXGBE_RESETTING, &adapter->state)) { for (i = 0; i < adapter->num_vfs; i++) {
UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i),
adapter->vfinfo[i].last_vfstats.gprc,
adapter->vfinfo[i].vfstats.gprc);
UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i),
adapter->vfinfo[i].last_vfstats.gptc,
adapter->vfinfo[i].vfstats.gptc);
UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i),
IXGBE_PVFGORC_MSB(i),
adapter->vfinfo[i].last_vfstats.gorc,
adapter->vfinfo[i].vfstats.gorc);
UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i),
IXGBE_PVFGOTC_MSB(i),
adapter->vfinfo[i].last_vfstats.gotc,
adapter->vfinfo[i].vfstats.gotc);
UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i),
adapter->vfinfo[i].last_vfstats.mprc,
adapter->vfinfo[i].vfstats.mprc);
}
}
}
/**
 * ixgbe_fdir_reinit_subtask - worker thread to reinit FDIR filter table
 * @adapter: pointer to the device adapter structure
 *
 * Acts only when IXGBE_FLAG2_FDIR_REQUIRES_REINIT is set.  The request is
 * consumed on entry; the tables are then reinitialized only if the
 * interface is up and IXGBE_FLAG_FDIR_HASH_CAPABLE is set.  On success every
 * Tx and XDP ring is marked __IXGBE_TX_FDIR_INIT_DONE and the flow director
 * interrupt is unmasked again.
 **/
static void ixgbe_fdir_reinit_subtask(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	int i;

	if (!(adapter->flags2 & IXGBE_FLAG2_FDIR_REQUIRES_REINIT))
		return;

	/*
	 * Consume the request.  Without this the flag stays set and every
	 * later call would redo the reinit and bump fdir_overflow again.
	 */
	adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT;

	/* if interface is down do nothing */
	if (test_bit(__IXGBE_DOWN, &adapter->state))
		return;

	/* do nothing if we are not using signature filters */
	if (!(adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE))
		return;

	adapter->fdir_overflow++;

	if (ixgbe_reinit_fdir_tables_82599(hw) == 0) {
		for (i = 0; i < adapter->num_tx_queues; i++)
			set_bit(__IXGBE_TX_FDIR_INIT_DONE,
				&adapter->tx_ring[i]->state);
		for (i = 0; i < adapter->num_xdp_queues; i++)
			set_bit(__IXGBE_TX_FDIR_INIT_DONE,
				&adapter->xdp_ring[i]->state);
		/* re-enable flow director interrupts */
		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
	} else {
		e_err(probe, "failed to finish FDIR re-initialization, "
		      "ignored adding FDIR ATR filters\n");
	}
}
/**
 * ixgbe_check_hang_subtask - check for hung queues and dropped interrupts
 * @adapter: pointer to the device adapter structure
 *
 * This function serves two purposes.  First it strobes the interrupt lines
 * in order to make certain interrupts are occurring.  Secondly it sets the
 * bits needed to check for TX hangs.  As a result we should immediately
 * determine if a hang has occurred.
 */
static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter)
{
	struct ixgbe_hw *hw = &adapter->hw;
	u64 eics = 0;
	int i;

	/* If we're down, removing or resetting, just bail */
	if (test_bit(__IXGBE_DOWN, &adapter->state) ||
	    test_bit(__IXGBE_REMOVING, &adapter->state) ||
	    test_bit(__IXGBE_RESETTING, &adapter->state))
		return;

	/*
	 * Force detection of hung controller.  XDP rings are Tx rings too,
	 * so arm them alongside the regular Tx rings.
	 */
	if (netif_carrier_ok(adapter->netdev)) {
		for (i = 0; i < adapter->num_tx_queues; i++)
			set_check_for_tx_hang(adapter->tx_ring[i]);
		for (i = 0; i < adapter->num_xdp_queues; i++)
			set_check_for_tx_hang(adapter->xdp_ring[i]);
	}

	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
		/*
		 * for legacy and MSI interrupts don't set any bits
		 * that are enabled for EIAM, because this operation
		 * would set *both* EIMS and EICS for any bit in EIAM
		 */
		IXGBE_WRITE_REG(hw, IXGBE_EICS,
				(IXGBE_EICS_TCP_TIMER | IXGBE_EICS_OTHER));
	} else {
		/* get one bit for every active tx/rx interrupt vector */
		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct ixgbe_q_vector *qv = adapter->q_vector[i];

			if (qv->rx.ring || qv->tx.ring)
				eics |= BIT_ULL(i);
		}
	}

	/* Cause software interrupt to ensure rings are cleaned */
	ixgbe_irq_rearm_queues(adapter, eics);
}
/** * ixgbe_watchdog_update_link - update the link status * @adapter: pointer to the device adapter structure
**/ staticvoid ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw;
u32 link_speed = adapter->link_speed; bool link_up = adapter->link_up; bool pfc_en = adapter->dcb_cfg.pfc_mode_enable;
if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)) return;
if (hw->mac.ops.check_link) {
hw->mac.ops.check_link(hw, &link_speed, &link_up, false);
} else { /* always assume link is up, if no check link function */
link_speed = IXGBE_LINK_SPEED_10GB_FULL;
link_up = true;
}
if (adapter->ixgbe_ieee_pfc)
pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en);
/* resetting the PF is only needed for MAC before X550 */ if (hw->mac.type >= ixgbe_mac_X550) returnfalse;
for (i = 0; i < adapter->num_vfs; i++) { for (j = 0; j < q_per_pool; j++) {
u32 h, t;
h = IXGBE_READ_REG(hw, IXGBE_PVFTDHN(q_per_pool, i, j));
t = IXGBE_READ_REG(hw, IXGBE_PVFTDTN(q_per_pool, i, j));
if (h != t) returntrue;
}
}
returnfalse;
}
/**
 * ixgbe_watchdog_flush_tx - flush queues on link down
 * @adapter: pointer to the device adapter structure
 *
 * When the carrier is off and either the PF rings or a VF still report
 * pending Tx work, sets __IXGBE_RESET_REQUESTED in the adapter state so a
 * reset can flush that work.  Does nothing while the carrier is up.
 **/
static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter)
{
	if (netif_carrier_ok(adapter->netdev))
		return;

	/* the VF check is skipped when the PF rings already have work queued */
	if (!ixgbe_ring_tx_pending(adapter) && !ixgbe_vf_tx_pending(adapter))
		return;

	/* We've lost link, so the controller stops DMA,
	 * but we've got queued Tx work that's never going
	 * to get done, so reset controller to flush Tx.
	 * (Do the reset outside of interrupt context).
	 */
	e_warn(drv, "initiating reset to clear Tx work after link loss\n");
	set_bit(__IXGBE_RESET_REQUESTED, &adapter->state);
}
gpc = IXGBE_READ_REG(hw, IXGBE_TXDGPC); if (gpc) /* If incrementing then no need for the check below */ return; /* Check to see if a bad DMA write target from an errant or * malicious VF has caused a PCIe error. If so then we can * issue a VFLR to the offending VF(s) and then resume without * requesting a full slot reset.
*/
if (!pdev) return;
/* check status reg for all VFs owned by this PF */ for (vf = 0; vf < adapter->num_vfs; ++vf) { struct pci_dev *vfdev = adapter->vfinfo[vf].vfdev;
u16 status_reg;
/** * ixgbe_watchdog_subtask - check and bring link up * @adapter: pointer to the device adapter structure
**/ staticvoid ixgbe_watchdog_subtask(struct ixgbe_adapter *adapter)
{ /* if interface is down, removing or resetting, do nothing */ if (test_bit(__IXGBE_DOWN, &adapter->state) ||
test_bit(__IXGBE_REMOVING, &adapter->state) ||
test_bit(__IXGBE_RESETTING, &adapter->state)) return;
ixgbe_watchdog_update_link(adapter);
if (adapter->link_up)
ixgbe_watchdog_link_is_up(adapter); else
ixgbe_watchdog_link_is_down(adapter);
/** * ixgbe_sfp_detection_subtask - poll for SFP+ cable * @adapter: the ixgbe adapter structure
**/ staticvoid ixgbe_sfp_detection_subtask(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw; int err;
/* not searching for SFP so there is nothing to do here */ if (!(adapter->flags2 & IXGBE_FLAG2_SEARCH_FOR_SFP) &&
!(adapter->flags2 & IXGBE_FLAG2_SFP_NEEDS_RESET)) return;
if (adapter->sfp_poll_time &&
time_after(adapter->sfp_poll_time, jiffies)) return; /* If not yet time to poll for SFP */
/* someone else is in init, wait until next service event */ if (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state)) return;
err = hw->phy.ops.identify_sfp(hw); if (err == -EOPNOTSUPP) goto sfp_out;
if (err == -ENOENT) { /* If no cable is present, then we need to reset
* the next time we find a good cable. */
adapter->flags2 |= IXGBE_FLAG2_SFP_NEEDS_RESET;
}
/* exit on error */ if (err) goto sfp_out;
/* exit if reset not needed */ if (!(adapter->flags2 & IXGBE_FLAG2_SFP_NEEDS_RESET)) goto sfp_out;
adapter->flags2 &= ~IXGBE_FLAG2_SFP_NEEDS_RESET;
/* * A module may be identified correctly, but the EEPROM may not have * support for that module. setup_sfp() will fail in that case, so * we should not allow that module to load.
*/ if (hw->mac.type == ixgbe_mac_82598EB)
err = hw->phy.ops.reset(hw); else
err = hw->mac.ops.setup_sfp(hw);
if (err == -EOPNOTSUPP &&
adapter->netdev->reg_state == NETREG_REGISTERED) {
e_dev_err("failed to initialize because an unsupported " "SFP+ module type was detected.\n");
e_dev_err("Reload the driver after installing a " "supported module.\n");
unregister_netdev(adapter->netdev);
}
}
/** * ixgbe_sfp_link_config_subtask - set up link SFP after module install * @adapter: the ixgbe adapter structure
**/ staticvoid ixgbe_sfp_link_config_subtask(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw;
u32 cap_speed;
u32 speed; bool autoneg = false;
if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_CONFIG)) return;
/* someone else is in init, wait until next service event */ if (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state)) return;
if (hw->mac.ops.get_fw_ver && hw->mac.ops.get_fw_ver(hw)) return 0;
if (hw->api_maj_ver > IXGBE_FW_API_VER_MAJOR) {
e_dev_err("The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n");
adapter->flags2 |= IXGBE_FLAG2_API_MISMATCH; return -EOPNOTSUPP;
} elseif (hw->api_maj_ver == IXGBE_FW_API_VER_MAJOR &&
hw->api_min_ver > IXGBE_FW_API_VER_MINOR + IXGBE_FW_API_VER_DIFF_ALLOWED) {
e_dev_info("The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n");
adapter->flags2 |= IXGBE_FLAG2_API_MISMATCH;
} elseif (hw->api_maj_ver < IXGBE_FW_API_VER_MAJOR ||
hw->api_min_ver < IXGBE_FW_API_VER_MINOR - IXGBE_FW_API_VER_DIFF_ALLOWED) {
e_dev_info("The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n");
adapter->flags2 |= IXGBE_FLAG2_API_MISMATCH;
}
return 0;
}
/** * ixgbe_check_fw_error - Check firmware for errors * @adapter: the adapter private structure * * Check firmware errors in register FWSM
*/ staticbool ixgbe_check_fw_error(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw;
u32 fwsm; int err;
/* skip if E610's FW is reloading, warning in that case may be misleading */ if (fwsm & IXGBE_FWSM_EXT_ERR_IND_MASK ||
(!(fwsm & IXGBE_FWSM_FW_VAL_BIT) && !(hw->mac.type == ixgbe_mac_e610)))
e_dev_warn("Warning firmware error detected FWSM: 0x%08X\n",
fwsm);
if (hw->mac.ops.fw_recovery_mode && hw->mac.ops.fw_recovery_mode(hw)) {
e_dev_err("Firmware recovery mode detected. Limiting functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n"); returntrue;
} if (!(adapter->flags2 & IXGBE_FLAG2_API_MISMATCH)) {
err = ixgbe_check_fw_api_mismatch(adapter); if (err) returntrue;
}
/* return here if FW rollback mode has been already detected */ if (adapter->flags2 & IXGBE_FLAG2_FW_ROLLBACK) returnfalse;
if (hw->mac.ops.get_fw_ver && hw->mac.ops.get_fw_ver(hw)) goto no_version;
if (hw->mac.ops.get_nvm_ver &&
hw->mac.ops.get_nvm_ver(hw, nvm_info)) goto no_version;
snprintf(ver_buff, sizeof(ver_buff), "Current version is NVM:%x.%x.%x, FW:%d.%d. ",
nvm_info->major, nvm_info->minor, nvm_info->eetrack,
hw->fw_maj_ver, hw->fw_maj_ver);
no_version:
e_dev_warn("Firmware rollback mode detected. %sDevice may exhibit limited functionality. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware rollback mode.",
ver_buff);
/* initialize outer IP header fields */ if (ip.v4->version == 4) { unsignedchar *csum_start = skb_checksum_start(skb); unsignedchar *trans_start = ip.hdr + (ip.v4->ihl * 4); int len = csum_start - trans_start;
/* IP header will have to cancel out any data that * is not a part of the outer IP header, so set to * a reverse csum if needed, else init check to 0.
*/
ip.v4->check = (skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) ?
csum_fold(csum_partial(trans_start,
len, 0)) : 0;
type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
/* * Check Context must be set if Tx switch is enabled, which it * always is for case where virtual functions are running
*/
olinfo_status |= IXGBE_SET_FLAG(tx_flags,
IXGBE_TX_FLAGS_CC,
IXGBE_ADVTXD_CC);
/* set the timestamp */
first->time_stamp = jiffies;
skb_tx_timestamp(skb);
/* * Force memory writes to complete before letting h/w know there * are new descriptors to fetch. (Only applicable for weak-ordered * memory model archs, such as IA-64). * * We also need this memory barrier to make certain all of the * status bits have been updated before next_to_watch is written.
*/
wmb();
/* set next_to_watch value indicating a packet is present */
first->next_to_watch = tx_desc;
i++; if (i == tx_ring->count)
i = 0;
tx_ring->next_to_use = i;
ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
writel(i, tx_ring->tail);
}
/* if ring doesn't have a interrupt vector, cannot perform ATR */ if (!q_vector) return;
/* do nothing if sampling is disabled */ if (!ring->atr_sample_rate) return;
ring->atr_count++;
/* currently only IPv4/IPv6 with TCP is supported */ if ((first->protocol != htons(ETH_P_IP)) &&
(first->protocol != htons(ETH_P_IPV6))) return;
/* snag network header to get L4 type and address */
skb = first->skb;
hdr.network = skb_network_header(skb); if (unlikely(hdr.network <= skb->data)) return; if (skb->encapsulation &&
first->protocol == htons(ETH_P_IP) &&
hdr.ipv4->protocol == IPPROTO_UDP) { struct ixgbe_adapter *adapter = q_vector->adapter;
if (unlikely(skb_tail_pointer(skb) < hdr.network +
vxlan_headroom(0))) return;
/* verify the port is recognized as VXLAN */ if (adapter->vxlan_port &&
udp_hdr(skb)->dest == adapter->vxlan_port)
hdr.network = skb_inner_network_header(skb);
if (adapter->geneve_port &&
udp_hdr(skb)->dest == adapter->geneve_port)
hdr.network = skb_inner_network_header(skb);
}
/* Make sure we have at least [minimum IPv4 header + TCP] * or [IPv6 header] bytes
*/ if (unlikely(skb_tail_pointer(skb) < hdr.network + 40)) return;
/* Currently only IPv4/IPv6 with TCP is supported */ switch (hdr.ipv4->version) { case IPVERSION: /* access ihl as u8 to avoid unaligned access on ia64 */
hlen = (hdr.network[0] & 0x0F) << 2;
l4_proto = hdr.ipv4->protocol; break; case 6:
hlen = hdr.network - skb->data;
l4_proto = ipv6_find_hdr(skb, &hlen, IPPROTO_TCP, NULL, NULL);
hlen -= hdr.network - skb->data; break; default: return;
}
if (l4_proto != IPPROTO_TCP) return;
if (unlikely(skb_tail_pointer(skb) < hdr.network +
hlen + sizeof(struct tcphdr))) return;
th = (struct tcphdr *)(hdr.network + hlen);
/* skip this packet since the socket is closing */ if (th->fin) return;
/* sample on all syn packets or once every atr sample count */ if (!th->syn && (ring->atr_count < ring->atr_sample_rate)) return;
/* * src and dst are inverted, think how the receiver sees them * * The input is broken into two sections, a non-compressed section * containing vm_pool, vlan_id, and flow_type. The rest of the data * is XORed together and stored in the compressed dword.
*/
input.formatted.vlan_id = vlan_id;
/* * since src port and flex bytes occupy the same word XOR them together * and write the value to source port portion of compressed dword
*/ if (first->tx_flags & (IXGBE_TX_FLAGS_SW_VLAN | IXGBE_TX_FLAGS_HW_VLAN))
common.port.src ^= th->dest ^ htons(ETH_P_8021Q); else
common.port.src ^= th->dest ^ first->protocol;
common.port.dst ^= th->source;
if (hdr.network != skb_network_header(skb))
input.formatted.flow_type |= IXGBE_ATR_L4TYPE_TUNNEL_MASK;
/* This assumes the Rx queue and Tx queue are bound to the same CPU */
ixgbe_fdir_add_signature_filter_82599(&q_vector->adapter->hw,
input, common, ring->queue_index);
}
/* * only execute the code below if protocol is FCoE * or FIP and we have FCoE enabled on the adapter
*/ switch (vlan_get_protocol(skb)) { case htons(ETH_P_FCOE): case htons(ETH_P_FIP):
adapter = ixgbe_from_netdev(dev);
data = skb_frag_address(&sinfo->frags[i]);
len = skb_frag_size(&sinfo->frags[i]);
i++;
} /* put descriptor type bits */
tx_desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD);
/* Avoid any potential race with xdp_xmit and cleanup */
smp_wmb();
/* * need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD, * + 1 desc for skb_headlen/IXGBE_MAX_DATA_PER_TXD, * + 2 desc gap to keep tail from touching head, * + 1 desc for context descriptor, * otherwise try next time
*/ for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
count += TXD_USE_COUNT(skb_frag_size(
&skb_shinfo(skb)->frags[f]));
if (ixgbe_maybe_stop_tx(tx_ring, count + 3)) {
tx_ring->tx_stats.tx_busy++; return NETDEV_TX_BUSY;
}
/* record the location of the first descriptor for this packet */
first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
first->skb = skb;
first->bytecount = skb->len;
first->gso_segs = 1;
/* if we have a HW VLAN tag being added default to the HW one */ if (skb_vlan_tag_present(skb)) {
tx_flags |= skb_vlan_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT;
tx_flags |= IXGBE_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN check the next protocol and store the tag */
} elseif (protocol == htons(ETH_P_8021Q)) { struct vlan_hdr *vhdr, _vhdr;
vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr); if (!vhdr) goto out_drop;
#ifdef CONFIG_PCI_IOV /* * Use the l2switch_enable flag - would be false if the DMA * Tx switch had been disabled.
*/ if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
tx_flags |= IXGBE_TX_FLAGS_CC;
/* * The minimum packet size for olinfo paylen is 17 so pad the skb * in order to meet this minimum size requirement.
*/ if (skb_put_padto(skb, 17)) return NETDEV_TX_OK;
tx_ring = ring ? ring : adapter->tx_ring[skb_get_queue_mapping(skb)]; if (unlikely(test_bit(__IXGBE_TX_DISABLED, &tx_ring->state))) return NETDEV_TX_BUSY;
/* we only care about macvlans... */ if (!netif_is_macvlan(vdev)) return 0;
/* that have hardware offload enabled... */
accel = macvlan_accel_priv(vdev); if (!accel) return 0;
/* If we can relocate to a different bit do so */
pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools); if (pool < adapter->num_rx_pools) {
set_bit(pool, adapter->fwd_bitmask);
accel->pool = pool; return 0;
}
/* if we cannot find a free pool then disable the offload */
netdev_err(vdev, "L2FW offload disabled due to lack of queue resources\n");
macvlan_release_l2fw_offload(vdev);
/* unbind the queues and drop the subordinate channel config */
netdev_unbind_sb_channel(adapter->netdev, vdev);
netdev_set_sb_channel(vdev, 0);
/* Hardware has to reinitialize queues and interrupts to * match packet buffer alignment. Unfortunately, the * hardware is not flexible enough to do this dynamically.
*/ if (netif_running(dev))
ixgbe_close(dev); else
ixgbe_reset(adapter);
ixgbe_clear_interrupt_scheme(adapter);
#ifdef CONFIG_IXGBE_DCB if (tc) { if (adapter->xdp_prog) {
e_warn(probe, "DCB is not supported with XDP\n");
ixgbe_init_interrupt_scheme(adapter); if (netif_running(dev))
ixgbe_open(dev); return -EINVAL;
}
if ((uhtid != 0x800) && (uhtid >= IXGBE_MAX_LINK_HANDLE)) return -EINVAL;
/* Clear this filter in the link data it is associated with */ if (uhtid != 0x800) {
jump = adapter->jump_tables[uhtid]; if (!jump) return -EINVAL; if (!test_bit(loc - 1, jump->child_loc_map)) return -EINVAL;
clear_bit(loc - 1, jump->child_loc_map);
}
/* Check if the filter being deleted is a link */ for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) {
jump = adapter->jump_tables[i]; if (jump && jump->link_hdl == hdl) { /* Delete filters in the hardware in the child hash * table associated with this link
*/ for (j = 0; j < IXGBE_MAX_HW_ENTRIES; j++) { if (!test_bit(j, jump->child_loc_map)) continue;
spin_lock(&adapter->fdir_perfect_lock);
err = ixgbe_update_ethtool_fdir_entry(adapter,
NULL,
j + 1);
spin_unlock(&adapter->fdir_perfect_lock);
clear_bit(j, jump->child_loc_map);
} /* Remove resources for this link */
kfree(jump->input);
kfree(jump->mask);
kfree(jump);
adapter->jump_tables[i] = NULL; return err;
}
}
/* At the moment cls_u32 jumps to network layer and skips past * L2 headers. The canonical method to match L2 frames is to use * negative values. However this is error prone at best but really * just broken because there is no way to "know" what sort of hdr * is in front of the network layer. Fix cls_u32 to support L2 * headers when needed.
*/ if (protocol != htons(ETH_P_IP)) return err;
if (loc >= ((1024 << adapter->fdir_pballoc) - 2)) {
e_err(drv, "Location out of range\n"); return err;
}
/* cls u32 is a graph starting at root node 0x800. The driver tracks * links and also the fields used to advance the parser across each * link (e.g. nexthdr/eat parameters from 'tc'). This way we can map * the u32 graph onto the hardware parse graph denoted in ixgbe_model.h * To add support for new nodes update ixgbe_model.h parse structures * this function _should_ be generic try not to hardcode values here.
*/ if (uhtid == 0x800) {
field_ptr = (adapter->jump_tables[0])->mat;
} else { if (uhtid >= IXGBE_MAX_LINK_HANDLE) return err; if (!adapter->jump_tables[uhtid]) return err;
field_ptr = (adapter->jump_tables[uhtid])->mat;
}
if (!field_ptr) return err;
/* At this point we know the field_ptr is valid and need to either * build cls_u32 link or attach filter. Because adding a link to * a handle that does not exist is invalid and the same for adding * rules to handles that don't exist.
*/
if (link_uhtid) { struct ixgbe_nexthdr *nexthdr = ixgbe_ipv4_jumps;
if (link_uhtid >= IXGBE_MAX_LINK_HANDLE) return err;
if (!test_bit(link_uhtid - 1, &adapter->tables)) return err;
/* Multiple filters as links to the same hash table are not * supported. To add a new filter with the same next header * but different match/jump conditions, create a new hash table * and link to it.
*/ if (adapter->jump_tables[link_uhtid] &&
(adapter->jump_tables[link_uhtid])->link_hdl) {
e_err(drv, "Link filter exists for link: %x\n",
link_uhtid); return err;
}
for (i = 0; nexthdr[i].jump; i++) { if (nexthdr[i].o != cls->knode.sel->offoff ||
nexthdr[i].s != cls->knode.sel->offshift ||
nexthdr[i].m !=
(__force u32)cls->knode.sel->offmask) return err;
if ((uhtid != 0x800) && (adapter->jump_tables[uhtid])) { if ((adapter->jump_tables[uhtid])->input)
memcpy(input, (adapter->jump_tables[uhtid])->input, sizeof(*input)); if ((adapter->jump_tables[uhtid])->mask)
memcpy(mask, (adapter->jump_tables[uhtid])->mask, sizeof(*mask));
/* Lookup in all child hash tables if this location is already * filled with a filter
*/ for (i = 1; i < IXGBE_MAX_LINK_HANDLE; i++) { struct ixgbe_jump_table *link = adapter->jump_tables[i];
/* go back to full RSS if we're not running SR-IOV */ if (!adapter->ring_feature[RING_F_VMDQ].offset)
adapter->flags &= ~(IXGBE_FLAG_VMDQ_ENABLED |
IXGBE_FLAG_SRIOV_ENABLED);
/* Make sure RSC matches LRO, reset if change */ if (!(features & NETIF_F_LRO)) { if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)
need_reset = true;
adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED;
} elseif ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) &&
!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { if (adapter->rx_itr_setting == 1 ||
adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) {
adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED;
need_reset = true;
} elseif ((changed ^ features) & NETIF_F_LRO) {
e_info(probe, "rx-usecs set too low, " "disabling RSC\n");
}
}
/* * Check if Flow Director n-tuple support or hw_tc support was * enabled or disabled. If the state changed, we need to reset.
*/ if ((features & NETIF_F_NTUPLE) || (features & NETIF_F_HW_TC)) { /* turn off ATR, enable perfect filters and reset */ if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE))
need_reset = true;
adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE;
} else { /* turn off perfect filters, enable ATR and reset */ if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)
need_reset = true;
/* We cannot enable ATR if SR-IOV is enabled */ if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED || /* We cannot enable ATR if we have 2 or more tcs */
(adapter->hw_tcs > 1) || /* We cannot enable ATR if RSS is disabled */
(adapter->ring_feature[RING_F_RSS].limit <= 1) || /* A sample rate of 0 indicates ATR disabled */
(!adapter->atr_sample_rate))
; /* do nothing not supported */ else/* otherwise supported and set the flag */
adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE;
}
/**
 * ixgbe_configure_bridge_mode - set various bridge modes
 * @adapter: the private structure
 * @mode: requested bridge mode
 *
 * Configure some settings required for various bridge modes.
 *
 * Return: 0 when @mode is BRIDGE_MODE_VEPA or BRIDGE_MODE_VEB and the
 * registers have been programmed, -EINVAL for any other mode.
 **/
static int ixgbe_configure_bridge_mode(struct ixgbe_adapter *adapter,
				       __u16 mode)
{
	struct ixgbe_hw *hw = &adapter->hw;
	unsigned int p, num_pools;
	u32 vmdctl;

	switch (mode) {
	case BRIDGE_MODE_VEPA:
		/* disable Tx loopback, rely on switch hairpin mode */
		IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC, 0);

		/* must enable Rx switching replication to allow multicast
		 * packet reception on all VFs, and to enable source address
		 * pruning.
		 */
		vmdctl = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
		vmdctl |= IXGBE_VT_CTL_REPLEN;
		IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl);

		/* enable Rx source address pruning. Note, this requires
		 * replication to be enabled or else it does nothing.
		 */
		num_pools = adapter->num_vfs + adapter->num_rx_pools;
		for (p = 0; p < num_pools; p++) {
			if (hw->mac.ops.set_source_address_pruning)
				hw->mac.ops.set_source_address_pruning(hw,
								       true,
								       p);
		}
		break;
	case BRIDGE_MODE_VEB:
		/* enable Tx loopback for internal VF/PF communication */
		IXGBE_WRITE_REG(hw, IXGBE_PFDTXGSWC,
				IXGBE_PFDTXGSWC_VT_LBEN);

		/* disable Rx switching replication unless we have SR-IOV
		 * virtual functions
		 */
		vmdctl = IXGBE_READ_REG(hw, IXGBE_VMD_CTL);
		if (!adapter->num_vfs)
			vmdctl &= ~IXGBE_VT_CTL_REPLEN;
		IXGBE_WRITE_REG(hw, IXGBE_VMD_CTL, vmdctl);

		/* disable Rx source address pruning, since we don't expect to
		 * be receiving external loopback of our transmitted frames.
		 */
		num_pools = adapter->num_vfs + adapter->num_rx_pools;
		for (p = 0; p < num_pools; p++) {
			if (hw->mac.ops.set_source_address_pruning)
				hw->mac.ops.set_source_address_pruning(hw,
								       false,
								       p);
		}
		break;
	default:
		return -EINVAL;
	}

	/* both supported modes fall out of the switch on success */
	return 0;
}
staticvoid *ixgbe_fwd_add(struct net_device *pdev, struct net_device *vdev)
{ struct ixgbe_adapter *adapter = ixgbe_from_netdev(pdev); struct ixgbe_fwd_adapter *accel; int tcs = adapter->hw_tcs ? : 1; int pool, err;
if (adapter->xdp_prog) {
e_warn(probe, "L2FW offload is not supported with XDP\n"); return ERR_PTR(-EINVAL);
}
/* The hardware supported by ixgbe only filters on the destination MAC * address. In order to avoid issues we only support offloading modes * where the hardware can actually provide the functionality.
*/ if (!macvlan_supports_dest_filter(vdev)) return ERR_PTR(-EMEDIUMTYPE);
/* We need to lock down the macvlan to be a single queue device so that * we can reuse the tc_to_txq field in the macvlan netdev to represent * the queue mapping to our netdev.
*/ if (netif_is_multiqueue(vdev)) return ERR_PTR(-ERANGE);
pool = find_first_zero_bit(adapter->fwd_bitmask, adapter->num_rx_pools); if (pool == adapter->num_rx_pools) {
u16 used_pools = adapter->num_vfs + adapter->num_rx_pools;
u16 reserved_pools;
/* Hardware has a limited number of available pools. Each VF, * and the PF require a pool. Check to ensure we don't * attempt to use more then the available number of pools.
*/ if (used_pools >= IXGBE_MAX_VF_FUNCTIONS) return ERR_PTR(-EBUSY);
/* Enable VMDq flag so device will be set in VM mode */
adapter->flags |= IXGBE_FLAG_VMDQ_ENABLED |
IXGBE_FLAG_SRIOV_ENABLED;
/* Try to reserve as many queues per pool as possible, * we start with the configurations that support 4 queues * per pools, followed by 2, and then by just 1 per pool.
*/ if (used_pools < 32 && adapter->num_rx_pools < 16)
reserved_pools = min_t(u16,
32 - used_pools,
16 - adapter->num_rx_pools); elseif (adapter->num_rx_pools < 32)
reserved_pools = min_t(u16,
64 - used_pools,
32 - adapter->num_rx_pools); else
reserved_pools = 64 - used_pools;
/* Allow remaining Rx packets to get flushed out of the * Rx FIFO before we drop the netdev for the ring.
*/
usleep_range(10000, 20000);
for (i = 0; i < adapter->num_rx_queues_per_pool; i++) { struct ixgbe_ring *ring = adapter->rx_ring[rxbase + i]; struct ixgbe_q_vector *qv = ring->q_vector;
/* Make sure we aren't processing any packets and clear * netdev to shut down the ring.
*/ if (netif_running(adapter->netdev))
napi_synchronize(&qv->napi);
ring->netdev = NULL;
}
/* unbind the queues and drop the subordinate channel config */
netdev_unbind_sb_channel(pdev, accel->netdev);
netdev_set_sb_channel(accel->netdev, 0);
/* Make certain the headers can be described by a context descriptor */
mac_hdr_len = skb_network_offset(skb); if (unlikely(mac_hdr_len > IXGBE_MAX_MAC_HDR_LEN)) return features & ~(NETIF_F_HW_CSUM |
NETIF_F_SCTP_CRC |
NETIF_F_GSO_UDP_L4 |
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_TSO |
NETIF_F_TSO6);
/* We can only support IPV4 TSO in tunnels if we can mangle the * inner IP ID field, so strip TSO if MANGLEID is not supported. * IPsec offoad sets skb->encapsulation but still can handle * the TSO, so it's the exception.
*/ if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID)) { #ifdef CONFIG_IXGBE_IPSEC if (!secpath_exists(skb)) #endif
features &= ~NETIF_F_TSO;
}
if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) return -EINVAL;
if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) return -EINVAL;
/* verify ixgbe ring attributes are sufficient for XDP */ for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbe_ring *ring = adapter->rx_ring[i];
if (ring_is_rsc_enabled(ring)) return -EINVAL;
if (frame_size > ixgbe_rx_bufsz(ring)) return -EINVAL;
}
/* if the number of cpus is much larger than the maximum of queues, * we should stop it and then return with ENOMEM like before.
*/ if (nr_cpu_ids > IXGBE_MAX_XDP_QS * 2) return -ENOMEM;
/* If transitioning XDP modes reconfigure rings */ if (need_reset) { int err;
if (!prog) /* Wait until ndo_xsk_wakeup completes. */
synchronize_rcu();
err = ixgbe_setup_tc(dev, adapter->hw_tcs);
if (err) return -EINVAL; if (!prog)
xdp_features_clear_redirect_target(dev);
} else { for (i = 0; i < adapter->num_rx_queues; i++) {
WRITE_ONCE(adapter->rx_ring[i]->xdp_prog,
adapter->xdp_prog);
}
}
if (old_prog)
bpf_prog_put(old_prog);
/* Kick start the NAPI context if there is an AF_XDP socket open * on that queue id. This so that receiving will start.
*/ if (need_reset && prog) {
num_queues = min_t(int, adapter->num_rx_queues,
adapter->num_xdp_queues); for (i = 0; i < num_queues; i++) if (adapter->xdp_ring[i]->xsk_pool)
(void)ixgbe_xsk_wakeup(adapter->netdev, i,
XDP_WAKEUP_RX);
xdp_features_set_redirect_target(dev, true);
}
switch (xdp->command) { case XDP_SETUP_PROG: return ixgbe_xdp_setup(dev, xdp->prog); case XDP_SETUP_XSK_POOL: return ixgbe_xsk_pool_setup(adapter, xdp->xsk.pool,
xdp->xsk.queue_id);
default: return -EINVAL;
}
}
/**
 * ixgbe_xdp_ring_update_tail - write the ring's next_to_use to its tail
 * @ring: ring whose tail location is written
 *
 * Issues a write barrier and then writes ring->next_to_use to ring->tail.
 * No locking is done here.
 */
void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring)
{ /* Force memory writes to complete before letting h/w know there
   * are new descriptors to fetch. The barrier has to stay ahead of
   * the tail write below.
   */
wmb();
writel(ring->next_to_use, ring->tail);
}
/**
 * ixgbe_xdp_ring_update_tail_locked - update the ring tail under tx_lock
 * @ring: ring whose tail is updated
 *
 * Calls ixgbe_xdp_ring_update_tail() and, whenever the
 * ixgbe_xdp_locking_key static branch is enabled, brackets that call with
 * ring->tx_lock.
 */
void ixgbe_xdp_ring_update_tail_locked(struct ixgbe_ring *ring)
{
	if (static_branch_unlikely(&ixgbe_xdp_locking_key)) {
		spin_lock(&ring->tx_lock);
	}

	ixgbe_xdp_ring_update_tail(ring);

	if (static_branch_unlikely(&ixgbe_xdp_locking_key)) {
		spin_unlock(&ring->tx_lock);
	}
}
staticint ixgbe_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags)
{ struct ixgbe_adapter *adapter = ixgbe_from_netdev(dev); struct ixgbe_ring *ring; int nxmit = 0; int i;
if (unlikely(test_bit(__IXGBE_DOWN, &adapter->state))) return -ENETDOWN;
if (!netif_carrier_ok(adapter->netdev) ||
!netif_running(adapter->netdev)) return -ENETDOWN;
if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL;
/* During program transitions its possible adapter->xdp_prog is assigned * but ring has not been configured yet. In this case simply abort xmit.
*/
ring = adapter->xdp_prog ? ixgbe_determine_xdp_ring(adapter) : NULL; if (unlikely(!ring)) return -ENXIO;
if (unlikely(test_bit(__IXGBE_TX_DISABLED, &ring->state))) return -ENXIO;
if (static_branch_unlikely(&ixgbe_xdp_locking_key))
spin_lock(&ring->tx_lock);
for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; int err;
/* write value back with RXDCTL.ENABLE bit cleared */
IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), rxdctl);
/* RXDCTL.EN may not change on 82598 if link is down, so skip it */ if (hw->mac.type == ixgbe_mac_82598EB &&
!(IXGBE_READ_REG(hw, IXGBE_LINKS) & IXGBE_LINKS_UP)) return;
switch (adapter->hw.mac.type) { case ixgbe_mac_82598EB:
mask = qmask & IXGBE_EIMC_RTX_QUEUE;
IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, mask); break; case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a:
mask = (qmask & 0xFFFFFFFF); if (mask)
IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(0), mask);
mask = (qmask >> 32); if (mask)
IXGBE_WRITE_REG(hw, IXGBE_EIMS_EX(1), mask); break; default: break;
}
IXGBE_WRITE_FLUSH(&adapter->hw); if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
synchronize_irq(adapter->msix_entries[ring].vector); else
synchronize_irq(adapter->pdev->irq);
}
/** * ixgbe_txrx_ring_disable - Disable Rx/Tx/XDP Tx rings * @adapter: adapter structure * @ring: ring index * * This function disables a certain Rx/Tx/XDP Tx ring. The function * assumes that the netdev is running.
**/ void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring)
{ struct ixgbe_ring *rx_ring, *tx_ring, *xdp_ring;
/* Rx/Tx/XDP Tx share the same napi context. */
napi_disable(&rx_ring->q_vector->napi);
ixgbe_disable_txr(adapter, tx_ring); if (xdp_ring)
ixgbe_disable_txr(adapter, xdp_ring);
ixgbe_disable_rxr_hw(adapter, rx_ring);
if (xdp_ring)
synchronize_rcu();
ixgbe_clean_tx_ring(tx_ring); if (xdp_ring)
ixgbe_clean_tx_ring(xdp_ring);
ixgbe_clean_rx_ring(rx_ring);
ixgbe_reset_txr_stats(tx_ring); if (xdp_ring)
ixgbe_reset_txr_stats(xdp_ring);
ixgbe_reset_rxr_stats(rx_ring);
}
/**
 * ixgbe_txrx_ring_enable - Enable Rx/Tx/XDP Tx rings
 * @adapter: adapter structure
 * @ring: ring index
 *
 * This function enables a certain Rx/Tx/XDP Tx ring. The function
 * assumes that the netdev is running.
 *
 * The XDP Tx ring for @ring may be NULL; every XDP step is skipped in
 * that case.
 **/
void ixgbe_txrx_ring_enable(struct ixgbe_adapter *adapter, int ring)
{
	/* Look the rings up before use; they were previously dereferenced
	 * without ever being assigned.
	 */
	struct ixgbe_ring *rx_ring = adapter->rx_ring[ring];
	struct ixgbe_ring *tx_ring = adapter->tx_ring[ring];
	struct ixgbe_ring *xdp_ring = adapter->xdp_ring[ring];

	ixgbe_configure_tx_ring(adapter, tx_ring);
	if (xdp_ring)
		ixgbe_configure_tx_ring(adapter, xdp_ring);
	ixgbe_configure_rx_ring(adapter, rx_ring);

	clear_bit(__IXGBE_TX_DISABLED, &tx_ring->state);
	if (xdp_ring)
		clear_bit(__IXGBE_TX_DISABLED, &xdp_ring->state);

	/* Rx/Tx/XDP Tx share the same napi context. */
	napi_enable(&rx_ring->q_vector->napi);

	/* unmask the interrupt for this queue and flush the posted write */
	ixgbe_irq_enable_queues(adapter, BIT_ULL(ring));
	IXGBE_WRITE_FLUSH(&adapter->hw);
}
/**
 * ixgbe_enumerate_functions - Get the number of ports this device has
 * @adapter: adapter structure
 *
 * This function enumerates the physical functions co-located on a single slot,
 * in order to determine how many ports a device has. This is most useful in
 * determining the required GT/s of PCIe bandwidth necessary for optimal
 * performance.
 *
 * Return: the function count, or -1 when an examined device does not share
 * this device's vendor and device ID.
 **/
staticinlineint ixgbe_enumerate_functions(struct ixgbe_adapter *adapter)
{ struct pci_dev *entry, *pdev = adapter->pdev; int physfns = 0;
/* Some cards can not use the generic count PCIe functions method,
 * because they are behind a parent switch, so we hardcode these with
 * the correct number of functions.
 */
if (ixgbe_pcie_from_parent(&adapter->hw))
physfns = 4;
/* NOTE(review): the statement that assigns 'entry' and opens the scope
 * closed just before 'return physfns' is not visible in this excerpt
 * (presumably an iteration over the devices on the bus) - confirm against
 * the complete file.
 */
/* When the devices on the bus don't all match our device ID,
 * we can't reliably determine the correct number of
 * functions. This can occur if a function has been direct
 * attached to a virtual machine using VT-d, for example. In
 * this case, simply return -1 to indicate this.
 */
if ((entry->vendor != pdev->vendor) ||
(entry->device != pdev->device)) return -1;
physfns++;
}
return physfns;
}
/**
 * ixgbe_wol_supported - Check whether device supports WoL
 * @adapter: the adapter private structure
 * @device_id: the device ID
 * @subdevice_id: the subsystem device ID
 *
 * This function is used by probe and ethtool to determine
 * which devices have WoL support.
 *
 * Return: true if the MAC type, EEPROM capability word, or the
 * device/subdevice ID pair indicates WoL support for this port, false
 * otherwise.
 **/
bool ixgbe_wol_supported(struct ixgbe_adapter *adapter, u16 device_id,
			 u16 subdevice_id)
{
	struct ixgbe_hw *hw = &adapter->hw;
	u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK;

	/* WOL not supported on 82598 */
	if (hw->mac.type == ixgbe_mac_82598EB)
		return false;

	/* check eeprom to see if WOL is enabled for X540 and newer */
	if (hw->mac.type >= ixgbe_mac_X540) {
		if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) ||
		    ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) &&
		     (hw->bus.func == 0)))
			return true;
	}

	/* WOL is determined based on device IDs for 82599 MACs */
	switch (device_id) {
	case IXGBE_DEV_ID_82599_SFP:
		/* Only these subdevices could support WOL */
		switch (subdevice_id) {
		case IXGBE_SUBDEV_ID_82599_560FLR:
		case IXGBE_SUBDEV_ID_82599_LOM_SNAP6:
		case IXGBE_SUBDEV_ID_82599_SFP_WOL0:
		case IXGBE_SUBDEV_ID_82599_SFP_2OCP:
			/* only support first port */
			if (hw->bus.func != 0)
				break;
			fallthrough;
		case IXGBE_SUBDEV_ID_82599_SP_560FLR:
		case IXGBE_SUBDEV_ID_82599_SFP:
		case IXGBE_SUBDEV_ID_82599_RNDC:
		case IXGBE_SUBDEV_ID_82599_ECNA_DP:
		case IXGBE_SUBDEV_ID_82599_SFP_1OCP:
		case IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM1:
		case IXGBE_SUBDEV_ID_82599_SFP_LOM_OEM2:
			return true;
		}
		break;
	case IXGBE_DEV_ID_82599EN_SFP:
		/* Only these subdevices support WOL */
		switch (subdevice_id) {
		case IXGBE_SUBDEV_ID_82599EN_SFP_OCP1:
			return true;
		}
		break;
	case IXGBE_DEV_ID_82599_COMBO_BACKPLANE:
		/* All except this subdevice support WOL */
		if (subdevice_id != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ)
			return true;
		break;
	case IXGBE_DEV_ID_82599_KX4:
		return true;
	default:
		break;
	}

	return false;
}
/** * ixgbe_set_fw_version_e610 - Set FW version specifically on E610 adapters * @adapter: the adapter private structure * * This function is used by probe and ethtool to determine the FW version to * format to display. The FW version is taken from the EEPROM/NVM. *
*/ void ixgbe_set_fw_version_e610(struct ixgbe_adapter *adapter)
{ struct ixgbe_orom_info *orom = &adapter->hw.flash.orom; struct ixgbe_nvm_info *nvm = &adapter->hw.flash.nvm;
/** * ixgbe_set_fw_version - Set FW version * @adapter: the adapter private structure * * This function is used by probe and ethtool to determine the FW version to * format to display. The FW version is taken from the EEPROM/NVM.
*/ staticvoid ixgbe_set_fw_version(struct ixgbe_adapter *adapter)
{ struct ixgbe_hw *hw = &adapter->hw; struct ixgbe_nvm_version nvm_ver;
if (adapter->hw.mac.type == ixgbe_mac_e610) {
ixgbe_set_fw_version_e610(adapter); return;
}
if (hw->mac.ops.get_bus_info)
hw->mac.ops.get_bus_info(hw);
pci_set_drvdata(pdev, adapter); /* We are creating devlink interface so NIC can be managed, * e.g. new NVM image loaded
*/
devl_lock(adapter->devlink);
ixgbe_devlink_register_port(adapter);
SET_NETDEV_DEVLINK_PORT(adapter->netdev,
&adapter->devlink_port);
ixgbe_devlink_init_regions(adapter);
devl_register(adapter->devlink);
devl_unlock(adapter->devlink);
/** * ixgbe_probe - Device Initialization Routine * @pdev: PCI device information struct * @ent: entry in ixgbe_pci_tbl * * Returns 0 on success, negative on failure * * ixgbe_probe initializes an adapter identified by a pci_dev structure. * The OS initialization, configuring of the adapter private structure, * and a hardware reset occur.
**/ staticint ixgbe_probe(struct pci_dev *pdev, conststruct pci_device_id *ent)
{ struct net_device *netdev; struct ixgbe_netdevice_priv *netdev_priv_wrapper; struct ixgbe_adapter *adapter = NULL; struct ixgbe_hw *hw; conststruct ixgbe_info *ii = ixgbe_info_tbl[ent->driver_data]; unsignedint indices = MAX_TX_QUEUES;
u8 part_str[IXGBE_PBANUM_LENGTH]; int i, err, expected_gts; bool disable_dev = false; #ifdef IXGBE_FCOE
u16 device_caps; #endif
u32 eec;
/* Catch broken hardware that put the wrong VF device ID in * the PCIe SR-IOV capability.
*/ if (pdev->is_virtfn) {
WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
pci_name(pdev), pdev->vendor, pdev->device); return -EINVAL;
}
err = pci_enable_device_mem(pdev); if (err) return err;
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) {
dev_err(&pdev->dev, "No usable DMA configuration, aborting\n"); goto err_dma;
}
/* setup the private structure */
err = ixgbe_sw_init(adapter, ii); if (err) goto err_sw_init;
if (ixgbe_check_fw_error(adapter)) return ixgbe_recovery_probe(adapter);
if (adapter->hw.mac.type == ixgbe_mac_e610) {
err = ixgbe_get_caps(&adapter->hw); if (err)
dev_err(&pdev->dev, "ixgbe_get_caps failed %d\n", err);
err = ixgbe_get_flash_data(&adapter->hw); if (err) goto err_sw_init;
}
if (adapter->hw.mac.type == ixgbe_mac_82599EB)
adapter->flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF;
switch (adapter->hw.mac.type) { case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_e610:
netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550; break; case ixgbe_mac_x550em_a:
netdev->udp_tunnel_nic_info = &ixgbe_udp_tunnels_x550em_a; break; default: break;
}
/* Make it possible the adapter to be woken up via WOL */ switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB: case ixgbe_mac_X540: case ixgbe_mac_X550: case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: case ixgbe_mac_e610:
IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0); break; default: break;
}
/* * If there is a fan on this device and it has failed log the * failure.
*/ if (adapter->flags & IXGBE_FLAG_FAN_FAIL_CAPABLE) {
u32 esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); if (esdp & IXGBE_ESDP_SDP1)
e_crit(probe, "Fan has stopped, replace the adapter\n");
}
if (allow_unsupported_sfp)
hw->allow_unsupported_sfp = allow_unsupported_sfp;
/* reset_hw fills in the perm_addr as well */
hw->phy.reset_if_overtemp = true;
err = hw->mac.ops.reset_hw(hw);
hw->phy.reset_if_overtemp = false;
ixgbe_set_eee_capable(adapter); if (err == -ENOENT) {
err = 0;
} elseif (err == -EOPNOTSUPP) {
e_dev_err("failed to load because an unsupported SFP+ or QSFP module type was detected.\n");
e_dev_err("Reload the driver after installing a supported module.\n"); goto err_sw_init;
} elseif (err) {
e_dev_err("HW Init failed: %d\n", err); goto err_sw_init;
}
#ifdef CONFIG_PCI_IOV /* SR-IOV not supported on the 82598 */ if (adapter->hw.mac.type == ixgbe_mac_82598EB) goto skip_sriov; /* Mailbox */
ixgbe_init_mbx_params_pf(hw);
hw->mbx.ops = ii->mbx_ops;
pci_sriov_set_totalvfs(pdev, IXGBE_MAX_VFS_DRV_LIMIT);
ixgbe_enable_sriov(adapter, max_vfs);
skip_sriov:
if (adapter->ipsec)
netdev->features |= IXGBE_ESP_FEATURES; #endif /* copy netdev features into list of user selectable features */
netdev->hw_features |= netdev->features |
NETIF_F_HW_VLAN_CTAG_FILTER |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_RXALL |
NETIF_F_HW_L2FW_DOFFLOAD;
if (hw->mac.type >= ixgbe_mac_82599EB)
netdev->hw_features |= NETIF_F_NTUPLE |
NETIF_F_HW_TC;
/* set this bit last since it cannot be part of vlan_features */
netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER |
NETIF_F_HW_VLAN_CTAG_RX |
NETIF_F_HW_VLAN_CTAG_TX;
/* make sure the EEPROM is good */ if (hw->eeprom.ops.validate_checksum(hw, NULL) < 0) {
e_dev_err("The EEPROM Checksum Is Not Valid\n");
err = -EIO; goto err_sw_init;
}
err = ixgbe_init_interrupt_scheme(adapter); if (err) goto err_sw_init;
for (i = 0; i < adapter->num_rx_queues; i++)
u64_stats_init(&adapter->rx_ring[i]->syncp); for (i = 0; i < adapter->num_tx_queues; i++)
u64_stats_init(&adapter->tx_ring[i]->syncp); for (i = 0; i < adapter->num_xdp_queues; i++)
u64_stats_init(&adapter->xdp_ring[i]->syncp);
/* WOL not supported for all devices */
adapter->wol = 0;
hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap);
hw->wol_enabled = ixgbe_wol_supported(adapter, pdev->device,
pdev->subsystem_device); if (hw->wol_enabled)
adapter->wol = IXGBE_WUFC_MAG;
/* save off EEPROM version number */
ixgbe_set_fw_version(adapter);
/* pick up the PCI bus settings for reporting later */ if (ixgbe_pcie_from_parent(hw))
ixgbe_get_parent_bus_info(adapter); else
hw->mac.ops.get_bus_info(hw);
/* calculate the expected PCIe bandwidth required for optimal * performance. Note that some older parts will never have enough * bandwidth due to being older generation PCIe parts. We clamp these * parts to ensure no warning is displayed if it can't be fixed.
*/ switch (hw->mac.type) { case ixgbe_mac_82598EB:
expected_gts = min(ixgbe_enumerate_functions(adapter) * 10, 16); break; default:
expected_gts = ixgbe_enumerate_functions(adapter) * 10; break;
}
/* don't check link if we failed to enumerate functions */ if (expected_gts > 0)
ixgbe_check_minimum_link(adapter, expected_gts);
/* reset the hardware with the new settings */
err = hw->mac.ops.start_hw(hw); if (err == -EACCES) { /* We are running on a pre-production device, log a warning */
e_dev_warn("This device is a pre-production adapter/LOM. " "Please be aware there may be issues associated " "with your hardware. If you are experiencing " "problems please contact your Intel or hardware " "representative who provided you with this " "hardware.\n");
}
strcpy(netdev->name, "eth%d");
pci_set_drvdata(pdev, adapter);
err = register_netdev(netdev); if (err) goto err_register;
/* power down the optics for 82599 SFP+ fiber */ if (hw->mac.ops.disable_tx_laser)
hw->mac.ops.disable_tx_laser(hw);
/* carrier off reporting is important to ethtool even BEFORE open */
netif_carrier_off(netdev);
#ifdef CONFIG_IXGBE_DCA if (dca_add_requester(&pdev->dev) == 0) {
adapter->flags |= IXGBE_FLAG_DCA_ENABLED;
ixgbe_setup_dca(adapter);
} #endif if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
e_info(probe, "IOV is enabled with %d VFs\n", adapter->num_vfs); for (i = 0; i < adapter->num_vfs; i++)
ixgbe_vf_configuration(pdev, (i | 0x10000000));
}
/* firmware requires driver version to be 0xFFFFFFFF * since os does not support feature
*/ if (hw->mac.ops.set_fw_drv_ver)
hw->mac.ops.set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF, sizeof(UTS_RELEASE) - 1,
UTS_RELEASE);
/* add san mac addr to netdev */
ixgbe_add_sanmac_netdev(netdev);
e_dev_info("%s\n", ixgbe_default_device_descr);
#ifdef CONFIG_IXGBE_HWMON if (ixgbe_sysfs_init(adapter))
e_err(probe, "failed to allocate sysfs resources\n"); #endif/* CONFIG_IXGBE_HWMON */
ixgbe_dbg_adapter_init(adapter);
/* setup link for SFP devices with MNG FW, else wait for IXGBE_UP */ if (ixgbe_mng_enabled(hw) && ixgbe_is_sfp(hw) && hw->mac.ops.setup_link)
hw->mac.ops.setup_link(hw,
IXGBE_LINK_SPEED_10GB_FULL | IXGBE_LINK_SPEED_1GB_FULL, true);
err = ixgbe_mii_bus_init(hw); if (err) goto err_netdev;
/**
 * ixgbe_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * ixgbe_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device. This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 *
 * NOTE(review): in this excerpt 'netdev' is never used and 'disable_dev'
 * is read without a visible assignment; part of the body appears to be
 * missing from view - confirm against the complete file.
 **/
staticvoid ixgbe_remove(struct pci_dev *pdev)
{ struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev; bool disable_dev; int i;
/* if !adapter then we already cleaned up in probe */ if (!adapter) return;
/* free each jump table together with the input and mask it owns */
for (i = 0; i < IXGBE_MAX_LINK_HANDLE; i++) { if (adapter->jump_tables[i]) {
kfree(adapter->jump_tables[i]->input);
kfree(adapter->jump_tables[i]->mask);
}
kfree(adapter->jump_tables[i]);
}
/* only the e610 MAC type has its aci lock destroyed here */
if (adapter->hw.mac.type == ixgbe_mac_e610)
mutex_destroy(&adapter->hw.aci.lock);
if (disable_dev)
pci_disable_device(pdev);
devlink_free(adapter->devlink);
}
/** * ixgbe_io_error_detected - called when PCI error is detected * @pdev: Pointer to PCI device * @state: The current pci connection state * * This function is called after a PCI bus error affecting * this device has been detected.
*/ static pci_ers_result_t ixgbe_io_error_detected(struct pci_dev *pdev,
pci_channel_state_t state)
{ struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev;
vf = FIELD_GET(0x7F, req_id);
e_dev_err("VF %d has caused a PCIe error\n", vf);
e_dev_err("TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: " "%8.8x\tdw3: %8.8x\n",
dw0, dw1, dw2, dw3); switch (adapter->hw.mac.type) { case ixgbe_mac_82599EB:
device_id = IXGBE_82599_VF_DEVICE_ID; break; case ixgbe_mac_X540:
device_id = IXGBE_X540_VF_DEVICE_ID; break; case ixgbe_mac_X550:
device_id = IXGBE_DEV_ID_X550_VF; break; case ixgbe_mac_X550EM_x:
device_id = IXGBE_DEV_ID_X550EM_X_VF; break; case ixgbe_mac_x550em_a:
device_id = IXGBE_DEV_ID_X550EM_A_VF; break; case ixgbe_mac_e610:
device_id = IXGBE_DEV_ID_E610_VF; break; default:
device_id = 0; break;
}
/* Find the pci device of the offending VF */
vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, device_id, NULL); while (vfdev) { if (vfdev->devfn == (req_id & 0xFF)) break;
vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
device_id, vfdev);
} /* * There's a slim chance the VF could have been hot plugged, * so if it is no longer present we don't need to issue the * VFLR. Just clean up the AER in that case.
*/ if (vfdev) {
pcie_flr(vfdev); /* Free device reference count */
pci_dev_put(vfdev);
}
}
/* * Even though the error may have occurred on the other port * we still need to increment the vf error reference count for * both ports because the I/O resume function will be called * for both of them.
*/
adapter->vferr_refcount++;
return PCI_ERS_RESULT_RECOVERED;
skip_bad_vf_detection: #endif/* CONFIG_PCI_IOV */ if (!test_bit(__IXGBE_SERVICE_INITED, &adapter->state)) return PCI_ERS_RESULT_DISCONNECT;
if (!netif_device_present(netdev)) return PCI_ERS_RESULT_DISCONNECT;
rtnl_lock();
netif_device_detach(netdev);
if (netif_running(netdev))
ixgbe_close_suspend(adapter);
if (state == pci_channel_io_perm_failure) {
rtnl_unlock(); return PCI_ERS_RESULT_DISCONNECT;
}
if (!test_and_set_bit(__IXGBE_DISABLED, &adapter->state))
pci_disable_device(pdev);
rtnl_unlock();
/* Request a slot reset. */ return PCI_ERS_RESULT_NEED_RESET;
}
/**
 * ixgbe_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot.
 *
 * Return: PCI_ERS_RESULT_DISCONNECT when the device cannot be re-enabled,
 * PCI_ERS_RESULT_RECOVERED once the adapter has been reset.
 */
static pci_ers_result_t ixgbe_io_slot_reset(struct pci_dev *pdev)
{
	struct ixgbe_adapter *adapter = pci_get_drvdata(pdev);

	/* nothing else can be done if the device refuses to come back */
	if (pci_enable_device_mem(pdev)) {
		e_err(probe, "Cannot re-enable PCI device after reset.\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}

	smp_mb__before_atomic();
	clear_bit(__IXGBE_DISABLED, &adapter->state);

	/* point the register accessors back at the mapped BAR */
	adapter->hw.hw_addr = adapter->io_addr;

	pci_set_master(pdev);
	pci_restore_state(pdev);
	pci_save_state(pdev);
	pci_wake_from_d3(pdev, false);

	ixgbe_reset(adapter);
	IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0);

	return PCI_ERS_RESULT_RECOVERED;
}
/** * ixgbe_io_resume - called when traffic can start flowing again. * @pdev: Pointer to PCI device * * This callback is called when the error recovery driver tells us that * its OK to resume normal operation.
*/ staticvoid ixgbe_io_resume(struct pci_dev *pdev)
{ struct ixgbe_adapter *adapter = pci_get_drvdata(pdev); struct net_device *netdev = adapter->netdev;
#ifdef CONFIG_PCI_IOV if (adapter->vferr_refcount) {
e_info(drv, "Resuming after VF err\n");
adapter->vferr_refcount--; return;
}
#endif
rtnl_lock(); if (netif_running(netdev))
ixgbe_open(netdev);
/** * ixgbe_init_module - Driver Registration Routine * * ixgbe_init_module is the first routine called when the driver is * loaded. All it does is register with the PCI subsystem.
**/ staticint __init ixgbe_init_module(void)
{ int ret;
pr_info("%s\n", ixgbe_driver_string);
pr_info("%s\n", ixgbe_copyright);
ixgbe_wq = create_singlethread_workqueue(ixgbe_driver_name); if (!ixgbe_wq) {
pr_err("%s: Failed to create workqueue\n", ixgbe_driver_name); return -ENOMEM;
}
ixgbe_dbg_init();
ret = pci_register_driver(&ixgbe_driver); if (ret) {
destroy_workqueue(ixgbe_wq);
ixgbe_dbg_exit(); return ret;
}
/**
 * ixgbe_exit_module - Driver Exit Cleanup Routine
 *
 * ixgbe_exit_module is called just before the driver is removed
 * from memory. It unregisters the DCA notifier (when CONFIG_IXGBE_DCA is
 * set) and the PCI driver, tears down debugfs state, and destroys the
 * driver workqueue if one exists.
 **/
static void __exit ixgbe_exit_module(void)
{
#ifdef CONFIG_IXGBE_DCA
	dca_unregister_notify(&dca_notifier);
#endif
	pci_unregister_driver(&ixgbe_driver);

	ixgbe_dbg_exit();
	if (ixgbe_wq) {
		destroy_workqueue(ixgbe_wq);
		/* clear the pointer so the freed workqueue cannot be reused */
		ixgbe_wq = NULL;
	}
}
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert. 0.396 Bemerkung:
(vorverarbeitet am 2026-04-28)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.