/**
 * igbvf_receive_skb - helper function to handle Rx indications
 * @adapter: board private structure
 * @netdev: pointer to netdev struct
 * @skb: skb to indicate to stack
 * @status: descriptor status field as written by hardware; in this merged
 *	form it also carries the extended error bits used for checksum
 *	validation
 * @vlan: descriptor vlan field as written by hardware (no le/be conversion)
 **/
static void igbvf_receive_skb(struct igbvf_adapter *adapter,
			      struct net_device *netdev,
			      struct sk_buff *skb,
			      u32 status, __le16 vlan)
{
	u16 vid;

	if (status & E1000_RXD_STAT_VP) {
		/* Local-loopback frames carry the VLAN tag byte-swapped on
		 * parts that set IGBVF_FLAG_RX_LB_VLAN_BSWAP; undo the swap
		 * before extracting the VLAN id.
		 */
		if ((adapter->flags & IGBVF_FLAG_RX_LB_VLAN_BSWAP) &&
		    (status & E1000_RXDEXT_STATERR_LB))
			vid = be16_to_cpu((__force __be16)vlan) &
			      E1000_RXD_SPC_VLAN_MASK;
		else
			vid = le16_to_cpu(vlan) & E1000_RXD_SPC_VLAN_MASK;
		if (test_bit(vid, adapter->active_vlans))
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
	}

	/* Ignore Checksum bit is set or checksum is disabled through ethtool.
	 * BUGFIX: the original body tested an undeclared "status_err" here
	 * and below; the descriptor status/error word is the @status
	 * parameter.
	 */
	if ((status & E1000_RXD_STAT_IXSM) ||
	    (adapter->flags & IGBVF_FLAG_RX_CSUM_DISABLED))
		return;

	/* TCP/UDP checksum error bit is set */
	if (status & (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
		/* let the stack verify checksum errors */
		adapter->hw_csum_err++;
		return;
	}

	/* It must be a TCP or UDP packet with a valid checksum */
	if (status & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
		skb->ip_summed = CHECKSUM_UNNECESSARY;

	adapter->hw_csum_good++;
	/* NOTE(review): nothing in this (apparently merged) function hands
	 * @skb to the stack (e.g. napi_gro_receive); presumably the caller
	 * does so - verify against the reference driver.
	 */
}
/**
 * igbvf_alloc_rx_buffers - Replace used receive buffers; packet split
 * @rx_ring: address of ring structure to repopulate
 * @cleaned_count: number of buffers to repopulate
 **/
static void igbvf_alloc_rx_buffers(struct igbvf_ring *rx_ring,
				   int cleaned_count)
{
	struct igbvf_adapter *adapter = rx_ring->adapter;
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	union e1000_adv_rx_desc *rx_desc;
	struct igbvf_buffer *buffer_info;
	struct sk_buff *new_skb;
	unsigned int idx;
	int buf_len;

	idx = rx_ring->next_to_use;
	buffer_info = &rx_ring->buffer_info[idx];

	/* in packet-split mode the skb holds only the header portion */
	buf_len = adapter->rx_ps_hdr_size ? adapter->rx_ps_hdr_size
					  : adapter->rx_buffer_len;

	while (cleaned_count--) {
		rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, idx);

		if (!buffer_info->skb) {
			new_skb = netdev_alloc_skb_ip_align(netdev, buf_len);
			if (!new_skb) {
				adapter->alloc_rx_buff_failed++;
				goto no_buffers;
			}

			buffer_info->skb = new_skb;
			buffer_info->dma = dma_map_single(&pdev->dev,
							  new_skb->data,
							  buf_len,
							  DMA_FROM_DEVICE);
			if (dma_mapping_error(&pdev->dev, buffer_info->dma)) {
				dev_kfree_skb(buffer_info->skb);
				buffer_info->skb = NULL;
				dev_err(&pdev->dev, "RX DMA map failed\n");
				goto no_buffers;
			}
		}

		/* Refresh the desc even if buffer_addrs didn't change
		 * because each write-back erases this info.
		 */
		if (adapter->rx_ps_hdr_size) {
			/* NOTE(review): no code in view populates page_dma -
			 * presumably a page-allocation step lives elsewhere;
			 * confirm against the full driver.
			 */
			rx_desc->read.pkt_addr =
				cpu_to_le64(buffer_info->page_dma);
			rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
		} else {
			rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
			rx_desc->read.hdr_addr = 0;
		}

		if (++idx == rx_ring->count)
			idx = 0;
		buffer_info = &rx_ring->buffer_info[idx];
	}

no_buffers:
	if (rx_ring->next_to_use != idx) {
		rx_ring->next_to_use = idx;

		/* hardware tail must point at the last written descriptor */
		idx = idx ? idx - 1 : rx_ring->count - 1;

		/* Force memory writes to complete before letting h/w
		 * know there are new descriptors to fetch. (Only
		 * applicable for weak-ordered memory model archs,
		 * such as IA-64).
		 */
		wmb();
		writel(idx, adapter->hw.hw_addr + rx_ring->tail);
	}
}
/**
 * igbvf_clean_rx_irq - Send received data up the network stack; legacy
 * @adapter: board private structure
 * @work_done: output parameter used to indicate completed work
 * @work_to_do: input parameter setting limit of work
 *
 * The return value indicates whether actual cleaning was done; there
 * is no guarantee that everything was cleaned.
 *
 * NOTE(review): this block is corrupted - the Rx clean loop below suddenly
 * iterates a Tx ring ("tx_ring" is never declared here and igbvf_put_txbuf
 * does not belong in the Rx path), and the function is cut off before its
 * closing brace.  Tokens such as "staticbool"/"unsignedint" look like
 * whitespace lost during extraction.  Restore from the reference driver.
 **/ staticbool igbvf_clean_rx_irq(struct igbvf_adapter *adapter, int *work_done, int work_to_do)
{ struct igbvf_ring *rx_ring = adapter->rx_ring; struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; union e1000_adv_rx_desc *rx_desc, *next_rxd; struct igbvf_buffer *buffer_info, *next_buffer; struct sk_buff *skb; bool cleaned = false; int cleaned_count = 0; unsignedint total_bytes = 0, total_packets = 0; unsignedint i;
u32 length, hlen, staterr;
i = rx_ring->next_to_clean;
rx_desc = IGBVF_RX_DESC_ADV(*rx_ring, i);
/* status_error carries both the DD/EOP status and extended error bits */
staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
/* process descriptors hardware marked done, up to the NAPI budget */
while (staterr & E1000_RXD_STAT_DD) { if (*work_done >= work_to_do) break;
(*work_done)++;
rmb(); /* read descriptor and rx_buffer_info after status DD */
buffer_info = &rx_ring->buffer_info[i];
/* HW will not DMA in data larger than the given buffer, even
 * if it parses the (NFS, of course) header to be larger. In
 * that case, it fills the header buffer and spills the rest
 * into the page.
 */
hlen = le16_get_bits(rx_desc->wb.lower.lo_dword.hs_rss.hdr_info,
E1000_RXDADV_HDRBUFLEN_MASK); if (hlen > adapter->rx_ps_hdr_size)
hlen = adapter->rx_ps_hdr_size;
/* return some buffers to hardware, one at a time is too slow */ if (cleaned_count >= IGBVF_RX_BUFFER_WRITE) {
igbvf_alloc_rx_buffers(rx_ring, cleaned_count);
cleaned_count = 0;
}
/* NOTE(review): Tx cleanup spliced into the Rx path from another function */
/* Free all the Tx ring sk_buffs */ for (i = 0; i < tx_ring->count; i++) {
buffer_info = &tx_ring->buffer_info[i];
igbvf_put_txbuf(adapter, buffer_info);
}
/**
 * igbvf_update_itr - update the dynamic ITR value based on statistics
 * @adapter: pointer to adapter
 * @itr_setting: current adapter->itr
 * @packets: the number of packets during this measurement interval
 * @bytes: the number of bytes during this measurement interval
 *
 * Stores a new ITR value based on packets and byte counts during the last
 * interrupt.  The advantage of per interrupt computation is faster updates
 * and more accurate ITR for the current traffic pattern.  Constants in this
 * function were computed based on theoretical maximum wire speed and
 * thresholds were set based on testing data as well as attempting to
 * minimize response time while increasing bulk throughput.
 *
 * NOTE(review): only the first statement below belongs to this function;
 * everything from "eop_desc = ..." onward is a fragment of the Tx clean /
 * interrupt-handler path ("eop_desc", "count", "tx_ring", "netdev", "hw"
 * are all undeclared here) and the function never closes.  Restore from
 * the reference driver.
 **/ staticenum latency_range igbvf_update_itr(struct igbvf_adapter *adapter, enum latency_range itr_setting, int packets, int bytes)
{ enum latency_range retval = itr_setting;
eop_desc = buffer_info->next_to_watch;
} while (count < tx_ring->count);
tx_ring->next_to_clean = i;
/* wake the queue only if we actually freed descriptors and link is up */
if (unlikely(count && netif_carrier_ok(netdev) &&
igbvf_desc_unused(tx_ring) >= IGBVF_TX_QUEUE_WAKE)) { /* Make sure that anybody stopping the queue after this
 * sees the new next_to_clean.
 */
smp_mb(); if (netif_queue_stopped(netdev) &&
!(test_bit(__IGBVF_DOWN, &adapter->state))) {
netif_wake_queue(netdev);
++adapter->restart_queue;
}
}
/* auto mask will automatically re-enable the interrupt when we write
 * EICS
 */ if (!igbvf_clean_tx_irq(tx_ring)) /* Ring was not completely cleaned, so fire another interrupt */
ew32(EICS, tx_ring->eims_value); else
ew32(EIMS, tx_ring->eims_value);
/* Write the ITR value calculated at the end of the
 * previous interrupt.
 */ if (adapter->rx_ring->set_itr) {
writel(adapter->rx_ring->itr_val,
adapter->hw.hw_addr + adapter->rx_ring->itr_register);
adapter->rx_ring->set_itr = 0;
}
/* Map an Rx and/or Tx queue onto an MSI-X vector via the IVAR table,
 * and record the vector's EIMS bit on the corresponding ring.
 */
static void igbvf_assign_vector(struct igbvf_adapter *adapter, int rx_queue,
				int tx_queue, int msix_vector)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg, idx;

	/* 82576 uses a table-based method for assigning vectors.  Each queue
	 * has a single entry in the table to which we write a vector number
	 * along with a "valid" bit.  Sadly, the layout of the table is
	 * somewhat counterintuitive.
	 */
	if (rx_queue > IGBVF_NO_QUEUE) {
		idx = rx_queue >> 1;
		reg = array_er32(IVAR0, idx);
		if (rx_queue & 0x1) {
			/* vector goes into third byte of register */
			reg &= 0xFF00FFFF;
			reg |= (msix_vector | E1000_IVAR_VALID) << 16;
		} else {
			/* vector goes into low byte of register */
			reg &= 0xFFFFFF00;
			reg |= msix_vector | E1000_IVAR_VALID;
		}
		adapter->rx_ring[rx_queue].eims_value = BIT(msix_vector);
		array_ew32(IVAR0, idx, reg);
	}

	if (tx_queue > IGBVF_NO_QUEUE) {
		idx = tx_queue >> 1;
		reg = array_er32(IVAR0, idx);
		if (tx_queue & 0x1) {
			/* vector goes into high byte of register */
			reg &= 0x00FFFFFF;
			reg |= (msix_vector | E1000_IVAR_VALID) << 24;
		} else {
			/* vector goes into second byte of register */
			reg &= 0xFFFF00FF;
			reg |= (msix_vector | E1000_IVAR_VALID) << 8;
		}
		adapter->tx_ring[tx_queue].eims_value = BIT(msix_vector);
		array_ew32(IVAR0, idx, reg);
	}
}
/**
 * igbvf_set_interrupt_capability - set MSI or MSI-X if supported
 * @adapter: board private structure
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 *
 * NOTE(review): truncated - the "if (adapter->msix_entries)" branch never
 * closes and "err" is assigned but unused in the visible code; the tail of
 * the function (pci_enable_msix and error handling, presumably) is missing.
 **/ staticvoid igbvf_set_interrupt_capability(struct igbvf_adapter *adapter)
{ int err = -ENOMEM; int i;
/* we allocate 3 vectors, 1 for Tx, 1 for Rx, one for PF messages */
adapter->msix_entries = kcalloc(3, sizeof(struct msix_entry),
GFP_KERNEL); if (adapter->msix_entries) { for (i = 0; i < 3; i++)
adapter->msix_entries[i].entry = i;
/**
 * igbvf_request_irq - initialize interrupts
 * @adapter: board private structure
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 *
 * NOTE(review): truncated - no closing brace or final return statement is
 * visible after the dev_err() below.
 **/ staticint igbvf_request_irq(struct igbvf_adapter *adapter)
{ int err = -1;
/* igbvf supports msi-x only */ if (adapter->msix_entries)
err = igbvf_request_msix(adapter);
/* success: propagate the zero return from igbvf_request_msix() */
if (!err) return err;
dev_err(&adapter->pdev->dev, "Unable to allocate interrupt, Error: %d\n", err);
/**
 * igbvf_poll - NAPI Rx polling callback
 * @napi: struct associated with this polling callback
 * @budget: amount of packets driver is allowed to process this poll
 **/
static int igbvf_poll(struct napi_struct *napi, int budget)
{
	struct igbvf_ring *rx_ring = container_of(napi, struct igbvf_ring,
						  napi);
	struct igbvf_adapter *adapter = rx_ring->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int work_done = 0;

	igbvf_clean_rx_irq(adapter, &work_done, budget);

	/* full budget consumed: stay in polling mode */
	if (work_done == budget)
		return budget;

	/* Exit the polling mode, but don't re-enable interrupts if stack
	 * might poll us due to busy-polling
	 */
	if (!napi_complete_done(napi, work_done))
		return work_done;

	if (adapter->requested_itr & 3)
		igbvf_set_itr(adapter);

	if (!test_bit(__IGBVF_DOWN, &adapter->state))
		ew32(EIMS, adapter->rx_ring->eims_value);

	return work_done;
}
/**
 * igbvf_set_rlpml - set receive large packet maximum length
 * @adapter: board private structure
 *
 * Configure the maximum size of packets that will be received
 *
 * NOTE(review): the body below does not match this header - it uses an
 * undeclared "vid", calls set_vfta() and returns -EINVAL from a void
 * function, so it appears spliced in from a VLAN-add path
 * (igbvf_vlan_rx_add_vid, presumably).  Also note the stray "\n" in the
 * middle of the dev_warn() format string.
 */ staticvoid igbvf_set_rlpml(struct igbvf_adapter *adapter)
{ int max_frame_size; struct e1000_hw *hw = &adapter->hw;
if (hw->mac.ops.set_vfta(hw, vid, true)) {
dev_warn(&adapter->pdev->dev, "Vlan id %d\n is not added", vid);
spin_unlock_bh(&hw->mbx_lock); return -EINVAL;
}
/* NOTE(review): orphaned statements - these look like the interiors of the
 * Tx- and Rx-ring hardware configuration routines (no enclosing function
 * header is visible); "dca_txctrl", "rx_ring" and "rdba" are not declared
 * in this view.  Restore the enclosing functions from the reference driver.
 */
/* Turn off Relaxed Ordering on head write-backs. The writebacks
 * MUST be delivered in order or it will completely screw up
 * our bookkeeping.
 */
dca_txctrl = er32(DCA_TXCTRL(0));
dca_txctrl &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
ew32(DCA_TXCTRL(0), dca_txctrl);
/* Setup the HW Rx Head and Tail Descriptor Pointers and
 * the Base and Length of the Rx Descriptor Ring
 */
rdba = rx_ring->dma;
/* program the 64-bit ring base address as two 32-bit halves */
ew32(RDBAL(0), (rdba & DMA_BIT_MASK(32)));
ew32(RDBAH(0), (rdba >> 32));
ew32(RDLEN(0), rx_ring->count * sizeof(union e1000_adv_rx_desc));
rx_ring->head = E1000_RDH(0);
rx_ring->tail = E1000_RDT(0);
ew32(RDH(0), 0);
ew32(RDT(0), 0);
/** * igbvf_set_multi - Multicast and Promiscuous mode set * @netdev: network interface device structure * * The set_multi entry point is called whenever the multicast address * list or the network interface flags are updated. This routine is * responsible for configuring the hardware for proper multicast, * promiscuous mode, and all-multi behavior.
**/ staticvoid igbvf_set_multi(struct net_device *netdev)
{ struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; struct netdev_hw_addr *ha;
u8 *mta_list = NULL; int i;
if (!netdev_mc_empty(netdev)) {
mta_list = kmalloc_array(netdev_mc_count(netdev), ETH_ALEN,
GFP_ATOMIC); if (!mta_list) return;
}
/* prepare a packed array of only addresses. */
i = 0;
netdev_for_each_mc_addr(ha, netdev)
memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
spin_lock_bh(&hw->mbx_lock);
hw->mac.ops.update_mc_addr_list(hw, mta_list, i, 0, 0);
spin_unlock_bh(&hw->mbx_lock);
kfree(mta_list);
}
/**
 * igbvf_set_uni - Configure unicast MAC filters
 * @netdev: network interface device structure
 *
 * This routine is responsible for configuring the hardware for proper
 * unicast filters.
 *
 * NOTE(review): truncated inside the netdev_for_each_uc_addr() loop - the
 * per-address set_uc_addr call, the matching unlock, and the function's
 * return are missing from this view.
 **/ staticint igbvf_set_uni(struct net_device *netdev)
{ struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw;
if (netdev_uc_count(netdev) > IGBVF_MAX_MAC_FILTERS) {
pr_err("Too many unicast filters - No Space\n"); return -ENOSPC;
}
spin_lock_bh(&hw->mbx_lock);
/* Clear all unicast MAC filters */
hw->mac.ops.set_uc_addr(hw, E1000_VF_MAC_FILTER_CLR, NULL);
spin_unlock_bh(&hw->mbx_lock);
if (!netdev_uc_empty(netdev)) { struct netdev_hw_addr *ha;
/* Add MAC filters one by one */
netdev_for_each_uc_addr(ha, netdev) {
spin_lock_bh(&hw->mbx_lock);
/* igbvf_reset - bring the hardware into a known good state
 * @adapter: private board structure
 *
 * This function boots the hardware and enables some settings that
 * require a configuration cycle of the hardware - those cannot be
 * set/changed during runtime. After reset the device needs to be
 * properly configured for Rx, Tx etc.
 */ staticvoid igbvf_reset(struct igbvf_adapter *adapter)
{ struct e1000_mac_info *mac = &adapter->hw.mac; struct net_device *netdev = adapter->netdev; struct e1000_hw *hw = &adapter->hw;
/* reset/init go through the PF mailbox; hold the lock across both */
spin_lock_bh(&hw->mbx_lock);
/* Allow time for pending master requests to run */ if (mac->ops.reset_hw(hw))
dev_info(&adapter->pdev->dev, "PF still resetting\n");
mac->ops.init_hw(hw);
spin_unlock_bh(&hw->mbx_lock);
/* adopt the MAC address if the PF handed us a valid one */
if (is_valid_ether_addr(adapter->hw.mac.addr)) {
eth_hw_addr_set(netdev, adapter->hw.mac.addr);
memcpy(netdev->perm_addr, adapter->hw.mac.addr,
netdev->addr_len);
}
/* timestamp used elsewhere to rate-limit reset retries */
adapter->last_reset = jiffies;
}
/* Bring the interface up after a reset: reconfigure, enable NAPI and
 * interrupts.
 *
 * NOTE(review): truncated - no return statement or closing brace is visible
 * (an int return is declared but never produced in this view).
 */
int igbvf_up(struct igbvf_adapter *adapter)
{ struct e1000_hw *hw = &adapter->hw;
/* hardware has been reset, we need to reload some things */
igbvf_configure(adapter);
clear_bit(__IGBVF_DOWN, &adapter->state);
napi_enable(&adapter->rx_ring->napi); if (adapter->msix_entries)
igbvf_configure_msix(adapter);
/* Clear any pending interrupts. */
er32(EICR);
igbvf_irq_enable(adapter);
/**
 * igbvf_open - Called when a network interface is made active
 * @netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
 *
 * NOTE(review): truncated - the "err_req_irq" label, NAPI enable, interrupt
 * enable, and return paths are missing from this view.
 **/ staticint igbvf_open(struct net_device *netdev)
{ struct igbvf_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; int err;
/* disallow open during test */ if (test_bit(__IGBVF_TESTING, &adapter->state)) return -EBUSY;
/* before we allocate an interrupt, we must be ready to handle it.
 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
 * as soon as we call pci_request_irq, so we have to setup our
 * clean_rx handler before we do so.
 */
igbvf_configure(adapter);
err = igbvf_request_irq(adapter); if (err) goto err_req_irq;
/* From here on the code is the same as igbvf_up() */
clear_bit(__IGBVF_DOWN, &adapter->state);
/**
 * igbvf_close - Disables a network interface
 * @netdev: network interface device structure
 *
 * Returns 0, this is not allowed to fail
 *
 * The close entry point is called when an interface is de-activated
 * by the OS.  The hardware is still under the drivers control, but
 * needs to be disabled.  A global MAC reset is issued to stop the
 * hardware, and all transmit and receive resources are freed.
 *
 * NOTE(review): the body below does not belong to igbvf_close() - it is a
 * splice of link-check and watchdog-task code ("hw", "ret_val", "link",
 * "mac", "tx_ring", "tx_pending" are all undeclared here, and "returnfalse"
 * is a mangled "return false" from a bool function).  Restore from the
 * reference driver.
 **/ staticint igbvf_close(struct net_device *netdev)
{ struct igbvf_adapter *adapter = netdev_priv(netdev);
/* If interface is down, stay link down */ if (test_bit(__IGBVF_DOWN, &adapter->state)) returnfalse;
spin_lock_bh(&hw->mbx_lock);
ret_val = hw->mac.ops.check_for_link(hw);
spin_unlock_bh(&hw->mbx_lock);
link_active = !hw->mac.get_link_status;
/* if check for link returns error we will need to reset */ if (ret_val && time_after(jiffies, adapter->last_reset + (10 * HZ)))
schedule_work(&adapter->reset_task);
if (link) { if (!netif_carrier_ok(netdev)) {
mac->ops.get_link_up_info(&adapter->hw,
&adapter->link_speed,
&adapter->link_duplex);
igbvf_print_link_info(adapter);
if (netif_carrier_ok(netdev)) {
igbvf_update_stats(adapter);
} else {
tx_pending = (igbvf_desc_unused(tx_ring) + 1 <
tx_ring->count); if (tx_pending) { /* We've lost link, so the controller stops DMA,
 * but we've got queued Tx work that's never going
 * to get done, so reset controller to flush Tx.
 * (Do the reset outside of interrupt context).
 */
adapter->tx_timeout_count++;
schedule_work(&adapter->reset_task);
}
}
/* Cause software interrupt to ensure Rx ring is cleaned */
ew32(EICS, adapter->rx_ring->eims_value);
/* Reset the timer */ if (!test_bit(__IGBVF_DOWN, &adapter->state))
mod_timer(&adapter->watchdog_timer,
round_jiffies(jiffies + (2 * HZ)));
}
/* NOTE(review): orphaned fragments of the Tx path - a TSO/checksum context
 * snippet followed by the DMA-mapping error rewind from igbvf_tx_map_adv().
 * "ip", "trans_start", "csum_start", "type_tucmd", "buffer_info", "count",
 * "i" and "tx_ring" are all undeclared in this view.
 */
/* IP header will have to cancel out any data that
 * is not a part of the outer IP header
 */
ip.v4->check = csum_fold(csum_partial(trans_start,
csum_start - trans_start,
0));
type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
/* clear timestamp and dma mappings for failed buffer_info mapping */
buffer_info->dma = 0;
buffer_info->time_stamp = 0;
buffer_info->length = 0;
buffer_info->mapped_as_page = false; if (count)
count--;
/* clear timestamp and dma mappings for remaining portion of packet */ while (count--) { if (i == 0)
i += tx_ring->count;
i--;
buffer_info = &tx_ring->buffer_info[i];
igbvf_put_txbuf(adapter, buffer_info);
}
/* NOTE(review): orphaned interior of igbvf_tx_queue_adv() - writes the
 * prepared buffer_info entries into the hardware descriptor ring.
 * "i", "count", "tx_ring", "tx_desc", "cmd_type_len" and "olinfo_status"
 * are undeclared in this view; no enclosing function header is visible.
 */
i = tx_ring->next_to_use; while (count--) {
buffer_info = &tx_ring->buffer_info[i];
tx_desc = IGBVF_TX_DESC_ADV(*tx_ring, i);
tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
tx_desc->read.cmd_type_len =
cpu_to_le32(cmd_type_len | buffer_info->length);
tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
i++; if (i == tx_ring->count)
i = 0;
}
/* last descriptor gets the EOP/RS command bits */
tx_desc->read.cmd_type_len |= cpu_to_le32(adapter->txd_cmd); /* Force memory writes to complete before letting h/w
 * know there are new descriptors to fetch. (Only
 * applicable for weak-ordered memory model archs,
 * such as IA-64).
 */
wmb();
/* NOTE(review): orphaned interior of the ndo_start_xmit path
 * (igbvf_xmit_frame_ring_adv, presumably) - "skb", "tx_ring", "tx_flags",
 * "first" and "hdr_len" are undeclared here and no enclosing function
 * header is visible.
 */
/* drop the frame if the interface is going down */
if (test_bit(__IGBVF_DOWN, &adapter->state)) {
dev_kfree_skb_any(skb); return NETDEV_TX_OK;
}
if (skb->len <= 0) {
dev_kfree_skb_any(skb); return NETDEV_TX_OK;
}
/* need: count + 4 desc gap to keep tail from touching
 * + 2 desc gap to keep tail from touching head,
 * + 1 desc for skb->data,
 * + 1 desc for context descriptor,
 * head, otherwise try next time
 */ if (igbvf_maybe_stop_tx(netdev, skb_shinfo(skb)->nr_frags + 4)) { /* this is a hard error */ return NETDEV_TX_BUSY;
}
/* count reflects descriptors mapped, if 0 then mapping error
 * has occurred and we need to rewind the descriptor queue
 */
count = igbvf_tx_map_adv(adapter, tx_ring, skb);
if (count) {
igbvf_tx_queue_adv(adapter, tx_ring, tx_flags, count,
first, skb->len, hdr_len); /* Make sure there is space in the ring for the next send. */
igbvf_maybe_stop_tx(netdev, MAX_SKB_FRAGS + 4);
} else {
/* mapping failed: free the skb and roll the ring back */
dev_kfree_skb_any(skb);
tx_ring->buffer_info[first].time_stamp = 0;
tx_ring->next_to_use = first;
}
/**
 * igbvf_change_mtu - Change the Maximum Transfer Unit
 * @netdev: network interface device structure
 * @new_mtu: new value for maximum frame size
 *
 * Returns 0 on success, negative on failure
 *
 * NOTE(review): truncated - the buffer-size selection, netdev->mtu update,
 * igbvf_up()/reset path, RESETTING-bit clear and return are missing from
 * this view.
 **/ staticint igbvf_change_mtu(struct net_device *netdev, int new_mtu)
{ struct igbvf_adapter *adapter = netdev_priv(netdev); int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
/* serialize against any other reset in progress */
while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state))
usleep_range(1000, 2000); /* igbvf_down has a dependency on max_frame_size */
adapter->max_frame_size = max_frame; if (netif_running(netdev))
igbvf_down(adapter);
/* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
 * means we reserve 2 more, this pushes us to allocate from the next
 * larger slab size.
 * i.e. RXBUFFER_2048 --> size-4096 slab
 * However with the new *_jumbo_rx* routines, jumbo receives will use
 * fragmented skbs
 */
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.