/************************************************************************** * * Type name strings * **************************************************************************
*/
/* Reset workqueue. If any NIC has a hardware failure then a reset will be * queued onto this work queue. This is not a per-nic work queue, because * ef4_reset_work() acquires the rtnl lock, so resets are naturally serialised.
*/ staticstruct workqueue_struct *reset_workqueue;
/* How often and how many times to poll for a reset while waiting for a * BIST that another function started to complete.
*/ #define BIST_WAIT_DELAY_MS 100 #define BIST_WAIT_DELAY_COUNT 100
/* * Use separate channels for TX and RX events * * Set this to 1 to use separate channels for TX and RX. It allows us * to control interrupt affinity separately for TX and RX. * * This is only used in MSI-X interrupt mode
*/ bool ef4_separate_tx_channels;
module_param(ef4_separate_tx_channels, bool, 0444);
MODULE_PARM_DESC(ef4_separate_tx_channels, "Use separate channels for TX and RX");
/* This is the time (in jiffies) between invocations of the hardware * monitor. * On Falcon-based NICs, this will: * - Check the on-board hardware monitor; * - Poll the link state and reconfigure the hardware as necessary. * On Siena-based NICs for power systems with EEH support, this will give EEH a * chance to start.
*/ staticunsignedint ef4_monitor_interval = 1 * HZ;
/* Initial interrupt moderation settings. They can be modified after * module load with ethtool. * * The default for RX should strike a balance between increasing the * round-trip latency and reducing overhead.
*/ staticunsignedint rx_irq_mod_usec = 60;
/* Initial interrupt moderation settings. They can be modified after * module load with ethtool. * * This default is chosen to ensure that a 10G link does not go idle * while a TX queue is stopped after it has become full. A queue is * restarted when it drops below half full. The time this takes (assuming * worst case 3 descriptors per packet and 1024 descriptors) is * 512 / 3 * 1.2 = 205 usec.
*/ staticunsignedint tx_irq_mod_usec = 150;
/* This is the first interrupt mode to try out of: * 0 => MSI-X * 1 => MSI * 2 => legacy
*/ staticunsignedint interrupt_mode;
/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS), * i.e. the number of CPUs among which we may distribute simultaneous * interrupt handling. * * Cards without MSI-X will only target one CPU via legacy or MSI interrupt. * The default (0) means to assign an interrupt to each core.
*/ staticunsignedint rss_cpus;
module_param(rss_cpus, uint, 0444);
MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
/* Process channel's event queue * * This function is responsible for processing the event queue of a * single channel. The caller must guarantee that this function will * never be concurrently called more than once on the same channel, * though different channels may be being processed concurrently.
*/ staticint ef4_process_channel(struct ef4_channel *channel, int budget)
{ struct ef4_tx_queue *tx_queue; int spent;
/* NAPI poll handler * * NAPI guarantees serialisation of polls of the same device, which * provides the guarantee required by ef4_process_channel().
*/ staticvoid ef4_update_irq_mod(struct ef4_nic *efx, struct ef4_channel *channel)
{ int step = efx->irq_mod_step_us;
netif_vdbg(efx, intr, efx->net_dev, "channel %d NAPI poll executing on CPU %d\n",
channel->channel, raw_smp_processor_id());
spent = ef4_process_channel(channel, budget);
if (spent < budget) { if (ef4_channel_has_rx_queue(channel) &&
efx->irq_rx_adaptive &&
unlikely(++channel->irq_count == 1000)) {
ef4_update_irq_mod(efx, channel);
}
ef4_filter_rfs_expire(channel);
/* There is no race here; although napi_disable() will * only wait for napi_complete(), this isn't a problem * since ef4_nic_eventq_read_ack() will have no effect if * interrupts have already been disabled.
*/
napi_complete_done(napi, spent);
ef4_nic_eventq_read_ack(channel);
}
return spent;
}
/* Create event queue * Event queue memory allocations are done only once. If the channel * is reset, the memory buffer will be reused; this guards against * errors during channel reset and also simplifies interrupt handling.
*/ staticint ef4_probe_eventq(struct ef4_channel *channel)
{ struct ef4_nic *efx = channel->efx; unsignedlong entries;
/* Build an event queue with room for one event per tx and rx buffer,
* plus some extra for link state events and MCDI completions. */
entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
EF4_BUG_ON_PARANOID(entries > EF4_MAX_EVQ_SIZE);
channel->eventq_mask = max(entries, EF4_MIN_EVQ_SIZE) - 1;
/* Allocate per-channel resources for every channel on this NIC.
 *
 * Channels are walked in reverse so that any "extra" channels claim
 * the start of the buffer table; the traffic channels that follow can
 * then be resized later without relocating entries ahead of them.
 *
 * Returns 0 on success or a negative errno; on failure every channel
 * that was successfully probed is torn down again before returning.
 */
static int ef4_probe_channels(struct ef4_nic *efx)
{
	struct ef4_channel *channel;
	int err;

	/* Restart special buffer allocation */
	efx->next_buffer_table = 0;

	ef4_for_each_channel_rev(channel, efx) {
		err = ef4_probe_channel(channel);
		if (err) {
			netif_err(efx, probe, efx->net_dev,
				  "failed to create channel %d\n",
				  channel->channel);
			goto fail;
		}
	}
	ef4_set_channel_names(efx);

	return 0;

fail:
	ef4_remove_channels(efx);
	return err;
}
/* Channels are shutdown and reinitialised whilst the NIC is running * to propagate configuration changes (mtu, checksum offload), or * to clear hardware error conditions
*/ staticvoid ef4_start_datapath(struct ef4_nic *efx)
{
netdev_features_t old_features = efx->net_dev->features; bool old_rx_scatter = efx->rx_scatter; struct ef4_tx_queue *tx_queue; struct ef4_rx_queue *rx_queue; struct ef4_channel *channel;
size_t rx_buf_len;
/* Calculate the rx buffer allocation parameters required to * support the current MTU, including padding for header * alignment and overruns.
*/
efx->rx_dma_len = (efx->rx_prefix_size +
EF4_MAX_FRAME_LEN(efx->net_dev->mtu) +
efx->type->rx_buffer_padding);
rx_buf_len = (sizeof(struct ef4_rx_page_state) +
efx->rx_ip_align + efx->rx_dma_len); if (rx_buf_len <= PAGE_SIZE) {
efx->rx_scatter = efx->type->always_rx_scatter;
efx->rx_buffer_order = 0;
} elseif (efx->type->can_rx_scatter) {
BUILD_BUG_ON(EF4_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
BUILD_BUG_ON(sizeof(struct ef4_rx_page_state) +
2 * ALIGN(NET_IP_ALIGN + EF4_RX_USR_BUF_SIZE,
EF4_RX_BUF_ALIGNMENT) >
PAGE_SIZE);
efx->rx_scatter = true;
efx->rx_dma_len = EF4_RX_USR_BUF_SIZE;
efx->rx_buffer_order = 0;
} else {
efx->rx_scatter = false;
efx->rx_buffer_order = get_order(rx_buf_len);
}
/* Restore previously fixed features in hw_features and remove * features which are fixed now
*/
efx->net_dev->hw_features |= efx->net_dev->features;
efx->net_dev->hw_features &= ~efx->fixed_features;
efx->net_dev->features |= efx->fixed_features; if (efx->net_dev->features != old_features)
netdev_features_change(efx->net_dev);
/* RX filters may also have scatter-enabled flags */ if (efx->rx_scatter != old_rx_scatter)
efx->type->filter_update_rx_scatter(efx);
/* We must keep at least one descriptor in a TX ring empty. * We could avoid this when the queue size does not exactly * match the hardware ring size, but it's not that important. * Therefore we stop the queue when one more skb might fill * the ring completely. We wake it when half way back to * empty.
*/
efx->txq_stop_thresh = efx->txq_entries - ef4_tx_max_skb_descs(efx);
efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
ef4_for_each_channel(channel, efx) { /* RX packet processing is pipelined, so wait for the * NAPI handler to complete. At least event queue 0 * might be kept active by non-data events, so don't * use napi_synchronize() but actually disable NAPI * temporarily.
*/ if (ef4_channel_has_rx_queue(channel)) {
ef4_stop_eventq(channel);
ef4_start_eventq(channel);
}
}
rc = efx->type->fini_dmaq(efx); if (rc && EF4_WORKAROUND_7803(efx)) { /* Schedule a reset to recover from the flush failure. The * descriptor caches reference memory we're about to free, * but falcon_reconfigure_mac_wrapper() won't reconnect * the MACs because of the pending reset.
*/
netif_err(efx, drv, efx->net_dev, "Resetting to recover from flush failure\n");
ef4_schedule_reset(efx, RESET_TYPE_ALL);
} elseif (rc) {
netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
} else {
netif_dbg(efx, drv, efx->net_dev, "successfully flushed all queues\n");
}
int
ef4_realloc_channels(struct ef4_nic *efx, u32 rxq_entries, u32 txq_entries)
{ struct ef4_channel *other_channel[EF4_MAX_CHANNELS], *channel;
u32 old_rxq_entries, old_txq_entries; unsigned i, next_buffer_table = 0; int rc, rc2;
rc = ef4_check_disabled(efx); if (rc) return rc;
/* Not all channels should be reallocated. We must avoid * reallocating their buffer table entries.
*/
ef4_for_each_channel(channel, efx) { struct ef4_rx_queue *rx_queue; struct ef4_tx_queue *tx_queue;
for (i = 0; i < efx->n_channels; i++) {
channel = efx->channel[i]; if (!channel->type->copy) continue;
rc = ef4_probe_channel(channel); if (rc) goto rollback;
ef4_init_napi_channel(efx->channel[i]);
}
out: /* Destroy unused channel structures */ for (i = 0; i < efx->n_channels; i++) {
channel = other_channel[i]; if (channel && channel->type->copy) {
ef4_fini_napi_channel(channel);
ef4_remove_channel(channel);
kfree(channel);
}
}
/************************************************************************** * * Port handling *
**************************************************************************/
/* This ensures that the kernel is kept informed (via * netif_carrier_on/off) of the link status, and also maintains the * link status's stop on the port's TX queue.
*/ void ef4_link_status_changed(struct ef4_nic *efx)
{ struct ef4_link_state *link_state = &efx->link_state;
/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure * that no events are triggered between unregister_netdev() and the * driver unloading. A more general condition is that NETDEV_CHANGE
* can only be generated between NETDEV_UP and NETDEV_DOWN */ if (!netif_running(efx->net_dev)) return;
if (link_state->up != netif_carrier_ok(efx->net_dev)) {
efx->n_link_state_changes++;
if (link_state->up)
netif_carrier_on(efx->net_dev); else
netif_carrier_off(efx->net_dev);
}
/* Status message for kernel log */ if (link_state->up)
netif_info(efx, link, efx->net_dev, "link up at %uMbps %s-duplex (MTU %d)\n",
link_state->speed, link_state->fd ? "full" : "half",
efx->net_dev->mtu); else
netif_info(efx, link, efx->net_dev, "link down\n");
}
/* We assume that efx->type->reconfigure_mac will always try to sync RX
 * filters and therefore needs to read-lock the filter table against freeing
 */
void ef4_mac_reconfigure(struct ef4_nic *efx)
{
	/* Hold filter_sem for read so the filter table cannot be freed
	 * while the NIC-type hook walks it during MAC reconfiguration.
	 */
	down_read(&efx->filter_sem);
	efx->type->reconfigure_mac(efx);
	up_read(&efx->filter_sem);
}
/* Push loopback/power/transmit disable settings to the PHY, and reconfigure * the MAC appropriately. All other PHY configuration changes are pushed * through phy_op->set_link_ksettings(), and pushed asynchronously to the MAC * through ef4_monitor(). * * Callers must hold the mac_lock
*/ int __ef4_reconfigure_port(struct ef4_nic *efx)
{ enum ef4_phy_mode phy_mode; int rc;
WARN_ON(!mutex_is_locked(&efx->mac_lock));
/* Disable PHY transmit in mac level loopbacks */
phy_mode = efx->phy_mode; if (LOOPBACK_INTERNAL(efx))
efx->phy_mode |= PHY_MODE_TX_DISABLED; else
efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
rc = efx->type->reconfigure_port(efx);
if (rc)
efx->phy_mode = phy_mode;
return rc;
}
/* Reinitialise the MAC to pick up new PHY settings, even if the port is
* disabled. */ int ef4_reconfigure_port(struct ef4_nic *efx)
{ int rc;
/* Asynchronous work item for changing MAC promiscuity and multicast
 * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
 * MAC directly.
 */
static void ef4_mac_work(struct work_struct *work)
{
	struct ef4_nic *efx = container_of(work, struct ef4_nic, mac_work);

	mutex_lock(&efx->mac_lock);
	/* Only touch the MAC while the port is up; mac_lock serialises
	 * us against port start/stop.
	 */
	if (efx->port_enabled)
		ef4_mac_reconfigure(efx);
	mutex_unlock(&efx->mac_lock);
}
staticint ef4_probe_port(struct ef4_nic *efx)
{ int rc;
/* Ensure MAC ingress/egress is enabled */
ef4_mac_reconfigure(efx);
mutex_unlock(&efx->mac_lock);
}
/* Cancel work for MAC reconfiguration, periodic hardware monitoring * and the async self-test, wait for them to finish and prevent them * being scheduled again. This doesn't cover online resets, which * should only be cancelled when removing the device.
*/ staticvoid ef4_stop_port(struct ef4_nic *efx)
{
netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
rc = pci_enable_device(pci_dev); if (rc) {
netif_err(efx, probe, efx->net_dev, "failed to enable PCI device\n"); goto fail1;
}
pci_set_master(pci_dev);
/* Set the PCI DMA mask. Try all possibilities from our genuine mask * down to 32 bits, because some architectures will allow 40 bit * masks event though they reject 46 bit masks.
*/ while (dma_mask > 0x7fffffffUL) {
rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask); if (rc == 0) break;
dma_mask >>= 1;
} if (rc) {
netif_err(efx, probe, efx->net_dev, "could not find a suitable DMA mask\n"); goto fail2;
}
netif_dbg(efx, probe, efx->net_dev, "using DMA mask %llx\n", (unsignedlonglong) dma_mask);
efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
rc = pci_request_region(pci_dev, bar, "sfc"); if (rc) {
netif_err(efx, probe, efx->net_dev, "request for memory BAR failed\n");
rc = -EIO; goto fail3;
}
efx->membase = ioremap(efx->membase_phys, mem_map_size); if (!efx->membase) {
netif_err(efx, probe, efx->net_dev, "could not map memory BAR at %llx+%x\n",
(unsignedlonglong)efx->membase_phys, mem_map_size);
rc = -ENOMEM; goto fail4;
}
netif_dbg(efx, probe, efx->net_dev, "memory BAR at %llx+%x (virtual %p)\n",
(unsignedlonglong)efx->membase_phys, mem_map_size,
efx->membase);
if (count > EF4_MAX_RX_QUEUES) {
netif_cond_dbg(efx, probe, efx->net_dev, !rss_cpus, warn, "Reducing number of rx queues from %u to %u.\n",
count, EF4_MAX_RX_QUEUES);
count = EF4_MAX_RX_QUEUES;
}
return count;
}
/* Probe the number and type of interrupts we are able to obtain, and * the resulting numbers of channels and RX queues.
*/ staticint ef4_probe_interrupts(struct ef4_nic *efx)
{ unsignedint extra_channels = 0; unsignedint i, j; int rc;
for (i = 0; i < EF4_MAX_EXTRA_CHANNELS; i++) if (efx->extra_channel_type[i])
++extra_channels;
if (efx->interrupt_mode == EF4_INT_MODE_MSIX) { struct msix_entry xentries[EF4_MAX_CHANNELS]; unsignedint n_channels;
/* We need to mark which channels really have RX and TX * queues, and adjust the TX queue numbers if we have separate * RX-only and TX-only channels.
*/
ef4_for_each_channel(channel, efx) { if (channel->channel < efx->n_rx_channels)
channel->rx_queue.core_index = channel->channel; else
channel->rx_queue.core_index = -1;
/* If the interface is supposed to be running but is not, start * the hardware and software data path, regular activity for the port * (MAC statistics, link polling, etc.) and schedule the port to be * reconfigured. Interrupts must already be enabled. This function * is safe to call multiple times, so long as the NIC is not disabled. * Requires the RTNL lock.
*/ staticvoid ef4_start_all(struct ef4_nic *efx)
{
EF4_ASSERT_RESET_SERIALISED(efx);
BUG_ON(efx->state == STATE_DISABLED);
/* Check that it is appropriate to restart the interface. All
* of these flags are safe to read under just the rtnl lock */ if (efx->port_enabled || !netif_running(efx->net_dev) ||
efx->reset_pending) return;
ef4_start_port(efx);
ef4_start_datapath(efx);
/* Start the hardware monitor if there is one */ if (efx->type->monitor != NULL)
queue_delayed_work(efx->workqueue, &efx->monitor_work,
ef4_monitor_interval);
/* Quiesce the hardware and software data path, and regular activity * for the port without bringing the link down. Safe to call multiple * times with the NIC in almost any state, but interrupts should be * enabled. Requires the RTNL lock.
*/ staticvoid ef4_stop_all(struct ef4_nic *efx)
{
EF4_ASSERT_RESET_SERIALISED(efx);
/* port_enabled can be read safely under the rtnl lock */ if (!efx->port_enabled) return;
/* update stats before we go down so we can accurately count * rx_nodesc_drops
*/
efx->type->pull_stats(efx);
spin_lock_bh(&efx->stats_lock);
efx->type->update_stats(efx, NULL, NULL);
spin_unlock_bh(&efx->stats_lock);
efx->type->stop_stats(efx);
ef4_stop_port(efx);
/* Stop the kernel transmit interface. This is only valid if * the device is stopped or detached; otherwise the watchdog * may fire immediately.
*/
WARN_ON(netif_running(efx->net_dev) &&
netif_device_present(efx->net_dev));
netif_tx_disable(efx->net_dev);
/* If channels are shared between RX and TX, so is IRQ * moderation. Otherwise, IRQ moderation is the same for all * TX channels and is not adaptive.
*/ if (efx->tx_channel_offset == 0) {
*tx_usecs = *rx_usecs;
} else { struct ef4_channel *tx_channel;
/* Run periodically off the general workqueue */ staticvoid ef4_monitor(struct work_struct *data)
{ struct ef4_nic *efx = container_of(data, struct ef4_nic,
monitor_work.work);
netif_vdbg(efx, timer, efx->net_dev, "hardware monitor executing on CPU %d\n",
raw_smp_processor_id());
BUG_ON(efx->type->monitor == NULL);
/* If the mac_lock is already held then it is likely a port * reconfiguration is already in place, which will likely do
* most of the work of monitor() anyway. */ if (mutex_trylock(&efx->mac_lock)) { if (efx->port_enabled)
efx->type->monitor(efx);
mutex_unlock(&efx->mac_lock);
}
/* Context: process, rtnl_lock() held. * Note that the kernel will ignore our return code; this method * should really be a void.
*/ int ef4_net_stop(struct net_device *net_dev)
{ struct ef4_nic *efx = netdev_priv(net_dev);
netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
raw_smp_processor_id());
/* Stop the device and flush all the channels */
ef4_stop_all(efx);
/* If disabling RX n-tuple filtering, clear existing filters */ if (net_dev->features & ~data & NETIF_F_NTUPLE) {
rc = efx->type->filter_clear_rx(efx, EF4_FILTER_PRI_MANUAL); if (rc) return rc;
}
/* If Rx VLAN filter is changed, update filters via mac_reconfigure */ if ((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER) { /* ef4_set_rx_mode() will schedule MAC work to update filters * when a new features are finally set in net_dev.
*/
ef4_set_rx_mode(net_dev);
}
/* Enable resets to be scheduled and check whether any were * already requested. If so, the NIC is probably hosed so we * abort.
*/
efx->state = STATE_READY;
smp_mb(); /* ensure we change state before checking reset_pending */ if (efx->reset_pending) {
netif_err(efx, probe, efx->net_dev, "aborting probe due to scheduled reset\n");
rc = -EIO; goto fail_locked;
}
/************************************************************************** * * Device reset and suspend *
**************************************************************************/
/* Tears down the entire software state and most of the hardware state
* before reset. */ void ef4_reset_down(struct ef4_nic *efx, enum reset_type method)
{
EF4_ASSERT_RESET_SERIALISED(efx);
/* This function will always ensure that the locks acquired in * ef4_reset_down() are released. A failure return code indicates * that we were unable to reinitialise the hardware, and the * driver should be disabled. If ok is false, then the rx and tx
* engines are not restarted, pending a RESET_DISABLE. */ int ef4_reset_up(struct ef4_nic *efx, enum reset_type method, bool ok)
{ int rc;
EF4_ASSERT_RESET_SERIALISED(efx);
/* Ensure that SRAM is initialised even if we're disabling the device */
rc = efx->type->init(efx); if (rc) {
netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n"); goto fail;
}
if (!ok) goto fail;
if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
method != RESET_TYPE_DATAPATH) {
rc = efx->phy_op->init(efx); if (rc) goto fail;
rc = efx->phy_op->reconfigure(efx); if (rc && rc != -EPERM)
netif_err(efx, drv, efx->net_dev, "could not restore PHY settings\n");
}
rc = ef4_enable_interrupts(efx); if (rc) goto fail;
/* Reset the NIC using the specified method. Note that the reset may * fail, in which case the card will be left in an unusable state. * * Caller must hold the rtnl_lock.
*/ int ef4_reset(struct ef4_nic *efx, enum reset_type method)
{ int rc, rc2; bool disabled;
rc = efx->type->reset(efx, method); if (rc) {
netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n"); goto out;
}
/* Clear flags for the scopes we covered. We assume the NIC and * driver are now quiescent so that there is no race here.
*/ if (method < RESET_TYPE_MAX_METHOD)
efx->reset_pending &= -(1 << (method + 1)); else/* it doesn't fit into the well-ordered scope hierarchy */
__clear_bit(method, &efx->reset_pending);
/* Reinitialise bus-mastering, which may have been turned off before * the reset was scheduled. This is still appropriate, even in the * RESET_TYPE_DISABLE since this driver generally assumes the hardware
* can respond to requests. */
pci_set_master(efx->pci_dev);
/* Try recovery mechanisms.
 * For now only EEH is supported.
 * Returns 0 if the recovery mechanisms are unsuccessful.
 * Returns a non-zero value otherwise.
 */
int ef4_try_recovery(struct ef4_nic *efx)
{
#ifdef CONFIG_EEH
	/* A PCI error can occur and not be seen by EEH because nothing
	 * happens on the PCI bus.  In this case the driver may fail and
	 * schedule a 'recover or reset', leading to this recovery
	 * handler.  Manually call the eeh failure check function.
	 */
	if (eeh_dev_check_failure(pci_dev_to_eeh_dev(efx->pci_dev)))
		/* The EEH mechanisms will handle the error and reset
		 * the device if necessary.
		 */
		return 1;
#endif
	return 0;
}
/* The worker thread exists so that code that cannot sleep can * schedule a reset for later.
*/ staticvoid ef4_reset_work(struct work_struct *data)
{ struct ef4_nic *efx = container_of(data, struct ef4_nic, reset_work);
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.49 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.