/* Local includes */ #include"i40e.h" #include"i40e_devids.h" #include"i40e_diag.h" #include"i40e_lan_hmc.h" #include"i40e_virtchnl_pf.h" #include"i40e_xsk.h"
/* All i40e tracepoints are defined by the include below, which * must be included exactly once across the whole kernel with * CREATE_TRACE_POINTS defined
*/ #define CREATE_TRACE_POINTS #include"i40e_trace.h"
/** * i40e_hw_to_dev - get device pointer from the hardware structure * @hw: pointer to the device HW structure
**/ struct device *i40e_hw_to_dev(struct i40e_hw *hw)
{ struct i40e_pf *pf = i40e_hw_to_pf(hw);
return &pf->pdev->dev;
}
/** * i40e_allocate_dma_mem - OS specific memory alloc for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out * @size: size of memory requested * @alignment: what to align the allocation to
**/ int i40e_allocate_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem,
u64 size, u32 alignment)
{ struct i40e_pf *pf = i40e_hw_to_pf(hw);
/** * i40e_free_dma_mem - OS specific memory free for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to free
**/ int i40e_free_dma_mem(struct i40e_hw *hw, struct i40e_dma_mem *mem)
{ struct i40e_pf *pf = i40e_hw_to_pf(hw);
/**
 * i40e_allocate_virt_mem - OS specific memory alloc for shared code
 * @hw: pointer to the HW structure (not referenced by this function)
 * @mem: ptr to mem struct to fill out
 * @size: size of memory requested
 *
 * Stores @size in @mem and points @mem->va at a zeroed GFP_KERNEL
 * allocation of that many bytes. @mem->size is written even when the
 * allocation fails.
 *
 * Return: 0 on success, -ENOMEM if the allocation failed.
 **/
int i40e_allocate_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem,
			   u32 size)
{
	void *buf = kzalloc(size, GFP_KERNEL);

	mem->va = buf;
	mem->size = size;

	return buf ? 0 : -ENOMEM;
}
/**
 * i40e_free_virt_mem - OS specific memory free for shared code
 * @hw: pointer to the HW structure (not referenced by this function)
 * @mem: ptr to mem struct to free
 *
 * Releases the buffer behind @mem->va and resets @mem to the empty state
 * (va == NULL, size == 0).
 *
 * Return: always 0.
 **/
int i40e_free_virt_mem(struct i40e_hw *hw, struct i40e_virt_mem *mem)
{
	void *va = mem->va;

	mem->size = 0;
	mem->va = NULL;

	/* kfree() accepts NULL, so no check is needed */
	kfree(va);

	return 0;
}
/**
 * i40e_get_lump - find a lump of free generic resource
 * @pf: board private structure
 * @pile: the pile of resource to search
 * @needed: the number of items needed
 * @id: an owner id to stick on the items assigned
 *
 * Scans @pile for the first run of @needed consecutive entries that do
 * not carry I40E_PILE_VALID_BIT and tags each of them with @id. When
 * @pile is the PF queue-pair pile and the VSI at index @id is a flow
 * director VSI, only the last entry of the pile is considered.
 *
 * Return: the base item index of the lump, -EINVAL for bad parameters,
 * or -ENOMEM when no suitable lump is free.
 **/
static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
			 u16 needed, u16 id)
{
	int ret = -ENOMEM;
	int i, j;

	/* Reject requests that cannot be tracked: a missing pile, an empty
	 * request (which would otherwise "succeed" without reserving
	 * anything), or an id that collides with the valid bit.
	 */
	if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
		dev_info(&pf->pdev->dev,
			 "param err: pile=%s needed=%d id=0x%04x\n",
			 pile ? "<valid>" : "<null>", needed, id);
		return -EINVAL;
	}

	/* Allocate last queue in the pile for FDIR VSI queue
	 * so it doesn't fragment the qp_pile
	 */
	if (pile == pf->qp_pile && pf->vsi[id]->type == I40E_VSI_FDIR) {
		if (pile->list[pile->num_entries - 1] & I40E_PILE_VALID_BIT) {
			dev_err(&pf->pdev->dev,
				"Cannot allocate queue %d for I40E_VSI_FDIR\n",
				pile->num_entries - 1);
			return -ENOMEM;
		}
		pile->list[pile->num_entries - 1] = id | I40E_PILE_VALID_BIT;
		return pile->num_entries - 1;
	}

	i = 0;
	while (i < pile->num_entries) {
		/* skip already allocated entries */
		if (pile->list[i] & I40E_PILE_VALID_BIT) {
			i++;
			continue;
		}

		/* do we have enough in this lump? */
		for (j = 0; (j < needed) && ((i + j) < pile->num_entries); j++) {
			if (pile->list[i + j] & I40E_PILE_VALID_BIT)
				break;
		}

		if (j == needed) {
			/* there was enough, so assign it to the requestor */
			for (j = 0; j < needed; j++)
				pile->list[i + j] = id | I40E_PILE_VALID_BIT;
			ret = i;
			break;
		}

		/* not enough, so skip over it and continue looking */
		i += j;
	}

	return ret;
}
/**
 * i40e_put_lump - return a lump of generic resource
 * @pile: the pile of resource to search
 * @index: the base item index
 * @id: the owner id of the items assigned
 *
 * Starting at @index, clears consecutive entries for as long as they
 * hold exactly @id combined with I40E_PILE_VALID_BIT.
 *
 * Return: the count of items released, or -EINVAL when @pile is NULL or
 * @index lies outside the pile.
 **/
static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
{
	int owner_tag = id | I40E_PILE_VALID_BIT;
	int released = 0;
	u16 pos;

	if (!pile)
		return -EINVAL;
	if (index >= pile->num_entries)
		return -EINVAL;

	pos = index;
	while (pos < pile->num_entries && pile->list[pos] == owner_tag) {
		pile->list[pos] = 0;
		released++;
		pos++;
	}

	return released;
}
/**
 * i40e_find_vsi_from_id - searches for the vsi with the given id
 * @pf: the pf structure to search for the vsi
 * @id: id of the vsi it is searching for
 *
 * Return: the first VSI visited by i40e_pf_for_each_vsi() whose id
 * equals @id, or NULL when none matches.
 **/
struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
{
	struct i40e_vsi *cur;
	int idx;

	i40e_pf_for_each_vsi(pf, idx, cur) {
		if (cur->id == id)
			return cur;
	}

	return NULL;
}
/** * i40e_service_event_schedule - Schedule the service task to wake up * @pf: board private structure * * If not already scheduled, this puts the task into the work queue
**/ void i40e_service_event_schedule(struct i40e_pf *pf)
{ if ((!test_bit(__I40E_DOWN, pf->state) &&
!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) ||
test_bit(__I40E_RECOVERY_MODE, pf->state))
queue_work(i40e_wq, &pf->service_task);
}
/** * i40e_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure * @txqueue: queue number timing out * * If any port has noticed a Tx timeout, it is likely that the whole * device is munged, not just the one netdev port, so go for the full * reset.
**/ staticvoid i40e_tx_timeout(struct net_device *netdev, unsignedint txqueue)
{ struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct i40e_ring *tx_ring = NULL; unsignedint i;
u32 head, val;
pf->tx_timeout_count++;
/* with txqueue index, find the tx_ring struct */ for (i = 0; i < vsi->num_queue_pairs; i++) { if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) { if (txqueue ==
vsi->tx_rings[i]->queue_index) {
tx_ring = vsi->tx_rings[i]; break;
}
}
}
if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ*20)))
pf->tx_timeout_recovery_level = 1; /* reset after some time */ elseif (time_before(jiffies,
(pf->tx_timeout_last_recovery + netdev->watchdog_timeo))) return; /* don't do any new action before the next timeout */
/* don't kick off another recovery if one is already pending */ if (test_and_set_bit(__I40E_TIMEOUT_RECOVERY_PENDING, pf->state)) return;
if (tx_ring) {
head = i40e_get_head(tx_ring); /* Read interrupt register */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
val = rd32(&pf->hw,
I40E_PFINT_DYN_CTLN(tx_ring->q_vector->v_idx +
tx_ring->vsi->base_vector - 1)); else
val = rd32(&pf->hw, I40E_PFINT_DYN_CTL0);
/**
 * i40e_get_vsi_stats_struct - Get System Network Statistics
 * @vsi: the VSI we care about
 *
 * The statistics themselves are updated from the service task; this
 * helper only hands out their location.
 *
 * Return: the address of the net_stats member of @vsi.
 **/
struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi)
{
	struct rtnl_link_stats64 *stats = &vsi->net_stats;

	return stats;
}
/** * i40e_get_netdev_stats_struct_tx - populate stats from a Tx ring * @ring: Tx ring to get statistics from * @stats: statistics entry to be updated
**/ staticvoid i40e_get_netdev_stats_struct_tx(struct i40e_ring *ring, struct rtnl_link_stats64 *stats)
{
u64 bytes, packets; unsignedint start;
do {
start = u64_stats_fetch_begin(&ring->syncp);
packets = ring->stats.packets;
bytes = ring->stats.bytes;
} while (u64_stats_fetch_retry(&ring->syncp, start));
/** * i40e_vsi_reset_stats - Resets all stats of the given vsi * @vsi: the VSI to have its stats reset
**/ void i40e_vsi_reset_stats(struct i40e_vsi *vsi)
{ struct rtnl_link_stats64 *ns; int i;
/** * i40e_pf_reset_stats - Reset all of the stats for the given PF * @pf: the PF to be reset
**/ void i40e_pf_reset_stats(struct i40e_pf *pf)
{ struct i40e_veb *veb; int i;
/** * i40e_compute_pci_to_hw_id - compute index from PCI function. * @vsi: ptr to the VSI to read from. * @hw: ptr to the hardware info.
**/ static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw)
{ int pf_count = i40e_get_pf_count(hw);
/** * i40e_stat_update64 - read and update a 64 bit stat from the chip. * @hw: ptr to the hardware info. * @hireg: the high 32 bit reg to read. * @loreg: the low 32 bit reg to read. * @offset_loaded: has the initial offset been loaded yet. * @offset: ptr to current offset value. * @stat: ptr to the stat. * * Since the device stats are not reset at PFReset, they will not * be zeroed when the driver starts. We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero.
**/ staticvoid i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat)
{
u64 new_data;
/** * i40e_stat_update48 - read and update a 48 bit stat from the chip * @hw: ptr to the hardware info * @hireg: the high 32 bit reg to read * @loreg: the low 32 bit reg to read * @offset_loaded: has the initial offset been loaded yet * @offset: ptr to current offset value * @stat: ptr to the stat * * Since the device stats are not reset at PFReset, they likely will not * be zeroed when the driver starts. We'll save the first values read * and use them as offsets to be subtracted from the raw values in order * to report stats that count from zero. In the process, we also manage * the potential roll-over.
**/ staticvoid i40e_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg, bool offset_loaded, u64 *offset, u64 *stat)
{
u64 new_data;
/** * i40e_stat_update32 - read and update a 32 bit stat from the chip * @hw: ptr to the hardware info * @reg: the hw reg to read * @offset_loaded: has the initial offset been loaded yet * @offset: ptr to current offset value * @stat: ptr to the stat
**/ staticvoid i40e_stat_update32(struct i40e_hw *hw, u32 reg, bool offset_loaded, u64 *offset, u64 *stat)
{
u32 new_data;
/**
 * i40e_stat_update_and_clear32 - read and clear hw reg, update a 32 bit stat
 * @hw: ptr to the hardware info
 * @reg: the hw reg to read and clear
 * @stat: ptr to the stat
 *
 * Adds the value currently held in @reg to *@stat, then writes @reg to
 * clear it.
 **/
static void i40e_stat_update_and_clear32(struct i40e_hw *hw, u32 reg, u64 *stat)
{
	u32 delta;

	delta = rd32(hw, reg);
	/* must write a nonzero value to clear register */
	wr32(hw, reg, 1);

	*stat = *stat + delta;
}
/**
 * i40e_stats_update_rx_discards - update rx_discards.
 * @vsi: ptr to the VSI to be updated.
 * @hw: ptr to the hardware info.
 * @stat_idx: VSI's stat_counter_idx.
 * @offset_loaded: ptr to the VSI's stat_offsets_loaded.
 * @stat_offset: ptr to stat_offset to store first read of specific register.
 * @stat: ptr to VSI's stat to be updated.
 *
 * Refreshes @stat->rx_discards from I40E_GLV_RDPC(@stat_idx) through the
 * 32 bit helper, then @stat->rx_discards_other from the
 * I40E_GL_RXERR1H/I40E_GL_RXERR1L register pair through the 64 bit
 * helper. The register pair is indexed by i40e_compute_pci_to_hw_id().
 **/
static void
i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw,
			      int stat_idx, bool offset_loaded,
			      struct i40e_eth_stats *stat_offset,
			      struct i40e_eth_stats *stat)
{
	u64 *rdpc_base = &stat_offset->rx_discards;
	u64 *rdpc_stat = &stat->rx_discards;
	u64 *rxerr_base = &stat_offset->rx_discards_other;
	u64 *rxerr_stat = &stat->rx_discards_other;

	i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded,
			   rdpc_base, rdpc_stat);

	i40e_stat_update64(hw,
			   I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)),
			   I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)),
			   offset_loaded, rxerr_base, rxerr_stat);
}
/** * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters. * @vsi: the VSI to be updated
**/ void i40e_update_eth_stats(struct i40e_vsi *vsi)
{ int stat_idx = le16_to_cpu(vsi->info.stat_counter_idx); struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */
es = &vsi->eth_stats;
oes = &vsi->eth_stats_offsets;
/* Gather up the stats that the hw collects */
i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx),
vsi->stat_offsets_loaded,
&oes->tx_errors, &es->tx_errors);
i40e_stat_update32(hw, I40E_GLV_RUPP(stat_idx),
vsi->stat_offsets_loaded,
&oes->rx_unknown_protocol, &es->rx_unknown_protocol);
/** * i40e_update_vsi_stats - Update the vsi statistics counters. * @vsi: the VSI to be updated * * There are a few instances where we store the same stat in a * couple of different structs. This is partly because we have * the netdev stats that need to be filled out, which is slightly * different from the "eth_stats" defined by the chip and used in * VF communications. We sort it out here.
**/ staticvoid i40e_update_vsi_stats(struct i40e_vsi *vsi)
{
u64 rx_page, rx_buf, rx_reuse, rx_alloc, rx_waive, rx_busy; struct i40e_pf *pf = vsi->back; struct rtnl_link_stats64 *ons; struct rtnl_link_stats64 *ns; /* netdev stats */ struct i40e_eth_stats *oes; struct i40e_eth_stats *es; /* device's eth stats */
u64 tx_restart, tx_busy; struct i40e_ring *p;
u64 bytes, packets; unsignedint start;
u64 tx_linearize;
u64 tx_force_wb;
u64 tx_stopped;
u64 rx_p, rx_b;
u64 tx_p, tx_b;
u16 q;
if (test_bit(__I40E_VSI_DOWN, vsi->state) ||
test_bit(__I40E_CONFIG_BUSY, pf->state)) return;
ns = i40e_get_vsi_stats_struct(vsi);
ons = &vsi->net_stats_offsets;
es = &vsi->eth_stats;
oes = &vsi->eth_stats_offsets;
/* Gather up the netdev and vsi stats that the driver collects * on the fly during packet processing
*/
rx_b = rx_p = 0;
tx_b = tx_p = 0;
tx_restart = tx_busy = tx_linearize = tx_force_wb = 0;
tx_stopped = 0;
rx_page = 0;
rx_buf = 0;
rx_reuse = 0;
rx_alloc = 0;
rx_waive = 0;
rx_busy = 0;
rcu_read_lock(); for (q = 0; q < vsi->num_queue_pairs; q++) { /* locate Tx ring */
p = READ_ONCE(vsi->tx_rings[q]); if (!p) continue;
/* pull in a couple PF stats if this is the main vsi */ if (vsi->type == I40E_VSI_MAIN) {
ns->rx_crc_errors = pf->stats.crc_errors;
ns->rx_errors = pf->stats.crc_errors + pf->stats.illegal_bytes;
ns->rx_length_errors = pf->stats.rx_length_errors;
}
}
/** * i40e_update_pf_stats - Update the PF statistics counters. * @pf: the PF to be updated
**/ staticvoid i40e_update_pf_stats(struct i40e_pf *pf)
{ struct i40e_hw_port_stats *osd = &pf->stats_offsets; struct i40e_hw_port_stats *nsd = &pf->stats; struct i40e_hw *hw = &pf->hw;
u32 val; int i;
/**
 * i40e_update_stats - Update the various statistics counters.
 * @vsi: the VSI to be updated
 *
 * Update the various stats for this VSI and its related entities: the
 * PF counters are refreshed first when @vsi is the main VSI, then the
 * VSI's own counters.
 **/
void i40e_update_stats(struct i40e_vsi *vsi)
{
	/* PF-wide counters are only refreshed through the main VSI */
	if (vsi->type == I40E_VSI_MAIN)
		i40e_update_pf_stats(vsi->back);

	i40e_update_vsi_stats(vsi);
}
/** * i40e_count_all_filters - counts VSI MAC filters * @vsi: the VSI to be searched * * Return: count of MAC filters in any state.
*/ int i40e_count_all_filters(struct i40e_vsi *vsi)
{ struct i40e_mac_filter *f; struct hlist_node *h; int bkt, cnt = 0;
/** * i40e_count_active_filters - counts VSI MAC filters * @vsi: the VSI to be searched * * Return: count of active MAC filters.
*/ int i40e_count_active_filters(struct i40e_vsi *vsi)
{ struct i40e_mac_filter *f; struct hlist_node *h; int bkt; int cnt = 0;
/** * i40e_find_filter - Search VSI filter list for specific mac/vlan filter * @vsi: the VSI to be searched * @macaddr: the MAC address * @vlan: the vlan * * Returns ptr to the filter object or NULL
**/ staticstruct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan)
{ struct i40e_mac_filter *f;
u64 key;
/** * i40e_find_mac - Find a mac addr in the macvlan filters list * @vsi: the VSI to be searched * @macaddr: the MAC address we are searching for * * Returns the first filter with the provided MAC address or NULL if * MAC address was not found
**/ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr)
{ struct i40e_mac_filter *f;
u64 key;
/**
 * i40e_is_vsi_in_vlan - Check if VSI is in vlan mode
 * @vsi: the VSI to be searched
 *
 * Return: true if VSI is in vlan mode or false otherwise
 **/
bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi)
{
	/* A PVID always forces VLAN mode.
	 *
	 * Otherwise VLAN mode is needed whenever any filter carries a VLAN
	 * other than I40E_VLAN_ALL. Walking the filter table on every call
	 * would be costly, and it is not necessary:
	 *
	 * 1) the only place where a VLAN filter can be gained is
	 *    i40e_add_filter.
	 *
	 * 2) the only place where filters are actually removed is
	 *    i40e_sync_filters_subtask.
	 *
	 * So a cached boolean, has_vlan_filter, is enough. It is set to true
	 * when i40e_add_filter adds a VLAN filter, and it is recomputed by
	 * the full search that i40e_sync_filters_subtask performs anyway
	 * after deleting filters. This avoids embedding a VLAN-mode loop
	 * inside another loop over all the filters while keeping the answer
	 * correct.
	 */
	return vsi->info.pvid || vsi->has_vlan_filter;
}
/**
 * i40e_correct_mac_vlan_filters - Correct non-VLAN filters if necessary
 * @vsi: the VSI to configure
 * @tmp_add_list: list of filters ready to be added
 * @tmp_del_list: list of filters ready to be deleted
 * @vlan_filters: the number of active VLAN filters
 *
 * Update VLAN=0 and VLAN=-1 (I40E_VLAN_ANY) filters properly so that they
 * behave as expected. If we have any active VLAN filters remaining or about
 * to be added then we need to update non-VLAN filters to be marked as VLAN=0
 * so that they only match against untagged traffic. If we no longer have any
 * active VLAN filters, we need to make all non-VLAN filters marked as VLAN=-1
 * so that they match against both tagged and untagged traffic. In this way,
 * we ensure that we correctly receive the desired traffic. This ensures that
 * when we have an active VLAN we will receive only untagged traffic and
 * traffic matching active VLANs. If we have no active VLANs then we will
 * operate in non-VLAN mode and receive all traffic, tagged or untagged.
 *
 * Finally, in a similar fashion, this function also corrects filters when
 * there is an active PVID assigned to this VSI.
 *
 * This function is only expected to be called from within
 * i40e_sync_vsi_filters.
 *
 * NOTE: This function expects to be called while under the
 * mac_filter_hash_lock
 *
 * Return: 0 on success, -ENOMEM on memory allocation failure.
 */
static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi,
					 struct hlist_head *tmp_add_list,
					 struct hlist_head *tmp_del_list,
					 int vlan_filters)
{
	s16 pvid = le16_to_cpu(vsi->info.pvid);
	struct i40e_mac_filter *f, *add_head;
	struct i40e_new_mac_filter *new;
	struct hlist_node *h;
	int bkt, new_vlan;

	/* To determine if a particular filter needs to be replaced we
	 * have the three following conditions:
	 *
	 * a) if we have a PVID assigned, then all filters which are
	 *    not marked as VLAN=PVID must be replaced with filters that
	 *    are.
	 * b) otherwise, if we have any active VLANS, all filters
	 *    which are marked as VLAN=-1 must be replaced with
	 *    filters marked as VLAN=0
	 * c) finally, if we do not have any active VLANS, all filters
	 *    which are marked as VLAN=0 must be replaced with filters
	 *    marked as VLAN=-1
	 */

	/* Update the filters about to be added in place */
	hlist_for_each_entry(new, tmp_add_list, hlist) {
		if (pvid && new->f->vlan != pvid)
			new->f->vlan = pvid;
		else if (vlan_filters && new->f->vlan == I40E_VLAN_ANY)
			new->f->vlan = 0;
		else if (!vlan_filters && new->f->vlan == 0)
			new->f->vlan = I40E_VLAN_ANY;
	}

	/* Update the remaining active filters */
	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
		/* Combine the checks for whether a filter needs to be changed
		 * and then determine the new VLAN inside the if block, in
		 * order to avoid duplicating code for adding the new filter
		 * then deleting the old filter.
		 */
		if ((pvid && f->vlan != pvid) ||
		    (vlan_filters && f->vlan == I40E_VLAN_ANY) ||
		    (!vlan_filters && f->vlan == 0)) {
			/* Determine the new vlan we will be adding */
			if (pvid)
				new_vlan = pvid;
			else if (vlan_filters)
				new_vlan = 0;
			else
				new_vlan = I40E_VLAN_ANY;

			/* Create the new filter */
			add_head = i40e_add_filter(vsi, f->macaddr, new_vlan);
			if (!add_head)
				return -ENOMEM;

			/* Create a temporary i40e_new_mac_filter */
			new = kzalloc(sizeof(*new), GFP_ATOMIC);
			if (!new)
				return -ENOMEM;

			/* Tie the tracking entry to the filter it stands for.
			 * Entries on tmp_add_list are dereferenced through
			 * new->f (see the loop above), so a zeroed entry must
			 * never be queued.
			 */
			new->f = add_head;
			new->state = add_head->state;

			/* Add the new filter to the tmp list */
			hlist_add_head(&new->hlist, tmp_add_list);

			/* Put the original filter into the delete list */
			f->state = I40E_FILTER_REMOVE;
			hash_del(&f->hlist);
			hlist_add_head(&f->hlist, tmp_del_list);
		}
	}

	vsi->has_vlan_filter = !!vlan_filters;

	return 0;
}
/** * i40e_get_vf_new_vlan - Get new vlan id on a vf * @vsi: the vsi to configure * @new_mac: new mac filter to be added * @f: existing mac filter, replaced with new_mac->f if new_mac is not NULL * @vlan_filters: the number of active VLAN filters * @trusted: flag if the VF is trusted * * Get new VLAN id based on current VLAN filters, trust, PVID * and vf-vlan-prune-disable flag. * * Returns the value of the new vlan filter or * the old value if no new filter is needed.
*/ static s16 i40e_get_vf_new_vlan(struct i40e_vsi *vsi, struct i40e_new_mac_filter *new_mac, struct i40e_mac_filter *f, int vlan_filters, bool trusted)
{
s16 pvid = le16_to_cpu(vsi->info.pvid); struct i40e_pf *pf = vsi->back; bool is_any;
/** * i40e_correct_vf_mac_vlan_filters - Correct non-VLAN VF filters if necessary * @vsi: the vsi to configure * @tmp_add_list: list of filters ready to be added * @tmp_del_list: list of filters ready to be deleted * @vlan_filters: the number of active VLAN filters * @trusted: flag if the VF is trusted * * Correct VF VLAN filters based on current VLAN filters, trust, PVID * and vf-vlan-prune-disable flag. * * In case of memory allocation failure return -ENOMEM. Otherwise, return 0. * * This function is only expected to be called from within * i40e_sync_vsi_filters. * * NOTE: This function expects to be called while under the * mac_filter_hash_lock
*/ staticint i40e_correct_vf_mac_vlan_filters(struct i40e_vsi *vsi, struct hlist_head *tmp_add_list, struct hlist_head *tmp_del_list, int vlan_filters, bool trusted)
{ struct i40e_mac_filter *f, *add_head; struct i40e_new_mac_filter *new_mac; struct hlist_node *h; int bkt, new_vlan;
/**
 * i40e_rm_default_mac_filter - Remove the default MAC filter set by NVM
 * @vsi: the PF Main VSI - inappropriate for any other VSI
 * @macaddr: the MAC address
 *
 * Remove whatever filter the firmware set up so the driver can manage
 * its own filtering intelligently. Two removal requests are issued, one
 * per flag combination, and the result of each is deliberately ignored.
 **/
static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr)
{
	struct i40e_aqc_remove_macvlan_element_data elem;
	struct i40e_hw *hw;

	/* Only appropriate for the PF main VSI */
	if (vsi->type != I40E_VSI_MAIN)
		return;

	hw = &vsi->back->hw;

	/* Ignore error returns, some firmware does it this way... */
	memset(&elem, 0, sizeof(elem));
	elem.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
	elem.vlan_tag = 0;
	ether_addr_copy(elem.mac_addr, macaddr);
	i40e_aq_remove_macvlan(hw, vsi->seid, &elem, 1, NULL);

	/* ...and some firmware does it this way. */
	memset(&elem, 0, sizeof(elem));
	elem.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH |
		     I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
	elem.vlan_tag = 0;
	ether_addr_copy(elem.mac_addr, macaddr);
	i40e_aq_remove_macvlan(hw, vsi->seid, &elem, 1, NULL);
}
/** * i40e_add_filter - Add a mac/vlan filter to the VSI * @vsi: the VSI to be searched * @macaddr: the MAC address * @vlan: the vlan * * Returns ptr to the filter object or NULL when no memory available. * * NOTE: This function is expected to be called with mac_filter_hash_lock * being held.
**/ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan)
{ struct i40e_mac_filter *f;
u64 key;
if (!vsi || !macaddr) return NULL;
f = i40e_find_filter(vsi, macaddr, vlan); if (!f) {
f = kzalloc(sizeof(*f), GFP_ATOMIC); if (!f) return NULL;
/* Update the boolean indicating if we need to function in * VLAN mode.
*/ if (vlan >= 0)
vsi->has_vlan_filter = true;
/* If we're asked to add a filter that has been marked for removal, it * is safe to simply restore it to active state. __i40e_del_filter * will have simply deleted any filters which were previously marked * NEW or FAILED, so if it is currently marked REMOVE it must have * previously been ACTIVE. Since we haven't yet run the sync filters * task, just restore this filter to the ACTIVE state so that the * sync task leaves it in place
*/ if (f->state == I40E_FILTER_REMOVE)
f->state = I40E_FILTER_ACTIVE;
return f;
}
/** * __i40e_del_filter - Remove a specific filter from the VSI * @vsi: VSI to remove from * @f: the filter to remove from the list * * This function requires you've found * the exact filter you will remove * already, such as via i40e_find_filter or i40e_find_mac. * * NOTE: This function is expected to be called with mac_filter_hash_lock * being held. * ANOTHER NOTE: This function MUST be called from within the context of * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe() * instead of list_for_each_entry().
**/ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
{ if (!f) return;
/* If the filter was never added to firmware then we can just delete it * directly and we don't want to set the status to remove or else an * admin queue command will unnecessarily fire.
*/ if ((f->state == I40E_FILTER_FAILED) ||
(f->state == I40E_FILTER_NEW)) {
hash_del(&f->hlist);
kfree(f);
} else {
f->state = I40E_FILTER_REMOVE;
}
/** * i40e_add_mac_filter - Add a MAC filter for all active VLANs * @vsi: the VSI to be searched * @macaddr: the mac address to be filtered * * If we're not in VLAN mode, just add the filter to I40E_VLAN_ANY. Otherwise, * go through all the macvlan filters and add a macvlan filter for each * unique vlan that already exists. If a PVID has been assigned, instead only * add the macaddr to that VLAN. * * Returns last filter added on success, else NULL
**/ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr)
{ struct i40e_mac_filter *f, *add = NULL; struct hlist_node *h; int bkt;
lockdep_assert_held(&vsi->mac_filter_hash_lock); if (vsi->info.pvid) return i40e_add_filter(vsi, macaddr,
le16_to_cpu(vsi->info.pvid));
if (!i40e_is_vsi_in_vlan(vsi)) return i40e_add_filter(vsi, macaddr, I40E_VLAN_ANY);
/**
 * i40e_del_mac_filter - Remove a MAC filter from all VLANs
 * @vsi: the VSI to be searched
 * @macaddr: the mac address to be removed
 *
 * Removes a given MAC address from a VSI regardless of what VLAN it has been
 * associated with. The caller must hold mac_filter_hash_lock.
 *
 * Return: 0 if at least one matching filter was found, -ENOENT otherwise.
 **/
int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr)
{
	struct i40e_mac_filter *cur;
	struct hlist_node *tmp;
	int ret = -ENOENT;
	int bkt;

	lockdep_assert_held(&vsi->mac_filter_hash_lock);

	hash_for_each_safe(vsi->mac_filter_hash, bkt, tmp, cur, hlist) {
		if (!ether_addr_equal(macaddr, cur->macaddr))
			continue;

		__i40e_del_filter(vsi, cur);
		ret = 0;
	}

	return ret;
}
/**
 * i40e_set_mac - NDO callback to set mac address
 * @netdev: network interface device structure
 * @p: pointer to an address structure
 *
 * Swaps the netdev's MAC filter for one matching the new address under
 * mac_filter_hash_lock, writes the address to firmware when the VSI is
 * the main VSI (a firmware error is logged and otherwise ignored), and
 * schedules the service task.
 *
 * Return: 0 on success, -EADDRNOTAVAIL if the address is invalid or the
 * PF is down or has a reset recovery pending.
 **/
static int i40e_set_mac(struct net_device *netdev, void *p)
{
	struct i40e_netdev_priv *np = netdev_priv(netdev);
	struct sockaddr *addr = p;
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_pf *pf = vsi->back;
	struct i40e_hw *hw = &pf->hw;
	int aq_ret;

	if (!is_valid_ether_addr(addr->sa_data) ||
	    test_bit(__I40E_DOWN, pf->state) ||
	    test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
		return -EADDRNOTAVAIL;

	if (!ether_addr_equal(hw->mac.addr, addr->sa_data))
		netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
	else
		netdev_info(netdev, "returning to hw mac address %pM\n",
			    hw->mac.addr);

	/* Copy the address first, so that we avoid a possible race with
	 * .set_rx_mode().
	 * - Remove old address from MAC filter
	 * - Copy new address
	 * - Add new address to MAC filter
	 */
	spin_lock_bh(&vsi->mac_filter_hash_lock);
	i40e_del_mac_filter(vsi, netdev->dev_addr);
	eth_hw_addr_set(netdev, addr->sa_data);
	i40e_add_mac_filter(vsi, netdev->dev_addr);
	spin_unlock_bh(&vsi->mac_filter_hash_lock);

	if (vsi->type == I40E_VSI_MAIN) {
		aq_ret = i40e_aq_mac_address_write(hw,
						   I40E_AQC_WRITE_TYPE_LAA_WOL,
						   addr->sa_data, NULL);
		if (aq_ret)
			netdev_info(netdev, "Ignoring error from firmware on LAA update, status %pe, AQ ret %s\n",
				    ERR_PTR(aq_ret),
				    libie_aq_str(hw->aq.asq_last_status));
	}

	/* schedule our worker thread which will take care of
	 * applying the new filter changes
	 */
	i40e_service_event_schedule(pf);

	return 0;
}
/**
 * i40e_config_rss_aq - Prepare for RSS using AQ commands
 * @vsi: vsi structure
 * @seed: RSS hash seed, or NULL to leave the key untouched
 * @lut: pointer to lookup table of lut_size, or NULL to leave it untouched
 * @lut_size: size of the lookup table
 *
 * Return: 0 on success, otherwise the error returned by the failing AQ
 * call (the key is programmed before the lookup table).
 **/
static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
			      u8 *lut, u16 lut_size)
{
	struct i40e_pf *pf = vsi->back;
	struct i40e_hw *hw = &pf->hw;
	int err;

	if (seed) {
		struct i40e_aqc_get_set_rss_key_data *key =
			(struct i40e_aqc_get_set_rss_key_data *)seed;

		err = i40e_aq_set_rss_key(hw, vsi->id, key);
		if (err) {
			dev_info(&pf->pdev->dev,
				 "Cannot set RSS key, err %pe aq_err %s\n",
				 ERR_PTR(err),
				 libie_aq_str(hw->aq.asq_last_status));
			return err;
		}
	}

	if (!lut)
		return 0;

	/* the lookup table is flagged as the PF one for the main VSI only */
	err = i40e_aq_set_rss_lut(hw, vsi->id, vsi->type == I40E_VSI_MAIN,
				  lut, lut_size);
	if (err)
		dev_info(&pf->pdev->dev,
			 "Cannot set RSS lut, err %pe aq_err %s\n",
			 ERR_PTR(err),
			 libie_aq_str(hw->aq.asq_last_status));

	return err;
}
/**
 * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used
 * @vsi: VSI structure
 *
 * Does nothing when the hardware lacks the I40E_HW_CAP_RSS_AQ
 * capability. Otherwise builds a lookup table and hash key, taking the
 * user supplied ones when present, and programs them through
 * i40e_config_rss_aq().
 *
 * Return: 0 on success or when RSS via AQ is not supported, -EINVAL if
 * no RSS size can be determined, -ENOMEM if the lookup table cannot be
 * allocated, otherwise the error from i40e_config_rss_aq().
 **/
static int i40e_vsi_config_rss(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
	u8 hkey[I40E_HKEY_ARRAY_SIZE];
	u8 *table;
	int err;

	if (!test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps))
		return 0;

	if (!vsi->rss_size) {
		vsi->rss_size = min_t(int, pf->alloc_rss_size,
				      vsi->num_queue_pairs);
		if (!vsi->rss_size)
			return -EINVAL;
	}

	table = kzalloc(vsi->rss_table_size, GFP_KERNEL);
	if (!table)
		return -ENOMEM;

	/* Use the user configured hash keys and lookup table if there is one,
	 * otherwise use default
	 */
	if (!vsi->rss_lut_user)
		i40e_fill_rss_lut(pf, table, vsi->rss_table_size,
				  vsi->rss_size);
	else
		memcpy(table, vsi->rss_lut_user, vsi->rss_table_size);

	if (!vsi->rss_hkey_user)
		netdev_rss_key_fill((void *)hkey, I40E_HKEY_ARRAY_SIZE);
	else
		memcpy(hkey, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);

	err = i40e_config_rss_aq(vsi, hkey, table, vsi->rss_table_size);
	kfree(table);

	return err;
}
/**
 * i40e_vsi_setup_queue_map_mqprio - Prepares mqprio based tc_config
 * @vsi: the VSI being configured,
 * @ctxt: VSI context structure
 * @enabled_tc: bitmap of traffic classes to enable; 0 selects TC0 only
 *
 * Prepares VSI tc_config to have queue configurations based on MQPRIO options.
 * Only valid for the main VSI.
 *
 * Return: 0 on success, -EINVAL if @vsi is not the main VSI, otherwise
 * the error returned by i40e_vsi_config_rss().
 **/
static int i40e_vsi_setup_queue_map_mqprio(struct i40e_vsi *vsi,
					   struct i40e_vsi_context *ctxt,
					   u8 enabled_tc)
{
	u16 qcount = 0, max_qcount, qmap, sections = 0;
	int i, override_q, pow, num_qps, ret;
	u8 netdev_tc = 0, offset = 0;

	if (vsi->type != I40E_VSI_MAIN)
		return -EINVAL;

	/* Initialise everything the rest of the function consumes: the
	 * sections reported as valid to firmware, the TC bitmap walked by
	 * the loop below, and the queue count the TC0 qmap is sized from.
	 */
	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
	sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
	vsi->tc_config.numtc = vsi->mqprio_qopt.qopt.num_tc;
	vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
	num_qps = vsi->mqprio_qopt.qopt.count[0];

	/* find the next higher power-of-2 of num queue pairs */
	pow = ilog2(num_qps);
	if (!is_power_of_2(num_qps))
		pow++;
	qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
		(pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);

	/* Setup queue offset/count for all TCs for given VSI */
	max_qcount = vsi->mqprio_qopt.qopt.count[0];
	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
		/* See if the given TC is enabled for the given VSI */
		if (vsi->tc_config.enabled_tc & BIT(i)) {
			offset = vsi->mqprio_qopt.qopt.offset[i];
			qcount = vsi->mqprio_qopt.qopt.count[i];
			if (qcount > max_qcount)
				max_qcount = qcount;
			vsi->tc_config.tc_info[i].qoffset = offset;
			vsi->tc_config.tc_info[i].qcount = qcount;
			vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
		} else {
			/* TC is not enabled so set the offset to
			 * default queue and allocate one queue
			 * for the given TC.
			 */
			vsi->tc_config.tc_info[i].qoffset = 0;
			vsi->tc_config.tc_info[i].qcount = 1;
			vsi->tc_config.tc_info[i].netdev_tc = 0;
		}
	}

	/* Set actual Tx/Rx queue pairs */
	vsi->num_queue_pairs = offset + qcount;

	/* Setup queue TC[0].qmap for given VSI context */
	ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
	ctxt->info.mapping_flags |= cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
	ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
	ctxt->info.valid_sections |= cpu_to_le16(sections);

	/* Reconfigure RSS for main VSI with max queue count */
	vsi->rss_size = max_qcount;
	ret = i40e_vsi_config_rss(vsi);
	if (ret) {
		dev_info(&vsi->back->pdev->dev,
			 "Failed to reconfig rss for num_queues (%u)\n",
			 max_qcount);
		return ret;
	}
	vsi->reconfig_rss = true;
	dev_dbg(&vsi->back->pdev->dev,
		"Reconfigured rss with num_queues (%u)\n", max_qcount);

	/* Find queue count available for channel VSIs and starting offset
	 * for channel VSIs
	 */
	override_q = vsi->mqprio_qopt.qopt.count[0];
	if (override_q && override_q < vsi->num_queue_pairs) {
		vsi->cnt_q_avail = vsi->num_queue_pairs - override_q;
		vsi->next_base_queue = override_q;
	}

	return 0;
}
/**
 * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
 * @vsi: the VSI being setup
 * @ctxt: VSI context structure
 * @enabled_tc: Enabled TCs bitmap
 * @is_add: True if called before Add VSI
 *
 * Setup VSI queue mapping for enabled traffic classes. For every traffic
 * class the per-TC queue offset and count are stored in vsi->tc_config and
 * the matching tc_mapping word is written into @ctxt. Disabled TCs are
 * pointed at the default queue with a single queue.
 **/
static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
				     struct i40e_vsi_context *ctxt,
				     u8 enabled_tc,
				     bool is_add)
{
	struct i40e_pf *pf = vsi->back;
	u16 num_tc_qps = 0;
	u16 sections = 0;
	u8 netdev_tc = 0;
	u16 numtc = 1;
	u16 qcount;
	u8 offset;
	u16 qmap;
	int i;

	sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
	offset = 0;
	/* zero out queue mapping, it will get updated on the end of the function */
	memset(ctxt->info.queue_mapping, 0, sizeof(ctxt->info.queue_mapping));

	if (vsi->type == I40E_VSI_MAIN) {
		/* This code helps add more queue to the VSI if we have
		 * more cores than RSS can support, the higher cores will
		 * be served by ATR or other filters. Furthermore, the
		 * non-zero req_queue_pairs says that user requested a new
		 * queue count via ethtool's set_channels, so use this
		 * value for queues distribution across traffic classes
		 * We need at least one queue pair for the interface
		 * to be usable as we see in else statement.
		 */
		if (vsi->req_queue_pairs > 0)
			vsi->num_queue_pairs = vsi->req_queue_pairs;
		else if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
			vsi->num_queue_pairs = pf->num_lan_msix;
		else
			vsi->num_queue_pairs = 1;
	}

	/* Number of queues per enabled TC */
	if (vsi->type == I40E_VSI_MAIN ||
	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs != 0))
		num_tc_qps = vsi->num_queue_pairs;
	else
		num_tc_qps = vsi->alloc_queue_pairs;

	if (enabled_tc && test_bit(I40E_FLAG_DCB_ENA, vsi->back->flags)) {
		/* Find numtc from enabled TC bitmap */
		for (i = 0, numtc = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
			if (enabled_tc & BIT(i)) /* TC is enabled */
				numtc++;
		}
		if (!numtc) {
			dev_warn(&pf->pdev->dev, "DCB is enabled but no TC enabled, forcing TC0\n");
			numtc = 1;
		}
		num_tc_qps = num_tc_qps / numtc;
		num_tc_qps = min_t(int, num_tc_qps,
				   i40e_pf_get_max_q_per_tc(pf));
	}

	/* Do not allow use more TC queue pairs than MSI-X vectors exist */
	if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
		num_tc_qps = min_t(int, num_tc_qps, pf->num_lan_msix);

	/* Setup queue offset/count for all TCs for given VSI */
	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
		/* See if the given TC is enabled for the given VSI */
		if (vsi->tc_config.enabled_tc & BIT(i)) {
			/* TC is enabled */
			int pow, num_qps;

			switch (vsi->type) {
			case I40E_VSI_MAIN:
				if ((!test_bit(I40E_FLAG_FD_SB_ENA,
					       pf->flags) &&
				     !test_bit(I40E_FLAG_FD_ATR_ENA,
					       pf->flags)) ||
				    vsi->tc_config.enabled_tc != 1) {
					qcount = min_t(int, pf->alloc_rss_size,
						       num_tc_qps);
					break;
				}
				fallthrough;
			case I40E_VSI_FDIR:
			case I40E_VSI_SRIOV:
			case I40E_VSI_VMDQ2:
			default:
				qcount = num_tc_qps;
				WARN_ON(i != 0);
				break;
			}
			vsi->tc_config.tc_info[i].qoffset = offset;
			vsi->tc_config.tc_info[i].qcount = qcount;

			/* find the next higher power-of-2 of num queue pairs */
			num_qps = qcount;
			pow = 0;
			while (num_qps && (BIT_ULL(pow) < qcount)) {
				pow++;
				num_qps >>= 1;
			}

			/* Each enabled TC gets the next netdev TC index and a
			 * mapping word built from its first queue offset and
			 * the power-of-2 exponent computed above. Without
			 * this, qmap would be written to the context
			 * uninitialised.
			 */
			vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
			qmap = (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
			       (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);

			offset += qcount;
		} else {
			/* TC is not enabled so set the offset to
			 * default queue and allocate one queue
			 * for the given TC.
			 */
			vsi->tc_config.tc_info[i].qoffset = 0;
			vsi->tc_config.tc_info[i].qcount = 1;
			vsi->tc_config.tc_info[i].netdev_tc = 0;

			qmap = 0;
		}
		ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
	}

	/* Do not change previously set num_queue_pairs for PFs and VFs*/
	if ((vsi->type == I40E_VSI_MAIN && numtc != 1) ||
	    (vsi->type == I40E_VSI_SRIOV && vsi->num_queue_pairs == 0) ||
	    (vsi->type != I40E_VSI_MAIN && vsi->type != I40E_VSI_SRIOV))
		vsi->num_queue_pairs = offset;

	/* Scheduler section valid can only be set for ADD VSI */
	if (is_add) {
		sections |= I40E_AQ_VSI_PROP_SCHED_VALID;

		ctxt->info.up_enable_bits = enabled_tc;
	}
	if (vsi->type == I40E_VSI_SRIOV) {
		/* SR-IOV VSIs list every queue explicitly */
		ctxt->info.mapping_flags |=
				     cpu_to_le16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);
		for (i = 0; i < vsi->num_queue_pairs; i++)
			ctxt->info.queue_mapping[i] =
					       cpu_to_le16(vsi->base_queue + i);
	} else {
		/* everything else uses one contiguous range from base_queue */
		ctxt->info.mapping_flags |=
					cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
		ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
	}
	ctxt->info.valid_sections |= cpu_to_le16(sections);
}
/**
 * i40e_addr_sync - Callback for dev_(mc|uc)_sync to add address
 * @netdev: the netdevice
 * @addr: address to add
 *
 * Invoked by __dev_(mc|uc)_sync for each address that has to be added.
 * __dev_(uc|mc)_sync is called from .set_rx_mode, which guarantees that the
 * hash lock is held.
 *
 * Returns 0 when a filter was obtained for @addr, -ENOMEM otherwise.
 */
static int i40e_addr_sync(struct net_device *netdev, const u8 *addr)
{
	struct i40e_netdev_priv *priv = netdev_priv(netdev);

	/* a zero/NULL result from the add helper is reported as -ENOMEM */
	return i40e_add_mac_filter(priv->vsi, addr) ? 0 : -ENOMEM;
}
/**
 * i40e_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
 * @netdev: the netdevice
 * @addr: address to remove
 *
 * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
 * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
 *
 * Returns 0 in all cases.
 */
static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
{
	struct i40e_netdev_priv *np = netdev_priv(netdev);
	struct i40e_vsi *vsi = np->vsi;

	/* Under some circumstances, we might receive a request to delete
	 * our own device address from our uc list. Because we store the
	 * device address in the VSI's MAC/VLAN filter list, we need to ignore
	 * such requests and not delete our device address from this list.
	 */
	if (ether_addr_equal(addr, netdev->dev_addr))
		return 0;

	/* Drop the filter for this address; the result is deliberately not
	 * propagated, the callback reports success either way.
	 */
	i40e_del_mac_filter(vsi, addr);

	/* check for other flag changes */
	/* NOTE(review): this flag check reads like it belongs to the rx-mode
	 * path rather than to a per-address callback; it is kept so existing
	 * behaviour is not lost - confirm against the upstream driver.
	 */
	if (vsi->current_netdev_flags != vsi->netdev->flags) {
		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
		set_bit(__I40E_MACVLAN_SYNC_PENDING, vsi->back->state);
	}

	/* previously control fell off the end of this non-void function */
	return 0;
}
/** * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries * @vsi: Pointer to VSI struct * @from: Pointer to list which contains MAC filter entries - changes to * those entries needs to be undone. * * MAC filter entries from this list were slated for deletion.
**/ staticvoid i40e_undo_del_filter_entries(struct i40e_vsi *vsi, struct hlist_head *from)
{ struct i40e_mac_filter *f; struct hlist_node *h;
/* Move the element back into MAC filter list*/
hlist_del(&f->hlist);
hash_add(vsi->mac_filter_hash, &f->hlist, key);
}
}
/**
 * i40e_undo_add_filter_entries - Undo the changes made to MAC filter entries
 * @vsi: Pointer to vsi struct
 * @from: Pointer to list which contains MAC filter entries - changes to
 *        those entries needs to be undone.
 *
 * MAC filter entries from this list were slated for addition. Only the
 * temporary wrapper objects on @from are released here.
 **/
static void i40e_undo_add_filter_entries(struct i40e_vsi *vsi,
					 struct hlist_head *from)
{
	struct i40e_new_mac_filter *entry;
	struct hlist_node *tmp;

	hlist_for_each_entry_safe(entry, tmp, from, hlist) {
		/* unlink the wrapper, adjust the address refcount by -1 and
		 * free the wrapper structure itself
		 */
		hlist_del(&entry->hlist);
		netdev_hw_addr_refcnt(entry->f, vsi->netdev, -1);
		kfree(entry);
	}
}
/**
 * i40e_next_filter - Get the next non-broadcast filter from a list
 * @next: pointer to filter in list
 *
 * Walks forward from the entry after @next and returns the first filter whose
 * address is not the broadcast address, or NULL when the list is exhausted.
 * Broadcast filters are skipped because they are not handled via the normal
 * firmware update path.
 */
static
struct i40e_new_mac_filter *i40e_next_filter(struct i40e_new_mac_filter *next)
{
	hlist_for_each_entry_continue(next, hlist) {
		if (is_broadcast_ether_addr(next->f->macaddr))
			continue;

		return next;
	}

	return NULL;
}
/** * i40e_update_filter_state - Update filter state based on return data * from firmware * @count: Number of filters added * @add_list: return data from fw * @add_head: pointer to first filter in current batch * * MAC filter entries from list were slated to be added to device. Returns * number of successful filters. Note that 0 does NOT mean success!
**/ staticint
i40e_update_filter_state(int count, struct i40e_aqc_add_macvlan_element_data *add_list, struct i40e_new_mac_filter *add_head)
{ int retval = 0; int i;
for (i = 0; i < count; i++) { /* Always check status of each filter. We don't need to check * the firmware return status because we pre-set the filter * status to I40E_AQC_MM_ERR_NO_RES when sending the filter * request to the adminq. Thus, if it no longer matches then * we know the filter is active.
*/ if (add_list[i].match_method == I40E_AQC_MM_ERR_NO_RES) {
add_head->state = I40E_FILTER_FAILED;
} else {
add_head->state = I40E_FILTER_ACTIVE;
retval++;
}
add_head = i40e_next_filter(add_head); if (!add_head) break;
}
return retval;
}
/** * i40e_aqc_del_filters - Request firmware to delete a set of filters * @vsi: ptr to the VSI * @vsi_name: name to display in messages * @list: the list of filters to send to firmware * @num_del: the number of filters to delete * @retval: Set to -EIO on failure to delete * * Send a request to firmware via AdminQ to delete a set of filters. Uses * *retval instead of a return value so that success does not force ret_val to * be set to 0. This ensures that a sequence of calls to this function * preserve the previous value of *retval on successful delete.
*/ static void i40e_aqc_del_filters(struct i40e_vsi *vsi, constchar *vsi_name, struct i40e_aqc_remove_macvlan_element_data *list, int num_del, int *retval)
{ struct i40e_hw *hw = &vsi->back->hw; enum libie_aq_err aq_status; int aq_ret;
/* Explicitly ignore and do not report when firmware returns ENOENT */ if (aq_ret && !(aq_status == LIBIE_AQ_RC_ENOENT)) {
*retval = -EIO;
dev_info(&vsi->back->pdev->dev, "ignoring delete macvlan error on %s, err %pe, aq_err %s\n",
vsi_name, ERR_PTR(aq_ret), libie_aq_str(aq_status));
}
}
/** * i40e_aqc_add_filters - Request firmware to add a set of filters * @vsi: ptr to the VSI * @vsi_name: name to display in messages * @list: the list of filters to send to firmware * @add_head: Position in the add hlist * @num_add: the number of filters to add * * Send a request to firmware via AdminQ to add a chunk of filters. Will set * __I40E_VSI_OVERFLOW_PROMISC bit in vsi->state if the firmware has run out of * space for more filters.
*/ static void i40e_aqc_add_filters(struct i40e_vsi *vsi, constchar *vsi_name, struct i40e_aqc_add_macvlan_element_data *list, struct i40e_new_mac_filter *add_head, int num_add)
{ struct i40e_hw *hw = &vsi->back->hw; enum libie_aq_err aq_status; int fcnt;
if (fcnt != num_add) { if (vsi->type == I40E_VSI_MAIN) {
set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, promiscuous mode forced on\n",
libie_aq_str(aq_status), vsi_name);
} elseif (vsi->type == I40E_VSI_SRIOV ||
vsi->type == I40E_VSI_VMDQ1 ||
vsi->type == I40E_VSI_VMDQ2) {
dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, please set promiscuous on manually for %s\n",
libie_aq_str(aq_status), vsi_name, vsi_name);
} else {
dev_warn(&vsi->back->pdev->dev, "Error %s adding RX filters on %s, incorrect VSI type: %i.\n",
libie_aq_str(aq_status), vsi_name, vsi->type);
}
}
}
/** * i40e_aqc_broadcast_filter - Set promiscuous broadcast flags * @vsi: pointer to the VSI * @vsi_name: the VSI name * @f: filter data * * This function sets or clears the promiscuous broadcast flags for VLAN * filters in order to properly receive broadcast frames. Assumes that only * broadcast filters are passed. * * Returns status indicating success or failure;
**/ staticint
i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, constchar *vsi_name, struct i40e_mac_filter *f)
{ bool enable = f->state == I40E_FILTER_NEW ||
f->state == I40E_FILTER_NEW_SYNC; struct i40e_hw *hw = &vsi->back->hw; int aq_ret;
if (aq_ret) {
set_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
dev_warn(&vsi->back->pdev->dev, "Error %s, forcing overflow promiscuous on %s\n",
libie_aq_str(hw->aq.asq_last_status), vsi_name);
}
return aq_ret;
}
/**
 * i40e_set_promiscuous - set promiscuous mode
 * @pf: board private structure
 * @promisc: promisc on or off
 *
 * There are different ways of setting promiscuous mode on a PF depending on
 * what state/environment we're in. This identifies and sets it appropriately.
 * pf->cur_promisc is only updated when every request succeeded.
 *
 * Returns 0 on success, otherwise the first error reported by the AdminQ.
 **/
static int i40e_set_promiscuous(struct i40e_pf *pf, bool promisc)
{
	struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf);
	struct i40e_hw *hw = &pf->hw;
	int aq_ret;

	if (vsi->type == I40E_VSI_MAIN &&
	    i40e_pf_get_main_veb(pf) &&
	    !test_bit(I40E_FLAG_MFP_ENA, pf->flags)) {
		/* set defport ON for Main VSI instead of true promisc
		 * this way we will get all unicast/multicast and VLAN
		 * promisc behavior but will not get VF or VMDq traffic
		 * replicated on the Main VSI.
		 */
		if (promisc)
			aq_ret = i40e_aq_set_default_vsi(hw,
							 vsi->seid,
							 NULL);
		else
			aq_ret = i40e_aq_clear_default_vsi(hw,
							   vsi->seid,
							   NULL);
		if (aq_ret) {
			dev_info(&pf->pdev->dev, "Set default VSI failed, err %pe, aq_err %s\n",
				 ERR_PTR(aq_ret),
				 libie_aq_str(hw->aq.asq_last_status));
		}
	} else {
		int mc_ret;

		aq_ret = i40e_aq_set_vsi_unicast_promiscuous(
						  hw,
						  vsi->seid,
						  promisc, NULL, true);
		if (aq_ret) {
			dev_info(&pf->pdev->dev, "set unicast promisc failed, err %pe, aq_err %s\n",
				 ERR_PTR(aq_ret),
				 libie_aq_str(hw->aq.asq_last_status));
		}

		/* The multicast request is still attempted after a unicast
		 * failure, but its result is kept separately so a later
		 * success cannot hide the earlier error.
		 */
		mc_ret = i40e_aq_set_vsi_multicast_promiscuous(
						  hw,
						  vsi->seid,
						  promisc, NULL);
		if (mc_ret) {
			dev_info(&pf->pdev->dev, "set multicast promisc failed, err %pe, aq_err %s\n",
				 ERR_PTR(mc_ret),
				 libie_aq_str(hw->aq.asq_last_status));
		}

		/* report the first failure, if any */
		if (!aq_ret)
			aq_ret = mc_ret;
	}

	if (!aq_ret)
		pf->cur_promisc = promisc;

	return aq_ret;
}
/**
 * i40e_sync_vsi_filters - Update the VSI filter list to the HW
 * @vsi: ptr to the VSI
 *
 * Push any outstanding VSI filter changes through the AdminQ: filters marked
 * for removal are collected on a temporary delete list, new filters are
 * wrapped and collected on a temporary add list, both lists are sent to
 * firmware in buffer-sized batches, and the active/failed filter counts are
 * then used to decide about overflow promiscuous mode.
 *
 * NOTE(review): as it stands in this file the body is incomplete - several
 * variables are read without a visible initialisation or declaration, at
 * least one loop header is missing, and no return statement or closing brace
 * is visible. Confirm against the upstream driver before relying on it.
 *
 * Returns 0 or error value
**/ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
{ struct hlist_head tmp_add_list, tmp_del_list; struct i40e_mac_filter *f; struct i40e_new_mac_filter *new, *add_head = NULL; struct i40e_hw *hw = &vsi->back->hw; bool old_overflow, new_overflow; unsignedint failed_filters = 0; unsignedint vlan_filters = 0; char vsi_name[16] = "PF"; int filter_list_len = 0;
u32 changed_flags = 0; struct hlist_node *h; struct i40e_pf *pf; int num_add = 0; int num_del = 0; int aq_ret = 0; int retval = 0;
u16 cmd_flags; int list_size; int bkt;
/* NOTE(review): `unsignedint` above looks like a fused `unsigned int`. No initialisation of tmp_add_list, tmp_del_list, pf, old_overflow or new_overflow is visible before their first use below, and del_list, add_list and cur_promisc are used without a visible declaration. */
/* Step 1: only when the VSI is flagged as changed, walk the MAC filter hash under mac_filter_hash_lock and sort filters onto the two temporary lists. */
if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
spin_lock_bh(&vsi->mac_filter_hash_lock); /* Create a list of filters to delete. */
hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) { if (f->state == I40E_FILTER_REMOVE) { /* Move the element into temporary del_list */
hash_del(&f->hlist);
hlist_add_head(&f->hlist, &tmp_del_list);
/* Avoid counting removed filters */ continue;
} if (f->state == I40E_FILTER_NEW) { /* Create a temporary i40e_new_mac_filter */ new = kzalloc(sizeof(*new), GFP_ATOMIC); if (!new) goto err_no_memory_locked;
/* Store pointer to the real filter */
new->f = f;
new->state = f->state;
/* Add it to the hash list */
hlist_add_head(&new->hlist, &tmp_add_list);
f->state = I40E_FILTER_NEW_SYNC;
}
/* Count the number of active (current and new) VLAN * filters we have now. Does not count filters which * are marked for deletion.
*/ if (f->vlan > 0)
vlan_filters++;
}
/* NOTE(review): from here on the statements use `continue`, del_list, cmd_flags and list_size as if inside a loop over tmp_del_list with an allocated delete buffer, but neither the unlock of mac_filter_hash_lock, the allocation, nor the loop header is visible. */
/* handle broadcast filters by updating the broadcast * promiscuous flag and release filter list.
*/ if (is_broadcast_ether_addr(f->macaddr)) {
i40e_aqc_broadcast_filter(vsi, vsi_name, f);
hlist_del(&f->hlist);
kfree(f); continue;
}
/* add to delete list */
ether_addr_copy(del_list[num_del].mac_addr, f->macaddr); if (f->vlan == I40E_VLAN_ANY) {
del_list[num_del].vlan_tag = 0;
cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
} else {
del_list[num_del].vlan_tag =
cpu_to_le16((u16)(f->vlan));
}
/* NOTE(review): num_del is never incremented and cmd_flags is never stored into del_list[] in the visible code - verify. */
/* flush a full buffer */ if (num_del == filter_list_len) {
i40e_aqc_del_filters(vsi, vsi_name, del_list,
num_del, &retval);
memset(del_list, 0, list_size);
num_del = 0;
} /* Release memory for MAC filter entries which were * synced up with HW.
*/
hlist_del(&f->hlist);
kfree(f);
}
/* send whatever is left in a partially filled delete buffer */
if (num_del) {
i40e_aqc_del_filters(vsi, vsi_name, del_list,
num_del, &retval);
}
kfree(del_list);
del_list = NULL;
}
/* Step 2: send the new filters to firmware in batches of at most filter_list_len elements (AdminQ send buffer size divided by the element size). */
if (!hlist_empty(&tmp_add_list)) { /* Do all the adds now. */
filter_list_len = hw->aq.asq_buf_size / sizeof(struct i40e_aqc_add_macvlan_element_data);
list_size = filter_list_len * sizeof(struct i40e_aqc_add_macvlan_element_data);
add_list = kzalloc(list_size, GFP_ATOMIC); if (!add_list) goto err_no_memory;
num_add = 0;
hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { /* handle broadcast filters by updating the broadcast * promiscuous flag instead of adding a MAC filter.
*/ if (is_broadcast_ether_addr(new->f->macaddr)) { if (i40e_aqc_broadcast_filter(vsi, vsi_name,
new->f))
new->state = I40E_FILTER_FAILED; else
new->state = I40E_FILTER_ACTIVE; continue;
}
/* add to add array */ if (num_add == 0)
add_head = new;
cmd_flags = 0;
ether_addr_copy(add_list[num_add].mac_addr,
new->f->macaddr); if (new->f->vlan == I40E_VLAN_ANY) {
add_list[num_add].vlan_tag = 0;
cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
} else {
add_list[num_add].vlan_tag =
cpu_to_le16((u16)(new->f->vlan));
}
add_list[num_add].queue_number = 0; /* set invalid match method for later detection */
add_list[num_add].match_method = I40E_AQC_MM_ERR_NO_RES;
cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
add_list[num_add].flags = cpu_to_le16(cmd_flags);
num_add++;
/* flush a full buffer */ if (num_add == filter_list_len) {
i40e_aqc_add_filters(vsi, vsi_name, add_list,
add_head, num_add);
memset(add_list, 0, list_size);
num_add = 0;
}
} if (num_add) {
i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
num_add);
} /* Now move all of the filters from the temp add list back to * the VSI's list.
*/
spin_lock_bh(&vsi->mac_filter_hash_lock);
hlist_for_each_entry_safe(new, h, &tmp_add_list, hlist) { /* Only update the state if we're still NEW */ if (new->f->state == I40E_FILTER_NEW ||
new->f->state == I40E_FILTER_NEW_SYNC)
new->f->state = new->state;
hlist_del(&new->hlist);
netdev_hw_addr_refcnt(new->f, vsi->netdev, -1);
kfree(new);
}
spin_unlock_bh(&vsi->mac_filter_hash_lock);
kfree(add_list);
add_list = NULL;
}
/* Step 3: recount filters by state; active_filters is stored on the VSI, failed_filters is local. */
/* Determine the number of active and failed filters. */
spin_lock_bh(&vsi->mac_filter_hash_lock);
vsi->active_filters = 0;
hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) { if (f->state == I40E_FILTER_ACTIVE)
vsi->active_filters++; elseif (f->state == I40E_FILTER_FAILED)
failed_filters++;
}
spin_unlock_bh(&vsi->mac_filter_hash_lock);
/* NOTE(review): `elseif` above looks like a fused `else if`. */
/* Check if we are able to exit overflow promiscuous mode. We can * safely exit if we didn't just enter, we no longer have any failed * filters, and we have reduced filters below the threshold value.
*/ if (old_overflow && !failed_filters &&
vsi->active_filters < vsi->promisc_threshold) {
dev_info(&pf->pdev->dev, "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
vsi_name);
clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
vsi->promisc_threshold = 0;
}
/* if the VF is not trusted do not do promisc */ if (vsi->type == I40E_VSI_SRIOV && pf->vf &&
!pf->vf[vsi->vf_id].trusted) {
clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state); goto out;
}
/* If we are entering overflow promiscuous, we need to calculate a new * threshold for when we are safe to exit
*/ if (!old_overflow && new_overflow)
vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
/* NOTE(review): the branch below is guarded by IFF_ALLMULTI and declares cur_multipromisc, yet its body assigns and uses cur_promisc and calls i40e_set_promiscuous(); lines appear to be missing between the guard and the body. */
/* check for changes in promiscuous modes */ if (changed_flags & IFF_ALLMULTI) { bool cur_multipromisc;
cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
new_overflow);
aq_ret = i40e_set_promiscuous(pf, cur_promisc); if (aq_ret) {
retval = i40e_aq_rc_to_posix(aq_ret,
hw->aq.asq_last_status);
dev_info(&pf->pdev->dev, "Setting promiscuous %s failed on %s, err %pe aq_err %s\n",
cur_promisc ? "on" : "off",
vsi_name,
ERR_PTR(aq_ret),
libie_aq_str(hw->aq.asq_last_status));
}
}
out: /* if something went wrong then set the changed flag so we try again */ if (retval)
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
/* NOTE(review): in the visible code nothing separates the `out` path from the error labels below, so the success path would also run the undo helpers; a return is presumably missing here - confirm. */
err_no_memory: /* Restore elements on the temporary add and delete lists */
spin_lock_bh(&vsi->mac_filter_hash_lock);
err_no_memory_locked:
i40e_undo_del_filter_entries(vsi, &tmp_del_list);
i40e_undo_add_filter_entries(vsi, &tmp_add_list);
spin_unlock_bh(&vsi->mac_filter_hash_lock);
/* NOTE(review): no return statement or closing brace for this function is visible after the unlock. */
/**
 * i40e_sync_filters_subtask - Sync the VSI filter list with HW
 * @pf: board private structure
 *
 * Consumes the MACVLAN_SYNC_PENDING request and pushes filter changes for
 * every VSI that is flagged as changed and is not being released. The request
 * bit is set again when VFs are currently disabled or when a VSI fails to
 * sync, so that the work is retried later.
 **/
static void i40e_sync_filters_subtask(struct i40e_pf *pf)
{
	struct i40e_vsi *vsi;
	int v;

	/* nothing to do without a PF or without a pending request */
	if (!pf || !test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
		return;

	if (test_bit(__I40E_VF_DISABLE, pf->state)) {
		/* put the request back so it is picked up on a later run */
		set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
		return;
	}

	i40e_pf_for_each_vsi(pf, v, vsi) {
		if (!(vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED))
			continue;
		if (test_bit(__I40E_VSI_RELEASING, vsi->state))
			continue;

		if (i40e_sync_vsi_filters(vsi)) {
			/* come back and try again later */
			set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
			break;
		}
	}
}
/**
 * i40e_calculate_vsi_rx_buf_len - Calculates buffer length
 *
 * @vsi: VSI to calculate rx_buf_len from
 *
 * A VSI without a netdev, or one with the legacy Rx flag set, gets
 * SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048).
 *
 * NOTE(review): the value returned for all other VSIs and the closing brace
 * are not visible here; the body looks truncated - confirm against the
 * upstream driver.
*/ static u16 i40e_calculate_vsi_rx_buf_len(struct i40e_vsi *vsi)
{ if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) return SKB_WITH_OVERHEAD(I40E_RXBUFFER_2048);
/* NOTE(review): the function ends here without a final return or closing brace. */
/**
 * i40e_max_vsi_frame_size - returns the maximum allowed frame size for VSI
 * @vsi: the vsi
 * @xdp_prog: XDP program
 *
 * NOTE(review): only the local declarations are visible; how @xdp_prog and
 * chain_len enter the result cannot be told from here. `staticint` below
 * looks like a fused `static int`. Confirm against the upstream driver.
**/ staticint i40e_max_vsi_frame_size(struct i40e_vsi *vsi, struct bpf_prog *xdp_prog)
{
/* per-buffer Rx length for this VSI, taken from the helper */
u16 rx_buf_len = i40e_calculate_vsi_rx_buf_len(vsi);
u16 chain_len;
/* NOTE(review): the remainder of the body (use of chain_len, return, closing brace) is missing. */
/**
 * i40e_change_mtu - NDO callback to change the Maximum Transfer Unit
 * @netdev: network interface device structure
 * @new_mtu: new value for maximum frame size
 *
 * Rejects an MTU that does not fit into the VSI's maximum frame size once the
 * packet header padding is accounted for. Otherwise stores the new MTU,
 * re-initialises the VSI if the interface is running and flags an L2 change
 * for the client service.
 *
 * Returns 0 on success, negative on failure
 **/
static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
{
	struct i40e_netdev_priv *priv = netdev_priv(netdev);
	struct i40e_vsi *vsi = priv->vsi;
	struct i40e_pf *pf = vsi->back;
	int mtu_limit;

	/* largest MTU = maximum frame size minus header padding */
	mtu_limit = i40e_max_vsi_frame_size(vsi, vsi->xdp_prog) -
		    I40E_PACKET_HDR_PAD;
	if (new_mtu > mtu_limit) {
		netdev_err(netdev, "Error changing mtu to %d, Max is %d\n",
			   new_mtu, mtu_limit);
		return -EINVAL;
	}

	netdev_dbg(netdev, "changing MTU from %d to %d\n",
		   netdev->mtu, new_mtu);
	WRITE_ONCE(netdev->mtu, new_mtu);

	/* a running interface has to be re-initialised to pick up the MTU */
	if (netif_running(netdev))
		i40e_vsi_reinit_locked(vsi);

	set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
	set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);

	return 0;
}
/**
 * i40e_vlan_stripping_enable - Turn on vlan stripping for the VSI
 * @vsi: the vsi being adjusted
 *
 * Does nothing when a port VLAN (vsi->info.pvid) is configured.
 *
 * NOTE(review): everything after the port-VLAN guard, including the use of
 * ctxt/ret and the closing brace, is not visible here - confirm against the
 * upstream driver.
**/ void i40e_vlan_stripping_enable(struct i40e_vsi *vsi)
{ struct i40e_vsi_context ctxt; int ret;
/* Don't modify stripping options if a port VLAN is active */ if (vsi->info.pvid) return;
/* NOTE(review): body truncated after this guard. */
/**
 * i40e_vlan_stripping_disable - Turn off vlan stripping for the VSI
 * @vsi: the vsi being adjusted
 *
 * Does nothing when a port VLAN (vsi->info.pvid) is configured.
 *
 * NOTE(review): everything after the port-VLAN guard, including the use of
 * ctxt/ret and the closing brace, is not visible here - confirm against the
 * upstream driver.
**/ void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
{ struct i40e_vsi_context ctxt; int ret;
/* Don't modify stripping options if a port VLAN is active */ if (vsi->info.pvid) return;
/* NOTE(review): body truncated after this guard. */
/**
 * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address
 * @vsi: the vsi being configured
 * @vid: vlan id to be added (0 = untagged only , -1 = any)
 *
 * This is a helper function for adding a new MAC/VLAN filter with the
 * specified VLAN for each existing MAC address already in the hash table.
 * This function does *not* perform any accounting to update filters based on
 * VLAN mode.
 *
 * NOTE: this function expects to be called while under the
 * mac_filter_hash_lock
 *
 * Returns 0 on success, -ENOMEM when a filter could not be added.
 **/
int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
{
	struct i40e_mac_filter *f;
	struct hlist_node *h;
	int bkt;

	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
		if (f->state == I40E_FILTER_REMOVE) {
			/* If we're asked to add a filter that has been marked
			 * for removal, it is safe to simply restore it to
			 * active state. __i40e_del_filter will have simply
			 * deleted any filters which were previously marked
			 * NEW or FAILED, so if it is currently marked REMOVE
			 * it must have previously been ACTIVE. Since we
			 * haven't yet run the sync filters task, just restore
			 * this filter to the ACTIVE state so that the sync
			 * task leaves it in place. Filters pending removal
			 * for a different VLAN are skipped.
			 */
			if (f->vlan == vid)
				f->state = I40E_FILTER_ACTIVE;
			continue;
		}

		if (!i40e_add_filter(vsi, f->macaddr, vid)) {
			dev_info(&vsi->back->pdev->dev, "Could not add vlan filter %d for %pM\n",
				 vid, f->macaddr);
			return -ENOMEM;
		}
	}

	return 0;
}
/**
 * i40e_vsi_add_vlan - Add VSI membership for given VLAN
 * @vsi: the VSI being configured
 * @vid: VLAN id to be added
 *
 * Returns -EINVAL when a port VLAN is configured, 0 for VID 0 (nothing to
 * do) or on success, otherwise the error from adding the filters.
 **/
int i40e_vsi_add_vlan(struct i40e_vsi *vsi, u16 vid)
{
	int status;

	if (vsi->info.pvid)
		return -EINVAL;

	/* The network stack will attempt to add VID=0, with the intention to
	 * receive priority tagged packets with a VLAN of 0. Our HW receives
	 * these packets by default when configured to receive untagged
	 * packets, so we don't need to add a filter for this case.
	 * Additionally, HW interprets adding a VID=0 filter as meaning to
	 * receive *only* tagged traffic and stops receiving untagged traffic.
	 * Thus, we do not want to actually add a filter for VID=0
	 */
	if (!vid)
		return 0;

	/* the helper walks the filter hash, so hold the lock around it */
	spin_lock_bh(&vsi->mac_filter_hash_lock);
	status = i40e_add_vlan_all_mac(vsi, vid);
	spin_unlock_bh(&vsi->mac_filter_hash_lock);

	/* on success, schedule our worker thread which will take care of
	 * applying the new filter changes
	 */
	if (!status)
		i40e_service_event_schedule(vsi->back);

	return status;
}
/**
 * i40e_rm_vlan_all_mac - Remove MAC/VLAN pair for all MAC with the given VLAN
 * @vsi: the vsi being configured
 * @vid: vlan id to be removed (0 = untagged only , -1 = any)
 *
 * This function should be used to remove all VLAN filters which match the
 * given VID. It does not schedule the service event and does not take the
 * mac_filter_hash_lock so it may be combined with other operations under
 * a single invocation of the mac_filter_hash_lock.
 *
 * NOTE: this function expects to be called while under the
 * mac_filter_hash_lock
 *
 * NOTE(review): only the local declarations are visible; the loop over the
 * filter hash and the closing brace are missing - confirm against the
 * upstream driver.
*/ void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
{ struct i40e_mac_filter *f; struct hlist_node *h; int bkt;
/* NOTE(review): body truncated after the declarations. */
/**
 * i40e_vsi_kill_vlan - Remove VSI membership for given VLAN
 * @vsi: the VSI being configured
 * @vid: VLAN id to be removed
 *
 * Nothing is done for VID 0 or while a port VLAN is configured. Otherwise
 * every MAC/VLAN filter for @vid is marked for removal and the service task
 * is scheduled to push the change to hardware.
 **/
void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, u16 vid)
{
	if (!vid || vsi->info.pvid)
		return;

	/* i40e_rm_vlan_all_mac() expects mac_filter_hash_lock to be held.
	 * Without this call the worker was scheduled with nothing to do.
	 */
	spin_lock_bh(&vsi->mac_filter_hash_lock);
	i40e_rm_vlan_all_mac(vsi, vid);
	spin_unlock_bh(&vsi->mac_filter_hash_lock);

	/* schedule our worker thread which will take care of
	 * applying the new filter changes
	 */
	i40e_service_event_schedule(vsi->back);
}
/**
 * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload
 * @netdev: network interface to be adjusted
 * @proto: unused protocol value
 * @vid: vlan id to be added
 *
 * net_device_ops implementation for adding vlan ids. The VLAN is recorded in
 * the VSI's active_vlans bitmap only when adding it succeeded.
 *
 * Returns 0 on success, -EINVAL for an out-of-range @vid, otherwise the
 * error from i40e_vsi_add_vlan().
 **/
static int i40e_vlan_rx_add_vid(struct net_device *netdev,
				__always_unused __be16 proto, u16 vid)
{
	struct i40e_netdev_priv *priv = netdev_priv(netdev);
	struct i40e_vsi *vsi = priv->vsi;
	int status;

	if (vid >= VLAN_N_VID)
		return -EINVAL;

	status = i40e_vsi_add_vlan(vsi, vid);
	if (status)
		return status;

	set_bit(vid, vsi->active_vlans);

	return 0;
}
/**
 * i40e_vlan_rx_add_vid_up - Add a vlan id filter to HW offload in UP path
 * @netdev: network interface to be adjusted
 * @proto: unused protocol value
 * @vid: vlan id to be added
 *
 * Only records @vid in the VSI's active_vlans bitmap; out-of-range ids are
 * ignored.
 **/
static void i40e_vlan_rx_add_vid_up(struct net_device *netdev,
				    __always_unused __be16 proto, u16 vid)
{
	struct i40e_netdev_priv *priv = netdev_priv(netdev);
	struct i40e_vsi *vsi = priv->vsi;

	if (vid < VLAN_N_VID)
		set_bit(vid, vsi->active_vlans);
}
/**
 * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
 * @netdev: network interface to be adjusted
 * @proto: unused protocol value
 * @vid: vlan id to be removed
 *
 * net_device_ops implementation for removing vlan ids. Always returns 0.
 **/
static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
				 __always_unused __be16 proto, u16 vid)
{
	struct i40e_netdev_priv *priv = netdev_priv(netdev);
	struct i40e_vsi *vsi = priv->vsi;

	/* No failure is reported to the caller: there is nothing a user
	 * can do about a failure to remove, so success is always returned.
	 */
	i40e_vsi_kill_vlan(vsi, vid);

	clear_bit(vid, vsi->active_vlans);

	return 0;
}
/**
 * i40e_restore_vlan - Reinstate vlans when vsi/netdev comes back up
 * @vsi: the vsi being brought back up
 *
 * Returns immediately for a VSI without a netdev. Otherwise VLAN stripping is
 * enabled or disabled to match the netdev's NETIF_F_HW_VLAN_CTAG_RX feature.
 *
 * NOTE(review): `vid` is declared but never used in the visible code and no
 * closing brace is present, so the part that re-adds VLANs appears to be
 * missing. `staticvoid` below looks like a fused `static void`. Confirm
 * against the upstream driver.
**/ staticvoid i40e_restore_vlan(struct i40e_vsi *vsi)
{
u16 vid;
if (!vsi->netdev) return;
/* bring the stripping setting in line with the current feature flag */
if (vsi->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
i40e_vlan_stripping_enable(vsi); else
i40e_vlan_stripping_disable(vsi);
/* NOTE(review): body truncated here. */
/**
 * i40e_vsi_add_pvid - Add pvid for the VSI
 * @vsi: the vsi being adjusted
 * @vid: the vlan id to set as a PVID
 *
 * NOTE(review): only the local declarations are visible; the statements that
 * apply @vid, the return value and the closing brace are missing - confirm
 * against the upstream driver.
**/ int i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
{ struct i40e_vsi_context ctxt; int ret;
/* NOTE(review): body truncated after the declarations. */
/**
 * i40e_vsi_remove_pvid - Remove the pvid from the VSI
 * @vsi: the vsi being adjusted
 *
 * Clears the port VLAN id stored in the VSI info and then turns VLAN
 * stripping off for the VSI.
 **/
void i40e_vsi_remove_pvid(struct i40e_vsi *vsi)
{
	/* The pvid has to be cleared first: the stripping helper returns
	 * early while a port VLAN is still set.
	 */
	vsi->info.pvid = 0;

	i40e_vlan_stripping_disable(vsi);
}
/**
 * i40e_vsi_setup_tx_resources - Allocate VSI Tx queue resources
 * @vsi: ptr to the VSI
 *
 * Sets up the descriptors of every Tx ring and, when XDP is enabled on the
 * VSI, of every XDP Tx ring as well. Setup stops at the first failure.
 *
 * If this function returns with an error, then it's possible one or
 * more of the rings is populated (while the rest are not). It is the
 * callers duty to clean those orphaned rings.
 *
 * Return 0 on success, negative on failure
 **/
static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi)
{
	int err = 0;
	int q;

	for (q = 0; q < vsi->num_queue_pairs; q++) {
		err = i40e_setup_tx_descriptors(vsi->tx_rings[q]);
		if (err)
			break;
	}

	/* XDP rings are only touched when the regular rings all succeeded */
	if (err || !i40e_enabled_xdp_vsi(vsi))
		return err;

	for (q = 0; q < vsi->num_queue_pairs; q++) {
		err = i40e_setup_tx_descriptors(vsi->xdp_rings[q]);
		if (err)
			break;
	}

	return err;
}
/** * i40e_vsi_free_tx_resources - Free Tx resources for VSI queues * @vsi: ptr to the VSI * * Free VSI's transmit software resources
**/ staticvoid i40e_vsi_free_tx_resources(struct i40e_vsi *vsi)
{ int i;
if (vsi->tx_rings) { for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
i40e_free_tx_resources(vsi->tx_rings[i]);
}
if (vsi->xdp_rings) { for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->xdp_rings[i] && vsi->xdp_rings[i]->desc)
i40e_free_tx_resources(vsi->xdp_rings[i]);
}
}
/**
 * i40e_vsi_setup_rx_resources - Allocate VSI queues Rx resources
 * @vsi: ptr to the VSI
 *
 * Sets up the descriptors of every Rx ring, stopping at the first failure.
 *
 * If this function returns with an error, then it's possible one or
 * more of the rings is populated (while the rest are not). It is the
 * callers duty to clean those orphaned rings.
 *
 * Return 0 on success, negative on failure
 **/
static int i40e_vsi_setup_rx_resources(struct i40e_vsi *vsi)
{
	int err = 0;
	int q;

	for (q = 0; q < vsi->num_queue_pairs; q++) {
		err = i40e_setup_rx_descriptors(vsi->rx_rings[q]);
		if (err)
			break;
	}

	return err;
}
/** * i40e_vsi_free_rx_resources - Free Rx Resources for VSI queues * @vsi: ptr to the VSI * * Free all receive software resources
**/ staticvoid i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
{ int i;
if (!vsi->rx_rings) return;
for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->rx_rings[i] && vsi->rx_rings[i]->desc)
i40e_free_rx_resources(vsi->rx_rings[i]);
}
/**
 * i40e_config_xps_tx_ring - Configure XPS for a Tx ring
 * @ring: The Tx ring to configure
 *
 * Assigns an XPS CPU mask to the ring's queue, derived from the index of the
 * ring's interrupt vector. Rings without a vector or netdev, and rings that
 * belong to a channel, are left alone.
 **/
static void i40e_config_xps_tx_ring(struct i40e_ring *ring)
{
	int cpu;

	if (ring->ch || !ring->q_vector || !ring->netdev)
		return;

	/* We only initialize XPS once, so as not to overwrite user settings */
	if (test_and_set_bit(__I40E_TX_XPS_INIT_DONE, ring->state))
		return;

	cpu = cpumask_local_spread(ring->q_vector->v_idx, -1);
	netif_set_xps_queue(ring->netdev, get_cpu_mask(cpu),
			    ring->queue_index);
}
/**
 * i40e_xsk_pool - Retrieve the AF_XDP buffer pool if XDP and ZC is enabled
 * @ring: The Tx or Rx ring
 *
 * Returns the AF_XDP buffer pool or NULL.
 *
 * NOTE(review): only the NULL path of this lookup is visible. The statements
 * that follow it use tx_ctx, vsi, hw, pf_q, err and qtx_ctl, none of which
 * are declared in this block, and they return integer error codes; they look
 * like the tail of a separate Tx queue context setup routine whose header is
 * missing. `staticstruct` below looks like a fused `static struct`. Confirm
 * against the upstream driver.
**/ staticstruct xsk_buff_pool *i40e_xsk_pool(struct i40e_ring *ring)
{ bool xdp_on = i40e_enabled_xdp_vsi(ring->vsi); int qid = ring->queue_index;
/* for XDP rings the queue index is shifted down by alloc_queue_pairs before the bitmap lookup */
if (ring_is_xdp(ring))
qid -= ring->vsi->alloc_queue_pairs;
if (!xdp_on || !test_bit(qid, ring->vsi->af_xdp_zc_qps)) return NULL;
/* NOTE(review): the non-NULL return of the pool lookup is missing; what follows appears to belong to a different function. */
/* clear the context structure first */
memset(&tx_ctx, 0, sizeof(tx_ctx));
tx_ctx.new_context = 1;
tx_ctx.base = (ring->dma / 128);
tx_ctx.qlen = ring->count; if (test_bit(I40E_FLAG_FD_SB_ENA, vsi->back->flags) ||
test_bit(I40E_FLAG_FD_ATR_ENA, vsi->back->flags))
tx_ctx.fd_ena = 1; if (test_bit(I40E_FLAG_PTP_ENA, vsi->back->flags))
tx_ctx.timesync_ena = 1; /* FDIR VSI tx ring can still use RS bit and writebacks */ if (vsi->type != I40E_VSI_FDIR)
tx_ctx.head_wb_ena = 1;
/* the head write-back address is placed directly after the descriptor array */
tx_ctx.head_wb_addr = ring->dma +
(ring->count * sizeof(struct i40e_tx_desc));
/* As part of VSI creation/update, FW allocates certain * Tx arbitration queue sets for each TC enabled for * the VSI. The FW returns the handles to these queue * sets as part of the response buffer to Add VSI, * Update VSI, etc. AQ commands. It is expected that * these queue set handles be associated with the Tx * queues by the driver as part of the TX queue context * initialization. This has to be done regardless of * DCB as by default everything is mapped to TC0.
*/
/* NOTE(review): only the channel case assigns rdylist here; an else branch for rings without a channel is not visible. */
if (ring->ch)
tx_ctx.rdylist =
le16_to_cpu(ring->ch->info.qs_handle[ring->dcb_tc]);
/* clear the context in the HMC */
err = i40e_clear_lan_tx_queue_context(hw, pf_q); if (err) {
dev_info(&vsi->back->pdev->dev, "Failed to clear LAN Tx queue context on Tx ring %d (pf_q %d), error: %d\n",
ring->queue_index, pf_q, err); return -ENOMEM;
}
/* set the context in the HMC */
err = i40e_set_lan_tx_queue_context(hw, pf_q, &tx_ctx); if (err) {
dev_info(&vsi->back->pdev->dev, "Failed to set LAN Tx queue context on Tx ring %d (pf_q %d, error: %d\n",
ring->queue_index, pf_q, err); return -ENOMEM;
}
/* NOTE(review): the `if (ring->ch) {` opened below is never followed by the code that consumes qtx_ctl; the brace on the last line closes that `if`, not a function. */
/* Now associate this queue with this PCI function */ if (ring->ch) { if (ring->ch->type == I40E_VSI_VMDQ2)
qtx_ctl = I40E_QTX_CTL_VM_QUEUE; else return -EINVAL;
/* cache tail off for easier writes later */
ring->tail = hw->hw_addr + I40E_QTX_TAIL(pf_q);
return 0;
}
/**
 * i40e_rx_offset - Return expected offset into page to access data
 * @rx_ring: Ring we are requesting offset of
 *
 * Returns I40E_SKB_PAD when the ring uses build_skb, otherwise 0.
 */
static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
{
	if (ring_uses_build_skb(rx_ring))
		return I40E_SKB_PAD;

	return 0;
}
/**
 * i40e_configure_rx_ring - Configure a receive ring context
 * @ring: The Rx ring to configure
 *
 * Configure the Rx descriptor ring in the HMC context, select the Rx
 * buffer layout (build_skb or legacy), cache and clear the tail register
 * and pre-fill the ring with buffers.
 *
 * Returns 0 on success, -ENOMEM when clearing or setting the HMC queue
 * context fails, and -EOPNOTSUPP when the legacy Rx path is selected but
 * I40E_2K_TOO_SMALL_WITH_PADDING is set.  A shortfall while filling the
 * ring with buffers is only logged; the function still returns 0.
 **/
static int i40e_configure_rx_ring(struct i40e_ring *ring)
{
	struct i40e_vsi *vsi = ring->vsi;
	u32 chain_len = vsi->back->hw.func_caps.rx_buf_chain_len;
	u16 pf_q = vsi->base_queue + ring->queue_index;
	struct i40e_hw *hw = &vsi->back->hw;
	struct i40e_hmc_obj_rxq rx_ctx;
	int err = 0;
	bool ok;

	/* clear the context structure first: only some of its fields are
	 * assigned below and the whole structure is handed to the HMC, so
	 * the remaining fields must not carry stack garbage
	 */
	memset(&rx_ctx, 0, sizeof(rx_ctx));

	/* descriptor type is always zero
	 * rx_ctx.dtype = 0;
	 */
	rx_ctx.hsplit_0 = 0;

	rx_ctx.rxmax = min_t(u16, vsi->max_frame, chain_len * ring->rx_buf_len);
	if (hw->revision_id == 0)
		rx_ctx.lrxqthresh = 0;
	else
		rx_ctx.lrxqthresh = 1;
	rx_ctx.crcstrip = 1;
	rx_ctx.l2tsel = 1;
	/* this controls whether VLAN is stripped from inner headers */
	rx_ctx.showiv = 0;
	/* set the prefena field to 1 because the manual says to */
	rx_ctx.prefena = 1;

	/* clear the context in the HMC */
	err = i40e_clear_lan_rx_queue_context(hw, pf_q);
	if (err) {
		dev_info(&vsi->back->pdev->dev,
			 "Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
			 ring->queue_index, pf_q, err);
		return -ENOMEM;
	}

	/* set the context in the HMC */
	err = i40e_set_lan_rx_queue_context(hw, pf_q, &rx_ctx);
	if (err) {
		dev_info(&vsi->back->pdev->dev,
			 "Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
			 ring->queue_index, pf_q, err);
		return -ENOMEM;
	}

	/* configure Rx buffer alignment */
	if (!vsi->netdev || test_bit(I40E_FLAG_LEGACY_RX_ENA, vsi->back->flags)) {
		if (I40E_2K_TOO_SMALL_WITH_PADDING) {
			dev_info(&vsi->back->pdev->dev,
				 "2k Rx buffer is too small to fit standard MTU and skb_shared_info\n");
			return -EOPNOTSUPP;
		}
		clear_ring_build_skb_enabled(ring);
	} else {
		set_ring_build_skb_enabled(ring);
	}

	/* must follow the build_skb decision above, which it depends on */
	ring->rx_offset = i40e_rx_offset(ring);

	/* cache tail for quicker writes, and clear the reg before use */
	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
	writel(0, ring->tail);

	/* the zero-copy allocator's result is used directly as "ok", the
	 * regular allocator's result is negated
	 */
	if (ring->xsk_pool) {
		xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
		ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring));
	} else {
		ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
	}
	if (!ok) {
		/* Log this in case the user has forgotten to give the kernel
		 * any buffers, even later in the application.
		 */
		dev_info(&vsi->back->pdev->dev,
			 "Failed to allocate some buffers on %sRx ring %d (pf_q %d)\n",
			 ring->xsk_pool ? "AF_XDP ZC enabled " : "",
			 ring->queue_index, pf_q);
	}

	return 0;
}
/**
 * i40e_vsi_configure_tx - Configure the VSI for Tx
 * @vsi: VSI structure describing this set of rings and resources
 *
 * Configure the Tx VSI for operation: every regular Tx ring first and,
 * when XDP is enabled on the VSI, every XDP Tx ring afterwards.
 *
 * Returns 0 on success or the first error reported by
 * i40e_configure_tx_ring(); configuration stops at that ring.
 **/
static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
{
	int ret;
	u16 q;

	for (q = 0; q < vsi->num_queue_pairs; q++) {
		ret = i40e_configure_tx_ring(vsi->tx_rings[q]);
		if (ret)
			return ret;
	}

	if (!i40e_enabled_xdp_vsi(vsi))
		return 0;

	for (q = 0; q < vsi->num_queue_pairs; q++) {
		ret = i40e_configure_tx_ring(vsi->xdp_rings[q]);
		if (ret)
			return ret;
	}

	return 0;
}
/** * i40e_vsi_configure_rx - Configure the VSI for Rx * @vsi: the VSI being configured * * Configure the Rx VSI for operation.
**/ staticint i40e_vsi_configure_rx(struct i40e_vsi *vsi)
{ int err = 0;
u16 i;
/**
 * i40e_set_vsi_rx_mode - Call set_rx_mode on a VSI
 * @vsi: ptr to the VSI
 *
 * VSIs without a netdev are skipped.
 **/
static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
{
	if (!vsi->netdev)
		return;

	i40e_set_rx_mode(vsi->netdev);
}
/**
 * i40e_reset_fdir_filter_cnt - Reset flow director filter counters
 * @pf: Pointer to the targeted PF
 *
 * Set all flow director counters to 0.
 */
static void i40e_reset_fdir_filter_cnt(struct i40e_pf *pf)
{
	/* IPv4 flow types */
	pf->fd_ip4_filter_cnt = 0;
	pf->fd_tcp4_filter_cnt = 0;
	pf->fd_udp4_filter_cnt = 0;
	pf->fd_sctp4_filter_cnt = 0;

	/* IPv6 flow types */
	pf->fd_ip6_filter_cnt = 0;
	pf->fd_tcp6_filter_cnt = 0;
	pf->fd_udp6_filter_cnt = 0;
	pf->fd_sctp6_filter_cnt = 0;
}
/** * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters * @vsi: Pointer to the targeted VSI * * This function replays the hlist on the hw where all the SB Flow Director * filters were saved.
**/ staticvoid i40e_fdir_filter_restore(struct i40e_vsi *vsi)
{ struct i40e_fdir_filter *filter; struct i40e_pf *pf = vsi->back; struct hlist_node *node;
if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) return;
/* Reset FDir counters as we're replaying all existing filters */
i40e_reset_fdir_filter_cnt(pf);
/** * i40e_vsi_configure - Set up the VSI for action * @vsi: the VSI being configured
**/ staticint i40e_vsi_configure(struct i40e_vsi *vsi)
{ int err;
/** * i40e_vsi_configure_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured
**/ staticvoid i40e_vsi_configure_msix(struct i40e_vsi *vsi)
{ bool has_xdp = i40e_enabled_xdp_vsi(vsi); struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw;
u16 vector; int i, q;
u32 qp;
/* The interrupt indexing is offset by 1 in the PFINT_ITRn * and PFINT_LNKLSTn registers, e.g.: * PFINT_ITRn[0..n-1] gets msix-1..msix-n (qpair interrupts)
*/
qp = vsi->base_queue;
vector = vsi->base_vector; for (i = 0; i < vsi->num_q_vectors; i++, vector++) { struct i40e_q_vector *q_vector = vsi->q_vectors[i];
/* Associate the queue pair to the vector and enable the queue * interrupt RX queue in linked list with next queue set to TX
*/
wr32(hw, I40E_QINT_RQCTL(0), I40E_QINT_RQCTL_VAL(nextqp, 0, TX));
if (i40e_enabled_xdp_vsi(vsi)) { /* TX queue in linked list with next queue set to TX */
wr32(hw, I40E_QINT_TQCTL(nextqp),
I40E_QINT_TQCTL_VAL(nextqp, 0, TX));
}
/* last TX queue so the next RX queue doesn't matter */
wr32(hw, I40E_QINT_TQCTL(0),
I40E_QINT_TQCTL_VAL(I40E_QUEUE_END_OF_LIST, 0, RX));
i40e_flush(hw);
}
if (!q_vector->tx.ring && !q_vector->rx.ring) return IRQ_HANDLED;
napi_schedule_irqoff(&q_vector->napi);
return IRQ_HANDLED;
}
/**
 * i40e_irq_affinity_notify - Callback for affinity changes
 * @notify: context as to what irq was changed
 * @mask: the new affinity mask
 *
 * This is a callback function used by the irq_set_affinity_notifier function
 * so that we may register to receive changes to the irq affinity masks.
 * The new mask is copied into the q_vector that embeds @notify.
 **/
static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify,
				     const cpumask_t *mask)
{
	struct i40e_q_vector *owner;

	owner = container_of(notify, struct i40e_q_vector, affinity_notify);
	cpumask_copy(&owner->affinity_mask, mask);
}
/**
 * i40e_irq_affinity_release - Callback for affinity notifier release
 * @ref: internal core kernel usage
 *
 * This is a callback function used by the irq_set_affinity_notifier function
 * to inform the current notification subscriber that they will no longer
 * receive notifications.  The driver takes no action here.
 **/
static void i40e_irq_affinity_release(struct kref *ref)
{
}
/** * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts * @vsi: the VSI being configured * @basename: name for the vector * * Allocates MSI-X vectors and requests interrupts from the kernel.
**/ staticint i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
{ int q_vectors = vsi->num_q_vectors; struct i40e_pf *pf = vsi->back; int base = vsi->base_vector; int rx_int_idx = 0; int tx_int_idx = 0; int vector, err; int irq_num; int cpu;
/* register for affinity change notifications */
q_vector->irq_num = irq_num;
q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
q_vector->affinity_notify.release = i40e_irq_affinity_release;
irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify); /* Spread affinity hints out across online CPUs. * * get_cpu_mask returns a static constant mask with * a permanent lifetime so it's ok to pass to * irq_update_affinity_hint without making a copy.
*/
cpu = cpumask_local_spread(q_vector->v_idx, -1);
irq_update_affinity_hint(irq_num, get_cpu_mask(cpu));
}
/**
 * i40e_vsi_disable_irq - Mask off queue interrupt generation on the VSI
 * @vsi: the VSI being un-configured
 *
 * Clears the cause-enable bit of every Tx/Rx queue interrupt control
 * register of the VSI (and zeroes the XDP Tx one when XDP is enabled),
 * then disables the interrupt vectors themselves and waits for handlers
 * that are still running.
 **/
static void i40e_vsi_disable_irq(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
	struct i40e_hw *hw = &pf->hw;
	int base = vsi->base_vector;
	int n;

	/* disable interrupt causation from each queue */
	for (n = 0; n < vsi->num_queue_pairs; n++) {
		u32 ctl;

		ctl = rd32(hw, I40E_QINT_TQCTL(vsi->tx_rings[n]->reg_idx));
		ctl &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
		wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[n]->reg_idx), ctl);

		ctl = rd32(hw, I40E_QINT_RQCTL(vsi->rx_rings[n]->reg_idx));
		ctl &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
		wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[n]->reg_idx), ctl);

		if (i40e_enabled_xdp_vsi(vsi))
			wr32(hw, I40E_QINT_TQCTL(vsi->xdp_rings[n]->reg_idx), 0);
	}

	if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
		/* Legacy and MSI mode - this stops all interrupt handling */
		wr32(hw, I40E_PFINT_ICR0_ENA, 0);
		wr32(hw, I40E_PFINT_DYN_CTL0, 0);
		i40e_flush(hw);
		synchronize_irq(pf->pdev->irq);
		return;
	}

	/* disable each interrupt; the register index is the vector minus one */
	for (n = 0; n < vsi->num_q_vectors; n++)
		wr32(hw, I40E_PFINT_DYN_CTLN(base + n - 1), 0);

	i40e_flush(hw);

	for (n = 0; n < vsi->num_q_vectors; n++)
		synchronize_irq(pf->msix_entries[n + base].vector);
}
/** * i40e_vsi_enable_irq - Enable IRQ for the given VSI * @vsi: the VSI being configured
**/ staticint i40e_vsi_enable_irq(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back; int i;
if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { for (i = 0; i < vsi->num_q_vectors; i++)
i40e_irq_dynamic_enable(vsi, i);
} else {
i40e_irq_dynamic_enable_icr0(pf);
}
/** * i40e_intr - MSI/Legacy and non-queue interrupt handler * @irq: interrupt number * @data: pointer to the PF structure (struct i40e_pf) * * This is the handler used for all MSI/Legacy interrupts, and deals * with both queue and non-queue interrupts. This is also used in * MSIX mode to handle the non-queue interrupts.
**/ static irqreturn_t i40e_intr(int irq, void *data)
{ struct i40e_pf *pf = (struct i40e_pf *)data; struct i40e_hw *hw = &pf->hw;
irqreturn_t ret = IRQ_NONE;
u32 icr0, icr0_remaining;
u32 val, ena_mask;
/* if sharing a legacy IRQ, we might get called w/o an intr pending */ if ((icr0 & I40E_PFINT_ICR0_INTEVENT_MASK) == 0) goto enable_intr;
/* if interrupt but no bits showing, must be SWINT */ if (((icr0 & ~I40E_PFINT_ICR0_INTEVENT_MASK) == 0) ||
(icr0 & I40E_PFINT_ICR0_SWINT_MASK))
pf->sw_int_count++;
/* only q0 is used in MSI/Legacy mode, and none are used in MSIX */ if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_q_vector *q_vector = vsi->q_vectors[0];
/* We do not have a way to disarm Queue causes while leaving * interrupt enabled for all other causes, ideally * interrupt should be disabled while we are in NAPI but * this is not a performance path and napi_schedule() * can deal with rescheduling.
*/ if (!test_bit(__I40E_DOWN, pf->state))
napi_schedule_irqoff(&q_vector->napi);
}
if (icr0 & I40E_PFINT_ICR0_GRST_MASK) { if (!test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
set_bit(__I40E_RESET_INTR_RECEIVED, pf->state);
ena_mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK;
val = rd32(hw, I40E_GLGEN_RSTAT);
val = FIELD_GET(I40E_GLGEN_RSTAT_RESET_TYPE_MASK, val); if (val == I40E_RESET_CORER) {
pf->corer_count++;
} elseif (val == I40E_RESET_GLOBR) {
pf->globr_count++;
} elseif (val == I40E_RESET_EMPR) {
pf->empr_count++;
set_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state);
}
}
if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK) {
icr0 &= ~I40E_PFINT_ICR0_HMC_ERR_MASK;
dev_info(&pf->pdev->dev, "HMC error interrupt\n");
dev_info(&pf->pdev->dev, "HMC error info 0x%x, HMC error data 0x%x\n",
rd32(hw, I40E_PFHMC_ERRORINFO),
rd32(hw, I40E_PFHMC_ERRORDATA));
}
if (icr0 & I40E_PFINT_ICR0_TIMESYNC_MASK) {
u32 prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_0);
if (prttsyn_stat & I40E_PRTTSYN_STAT_0_EVENT0_MASK)
schedule_work(&pf->ptp_extts0_work);
if (prttsyn_stat & I40E_PRTTSYN_STAT_0_TXTIME_MASK)
i40e_ptp_tx_hwtstamp(pf);
icr0 &= ~I40E_PFINT_ICR0_ENA_TIMESYNC_MASK;
}
/* If a critical error is pending we have no choice but to reset the * device. * Report and mask out any remaining unexpected interrupts.
*/
icr0_remaining = icr0 & ena_mask; if (icr0_remaining) {
dev_info(&pf->pdev->dev, "unhandled interrupt icr0=0x%08x\n",
icr0_remaining); if ((icr0_remaining & I40E_PFINT_ICR0_PE_CRITERR_MASK) ||
(icr0_remaining & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) ||
(icr0_remaining & I40E_PFINT_ICR0_ECC_ERR_MASK)) {
dev_info(&pf->pdev->dev, "device will be reset\n");
set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
i40e_service_event_schedule(pf);
}
ena_mask &= ~icr0_remaining;
}
ret = IRQ_HANDLED;
/** * i40e_clean_fdir_tx_irq - Reclaim resources after transmit completes * @tx_ring: tx ring to clean * @budget: how many cleans we're allowed * * Returns true if there's any budget left (e.g. the clean is finished)
**/ staticbool i40e_clean_fdir_tx_irq(struct i40e_ring *tx_ring, int budget)
{ struct i40e_vsi *vsi = tx_ring->vsi;
u16 i = tx_ring->next_to_clean; struct i40e_tx_buffer *tx_buf; struct i40e_tx_desc *tx_desc;
tx_buf = &tx_ring->tx_bi[i];
tx_desc = I40E_TX_DESC(tx_ring, i);
i -= tx_ring->count;
do { struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
/* if next_to_watch is not set then there is no work pending */ if (!eop_desc) break;
/* prevent any other reads prior to eop_desc */
smp_rmb();
/* if the descriptor isn't done, no work yet to do */ if (!(eop_desc->cmd_type_offset_bsz &
cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE))) break;
/* move us past the eop_desc for start of next FD desc */
tx_buf++;
tx_desc++;
i++; if (unlikely(!i)) {
i -= tx_ring->count;
tx_buf = tx_ring->tx_bi;
tx_desc = I40E_TX_DESC(tx_ring, 0);
}
/* update budget accounting */
budget--;
} while (likely(budget));
i += tx_ring->count;
tx_ring->next_to_clean = i;
if (test_bit(I40E_FLAG_MSIX_ENA, vsi->back->flags))
i40e_irq_dynamic_enable(vsi, tx_ring->q_vector->v_idx);
return budget > 0;
}
/** * i40e_fdir_clean_ring - Interrupt Handler for FDIR SB ring * @irq: interrupt number * @data: pointer to a q_vector
**/ static irqreturn_t i40e_fdir_clean_ring(int irq, void *data)
{ struct i40e_q_vector *q_vector = data; struct i40e_vsi *vsi;
if (!q_vector->tx.ring) return IRQ_HANDLED;
vsi = q_vector->tx.ring->vsi;
i40e_clean_fdir_tx_irq(q_vector->tx.ring, vsi->work_limit);
return IRQ_HANDLED;
}
/** * i40e_map_vector_to_qp - Assigns the queue pair to the vector * @vsi: the VSI being configured * @v_idx: vector index * @qp_idx: queue pair index
**/ staticvoid i40e_map_vector_to_qp(struct i40e_vsi *vsi, int v_idx, int qp_idx)
{ struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx]; struct i40e_ring *tx_ring = vsi->tx_rings[qp_idx]; struct i40e_ring *rx_ring = vsi->rx_rings[qp_idx];
/* Place XDP Tx ring in the same q_vector ring list as regular Tx */ if (i40e_enabled_xdp_vsi(vsi)) { struct i40e_ring *xdp_ring = vsi->xdp_rings[qp_idx];
/** * i40e_vsi_map_rings_to_vectors - Maps descriptor rings to vectors * @vsi: the VSI being configured * * This function maps descriptor rings to the queue-specific vectors * we were allotted through the MSI-X enabling code. Ideally, we'd have * one vector per queue pair, but on a constrained vector budget, we * group the queue pairs as "efficiently" as possible.
**/ staticvoid i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi)
{ int qp_remaining = vsi->num_queue_pairs; int q_vectors = vsi->num_q_vectors; int num_ringpairs; int v_start = 0; int qp_idx = 0;
/* If we don't have enough vectors for a 1-to-1 mapping, we'll have to * group them so there are multiple queues per vector. * It is also important to go through all the vectors available to be * sure that if we don't use all the vectors, that the remaining vectors * are cleared. This is especially important when decreasing the * number of queues in use.
*/ for (; v_start < q_vectors; v_start++) { struct i40e_q_vector *q_vector = vsi->q_vectors[v_start];
/**
 * i40e_vsi_request_irq - Request IRQ from the OS
 * @vsi: the VSI being configured
 * @basename: name for the vector
 *
 * Logs "request_irq failed" when err is non-zero and returns err.
 *
 * NOTE(review): in the code as shown here, err is declared but never
 * assigned before it is tested and returned, and @basename is unused.
 * Presumably the statements that actually request the interrupt are
 * missing from this copy - confirm against the complete source before
 * relying on this function.
 **/ staticint i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename)
{ struct i40e_pf *pf = vsi->back; int err;
if (err)
dev_info(&pf->pdev->dev, "request_irq failed, Error %d\n", err);
return err;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
/**
 * i40e_netpoll - A Polling 'interrupt' handler
 * @netdev: network interface device structure
 *
 * This is used by netconsole to send skbs without having to re-enable
 * interrupts.  It's not called while the normal interrupt routine is executing.
 *
 * Does nothing while the VSI is down.  In MSI-X mode every q_vector of the
 * VSI is serviced; otherwise the shared MSI/legacy handler is invoked.
 **/
static void i40e_netpoll(struct net_device *netdev)
{
	struct i40e_netdev_priv *np = netdev_priv(netdev);
	struct i40e_vsi *vsi = np->vsi;
	struct i40e_pf *pf = vsi->back;
	int i;

	/* if interface is down do nothing */
	if (test_bit(__I40E_VSI_DOWN, vsi->state))
		return;

	if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
		for (i = 0; i < vsi->num_q_vectors; i++)
			i40e_msix_clean_rings(0, vsi->q_vectors[i]);
	} else {
		/* i40e_intr() casts its data argument to struct i40e_pf *,
		 * so it must be handed the PF, not the netdev
		 */
		i40e_intr(pf->pdev->irq, pf);
	}
}
#endif
#define I40E_QTX_ENA_WAIT_COUNT 50
/**
 * i40e_pf_txq_wait - Wait for a PF's Tx queue to be enabled or disabled
 * @pf: the PF being configured
 * @pf_q: the PF queue
 * @enable: enable or disable state of the queue
 *
 * This routine will wait for the given Tx queue of the PF to reach the
 * enabled or disabled state.
 * Returns -ETIMEDOUT in case of failing to reach the requested state after
 * multiple retries; else will return 0 in case of success.
 **/
static int i40e_pf_txq_wait(struct i40e_pf *pf, int pf_q, bool enable)
{
	int attempt;

	for (attempt = 0; attempt < I40E_QUEUE_WAIT_RETRY_LIMIT; attempt++) {
		u32 ena = rd32(&pf->hw, I40E_QTX_ENA(pf_q));

		/* done as soon as the status bit matches the request */
		if (enable == !!(ena & I40E_QTX_ENA_QENA_STAT_MASK))
			return 0;

		usleep_range(10, 20);
	}

	return -ETIMEDOUT;
}
/** * i40e_control_tx_q - Start or stop a particular Tx queue * @pf: the PF structure * @pf_q: the PF queue to configure * @enable: start or stop the queue * * This function enables or disables a single queue. Note that any delay * required after the operation is expected to be handled by the caller of * this function.
**/ staticvoid i40e_control_tx_q(struct i40e_pf *pf, int pf_q, bool enable)
{ struct i40e_hw *hw = &pf->hw;
u32 tx_reg; int i;
/* warn the TX unit of coming changes */
i40e_pre_tx_queue_cfg(&pf->hw, pf_q, enable); if (!enable)
usleep_range(10, 20);
for (i = 0; i < I40E_QTX_ENA_WAIT_COUNT; i++) {
tx_reg = rd32(hw, I40E_QTX_ENA(pf_q)); if (((tx_reg >> I40E_QTX_ENA_QENA_REQ_SHIFT) & 1) ==
((tx_reg >> I40E_QTX_ENA_QENA_STAT_SHIFT) & 1)) break;
usleep_range(1000, 2000);
}
/* Skip if the queue is already in the requested state */ if (enable == !!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) return;
/**
 * i40e_control_wait_tx_q - Start/stop Tx queue and wait for completion
 * @seid: VSI SEID
 * @pf: the PF structure
 * @pf_q: the PF queue to configure
 * @is_xdp: true if the queue is used for XDP
 * @enable: start or stop the queue
 *
 * Returns the result of i40e_pf_txq_wait(); a non-zero result is also
 * logged together with @seid and @pf_q.
 **/
int i40e_control_wait_tx_q(int seid, struct i40e_pf *pf, int pf_q,
			   bool is_xdp, bool enable)
{
	int err;

	i40e_control_tx_q(pf, pf_q, enable);

	/* wait for the change to finish */
	err = i40e_pf_txq_wait(pf, pf_q, enable);
	if (!err)
		return 0;

	dev_info(&pf->pdev->dev,
		 "VSI seid %d %sTx ring %d %sable timeout\n",
		 seid, (is_xdp ? "XDP " : ""), pf_q,
		 (enable ? "en" : "dis"));

	return err;
}
/**
 * i40e_vsi_enable_tx - Start a VSI's rings
 * @vsi: the VSI being configured
 *
 * Enables each Tx queue of the VSI and, when XDP is enabled, the matching
 * XDP Tx queue located alloc_queue_pairs further on.
 *
 * Returns 0 on success or the first error from i40e_control_wait_tx_q().
 **/
static int i40e_vsi_enable_tx(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
	int pf_q = vsi->base_queue;
	int n, err;

	for (n = 0; n < vsi->num_queue_pairs; n++, pf_q++) {
		err = i40e_control_wait_tx_q(vsi->seid, pf, pf_q,
					     false /*is xdp*/, true);
		if (err)
			return err;

		if (i40e_enabled_xdp_vsi(vsi)) {
			err = i40e_control_wait_tx_q(vsi->seid, pf,
						     pf_q + vsi->alloc_queue_pairs,
						     true /*is xdp*/, true);
			if (err)
				return err;
		}
	}

	return 0;
}
/**
 * i40e_pf_rxq_wait - Wait for a PF's Rx queue to be enabled or disabled
 * @pf: the PF being configured
 * @pf_q: the PF queue
 * @enable: enable or disable state of the queue
 *
 * This routine will wait for the given Rx queue of the PF to reach the
 * enabled or disabled state.
 * Returns -ETIMEDOUT in case of failing to reach the requested state after
 * multiple retries; else will return 0 in case of success.
 **/
static int i40e_pf_rxq_wait(struct i40e_pf *pf, int pf_q, bool enable)
{
	int attempt;

	for (attempt = 0; attempt < I40E_QUEUE_WAIT_RETRY_LIMIT; attempt++) {
		u32 ena = rd32(&pf->hw, I40E_QRX_ENA(pf_q));

		/* done as soon as the status bit matches the request */
		if (enable == !!(ena & I40E_QRX_ENA_QENA_STAT_MASK))
			return 0;

		usleep_range(10, 20);
	}

	return -ETIMEDOUT;
}
/**
 * i40e_control_rx_q - Start or stop a particular Rx queue
 * @pf: the PF structure
 * @pf_q: the PF queue to configure
 * @enable: start or stop the queue
 *
 * This function enables or disables a single queue. Note that
 * any delay required after the operation is expected to be
 * handled by the caller of this function.
 **/
static void i40e_control_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
{
	struct i40e_hw *hw = &pf->hw;
	u32 ena;
	int try;

	/* let any previous request settle: wait until the request and status
	 * bits agree (the Tx wait count is reused as the retry limit)
	 */
	for (try = 0; try < I40E_QTX_ENA_WAIT_COUNT; try++) {
		u32 req, stat;

		ena = rd32(hw, I40E_QRX_ENA(pf_q));
		req = (ena >> I40E_QRX_ENA_QENA_REQ_SHIFT) & 1;
		stat = (ena >> I40E_QRX_ENA_QENA_STAT_SHIFT) & 1;
		if (req == stat)
			break;

		usleep_range(1000, 2000);
	}

	/* Skip if the queue is already in the requested state */
	if (enable == !!(ena & I40E_QRX_ENA_QENA_STAT_MASK))
		return;

	/* turn on/off the queue */
	if (enable)
		ena |= I40E_QRX_ENA_QENA_REQ_MASK;
	else
		ena &= ~I40E_QRX_ENA_QENA_REQ_MASK;

	wr32(hw, I40E_QRX_ENA(pf_q), ena);
}
/**
 * i40e_control_wait_rx_q - Start/stop Rx queue and wait for completion
 * @pf: the PF structure
 * @pf_q: queue being configured
 * @enable: start or stop the rings
 *
 * This function enables or disables a single queue along with waiting
 * for the change to finish. The caller of this function should handle
 * the delays needed in the case of disabling queues.
 *
 * Returns the result of i40e_pf_rxq_wait().
 **/
int i40e_control_wait_rx_q(struct i40e_pf *pf, int pf_q, bool enable)
{
	i40e_control_rx_q(pf, pf_q, enable);

	/* wait for the change to finish */
	return i40e_pf_rxq_wait(pf, pf_q, enable);
}
/**
 * i40e_vsi_enable_rx - Start a VSI's rings
 * @vsi: the VSI being configured
 *
 * Enables each Rx queue of the VSI in turn.  Stops at, logs and returns
 * the first error from i40e_control_wait_rx_q(); returns 0 otherwise.
 **/
static int i40e_vsi_enable_rx(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
	int pf_q = vsi->base_queue;
	int n, err;

	for (n = 0; n < vsi->num_queue_pairs; n++, pf_q++) {
		err = i40e_control_wait_rx_q(pf, pf_q, true);
		if (!err)
			continue;

		dev_info(&pf->pdev->dev,
			 "VSI seid %d Rx ring %d enable timeout\n",
			 vsi->seid, pf_q);
		return err;
	}

	return 0;
}
/**
 * i40e_vsi_start_rings - Start a VSI's rings
 * @vsi: the VSI being configured
 *
 * Returns 0 on success, otherwise the error from enabling the Rx or the
 * Tx queues.  Tx is not attempted when Rx fails.
 **/
int i40e_vsi_start_rings(struct i40e_vsi *vsi)
{
	int err;

	/* do rx first for enable and last for disable */
	err = i40e_vsi_enable_rx(vsi);
	if (err)
		return err;

	return i40e_vsi_enable_tx(vsi);
}
#define I40E_DISABLE_TX_GAP_MSEC 50
/** * i40e_vsi_stop_rings - Stop a VSI's rings * @vsi: the VSI being configured
**/ void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back;
u32 pf_q, tx_q_end, rx_q_end;
/* When port TX is suspended, don't wait */ if (test_bit(__I40E_PORT_SUSPENDED, vsi->back->state)) return i40e_vsi_stop_rings_no_wait(vsi);
/**
 * i40e_vsi_stop_rings_no_wait - Stop a VSI's rings and do not delay
 * @vsi: the VSI being shutdown
 *
 * This function stops all the rings for a VSI but does not delay to verify
 * that rings have been disabled. It is expected that the caller is shutting
 * down multiple VSIs at once and will delay together for all the VSIs after
 * initiating the shutdown. This is particularly useful for shutting down lots
 * of VFs together. Otherwise, a large delay can be incurred while configuring
 * each VSI in serial.
 **/
void i40e_vsi_stop_rings_no_wait(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
	const int first_q = vsi->base_queue;
	int n;

	for (n = 0; n < vsi->num_queue_pairs; n++) {
		/* request Tx off first, then Rx, for the same queue */
		i40e_control_tx_q(pf, first_q + n, false);
		i40e_control_rx_q(pf, first_q + n, false);
	}
}
/** * i40e_vsi_free_irq - Free the irq association with the OS * @vsi: the VSI being configured
**/ staticvoid i40e_vsi_free_irq(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int base = vsi->base_vector;
u32 val, qp; int i;
if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { if (!vsi->q_vectors) return;
if (!vsi->irqs_ready) return;
vsi->irqs_ready = false; for (i = 0; i < vsi->num_q_vectors; i++) { int irq_num;
u16 vector;
vector = i + base;
irq_num = pf->msix_entries[vector].vector;
/* free only the irqs that were actually requested */ if (!vsi->q_vectors[i] ||
!vsi->q_vectors[i]->num_ringpairs) continue;
/* clear the affinity notifier in the IRQ descriptor */
irq_set_affinity_notifier(irq_num, NULL); /* remove our suggested affinity mask for this IRQ */
irq_update_affinity_hint(irq_num, NULL);
free_irq(irq_num, vsi->q_vectors[i]);
/* Tear down the interrupt queue link list * * We know that they come in pairs and always * the Rx first, then the Tx. To clear the * link list, stick the EOL value into the * next_q field of the registers.
*/
val = rd32(hw, I40E_PFINT_LNKLSTN(vector - 1));
qp = FIELD_GET(I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK,
val);
val |= I40E_QUEUE_END_OF_LIST
<< I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), val);
while (qp != I40E_QUEUE_END_OF_LIST) {
u32 next;
val = rd32(hw, I40E_QINT_RQCTL(qp));
val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK |
I40E_QINT_RQCTL_MSIX0_INDX_MASK |
I40E_QINT_RQCTL_CAUSE_ENA_MASK |
I40E_QINT_RQCTL_INTEVENT_MASK);
val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
wr32(hw, I40E_QINT_RQCTL(qp), val);
val = rd32(hw, I40E_QINT_TQCTL(qp));
next = FIELD_GET(I40E_QINT_TQCTL_NEXTQ_INDX_MASK,
val);
val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK |
I40E_QINT_TQCTL_MSIX0_INDX_MASK |
I40E_QINT_TQCTL_CAUSE_ENA_MASK |
I40E_QINT_TQCTL_INTEVENT_MASK);
val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
val = rd32(hw, I40E_PFINT_LNKLST0);
qp = FIELD_GET(I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK, val);
val |= I40E_QUEUE_END_OF_LIST
<< I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT;
wr32(hw, I40E_PFINT_LNKLST0, val);
val = rd32(hw, I40E_QINT_RQCTL(qp));
val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK |
I40E_QINT_RQCTL_MSIX0_INDX_MASK |
I40E_QINT_RQCTL_CAUSE_ENA_MASK |
I40E_QINT_RQCTL_INTEVENT_MASK);
val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
wr32(hw, I40E_QINT_RQCTL(qp), val);
val = rd32(hw, I40E_QINT_TQCTL(qp));
val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK |
I40E_QINT_TQCTL_MSIX0_INDX_MASK |
I40E_QINT_TQCTL_CAUSE_ENA_MASK |
I40E_QINT_TQCTL_INTEVENT_MASK);
val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
wr32(hw, I40E_QINT_TQCTL(qp), val);
}
}
/**
 * i40e_free_q_vector - Free memory allocated for specific interrupt vector
 * @vsi: the VSI being configured
 * @v_idx: Index of vector to be freed
 *
 * This function frees the memory allocated to the q_vector.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 *
 * An empty slot is tolerated: the function clears the slot itself when it
 * frees a vector, so it may be reached again for an index that holds NULL.
 **/
static void i40e_free_q_vector(struct i40e_vsi *vsi, int v_idx)
{
	struct i40e_q_vector *q_vector = vsi->q_vectors[v_idx];

	/* nothing to do for a slot that was never filled or already freed */
	if (!q_vector)
		return;

	/* only VSI w/ an associated netdev is set up w/ NAPI */
	if (vsi->netdev)
		netif_napi_del(&q_vector->napi);

	/* unpublish the pointer before handing the memory to RCU */
	vsi->q_vectors[v_idx] = NULL;

	kfree_rcu(q_vector, rcu);
}
/** * i40e_vsi_free_q_vectors - Free memory allocated for interrupt vectors * @vsi: the VSI being un-configured * * This frees the memory allocated to the q_vectors and * deletes references to the NAPI struct.
**/ staticvoid i40e_vsi_free_q_vectors(struct i40e_vsi *vsi)
{ int v_idx;
/**
 * i40e_reset_interrupt_capability - Disable interrupt setup in OS
 * @pf: board private structure
 *
 * Disables MSI-X (freeing the vector table and the IRQ pile) or MSI,
 * whichever is flagged as enabled, and then clears both flags.
 **/
static void i40e_reset_interrupt_capability(struct i40e_pf *pf)
{
	/* If we're in Legacy mode, the interrupt was cleaned in vsi_close */
	if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
		pci_disable_msix(pf->pdev);

		kfree(pf->msix_entries);
		pf->msix_entries = NULL;

		kfree(pf->irq_pile);
		pf->irq_pile = NULL;
	} else {
		if (test_bit(I40E_FLAG_MSI_ENA, pf->flags))
			pci_disable_msi(pf->pdev);
	}

	clear_bit(I40E_FLAG_MSI_ENA, pf->flags);
	clear_bit(I40E_FLAG_MSIX_ENA, pf->flags);
}
/** * i40e_clear_interrupt_scheme - Clear the current interrupt scheme settings * @pf: board private structure * * We go through and clear interrupt specific resources and reset the structure * to pre-load conditions
**/ staticvoid i40e_clear_interrupt_scheme(struct i40e_pf *pf)
{ struct i40e_vsi *vsi; int i;
if (test_bit(__I40E_MISC_IRQ_REQUESTED, pf->state))
i40e_free_misc_vector(pf);
i40e_pf_for_each_vsi(pf, i, vsi)
i40e_vsi_free_q_vectors(vsi);
i40e_reset_interrupt_capability(pf);
}
/** * i40e_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured
**/ staticvoid i40e_napi_enable_all(struct i40e_vsi *vsi)
{ int q_idx;
if (q_vector->rx.ring || q_vector->tx.ring)
napi_enable(&q_vector->napi);
}
}
/** * i40e_napi_disable_all - Disable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured
**/ staticvoid i40e_napi_disable_all(struct i40e_vsi *vsi)
{ int q_idx;
if (q_vector->rx.ring || q_vector->tx.ring)
napi_disable(&q_vector->napi);
}
}
/** * i40e_vsi_close - Shut down a VSI * @vsi: the vsi to be quelled
**/ staticvoid i40e_vsi_close(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back; if (!test_and_set_bit(__I40E_VSI_DOWN, vsi->state))
i40e_down(vsi);
i40e_vsi_free_irq(vsi);
i40e_vsi_free_tx_resources(vsi);
i40e_vsi_free_rx_resources(vsi);
vsi->current_netdev_flags = 0;
set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state); if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
set_bit(__I40E_CLIENT_RESET, pf->state);
}
/**
 * i40e_quiesce_vsi - Pause a given VSI
 * @vsi: the VSI being paused
 *
 * A VSI that is already down is left alone.  Otherwise it is flagged as
 * needing a restart and stopped through the netdev's ndo_stop when it has
 * a running netdev, or through i40e_vsi_close() when it does not.
 **/
static void i40e_quiesce_vsi(struct i40e_vsi *vsi)
{
	if (test_bit(__I40E_VSI_DOWN, vsi->state))
		return;

	set_bit(__I40E_VSI_NEEDS_RESTART, vsi->state);

	if (!vsi->netdev || !netif_running(vsi->netdev)) {
		i40e_vsi_close(vsi);
		return;
	}

	vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
}
/**
 * i40e_unquiesce_vsi - Resume a given VSI
 * @vsi: the VSI being resumed
 *
 * Only acts on a VSI that was flagged as needing a restart; the flag is
 * consumed here.  The VSI is reopened through the netdev's ndo_open when
 * it has a running netdev, or through i40e_vsi_open() when it does not.
 **/
static void i40e_unquiesce_vsi(struct i40e_vsi *vsi)
{
	if (!test_and_clear_bit(__I40E_VSI_NEEDS_RESTART, vsi->state))
		return;

	if (!vsi->netdev || !netif_running(vsi->netdev)) {
		i40e_vsi_open(vsi);   /* this clears the DOWN bit */
		return;
	}

	vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
}
/** * i40e_pf_quiesce_all_vsi - Pause all VSIs on a PF * @pf: the PF
**/ staticvoid i40e_pf_quiesce_all_vsi(struct i40e_pf *pf)
{ struct i40e_vsi *vsi; int v;
/** * i40e_pf_unquiesce_all_vsi - Resume all VSIs on a PF * @pf: the PF
**/ staticvoid i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf)
{ struct i40e_vsi *vsi; int v;
/**
 * i40e_vsi_wait_queues_disabled - Wait for VSI's queues to be disabled
 * @vsi: the VSI being configured
 *
 * Wait until all queues on a given VSI have been disabled.  For every
 * queue pair the Tx queue is checked first, then the XDP Tx queue when
 * XDP is enabled, then the Rx queue.
 *
 * Returns 0 on success, or the error from the first wait that fails
 * (which is also logged).
 **/
int i40e_vsi_wait_queues_disabled(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
	int pf_q = vsi->base_queue;
	int n, err;

	for (n = 0; n < vsi->num_queue_pairs; n++, pf_q++) {
		/* Check and wait for the Tx queue */
		err = i40e_pf_txq_wait(pf, pf_q, false);
		if (err) {
			dev_info(&pf->pdev->dev,
				 "VSI seid %d Tx ring %d disable timeout\n",
				 vsi->seid, pf_q);
			return err;
		}

		if (i40e_enabled_xdp_vsi(vsi)) {
			/* Check and wait for the XDP Tx queue */
			err = i40e_pf_txq_wait(pf, pf_q + vsi->alloc_queue_pairs,
					       false);
			if (err) {
				dev_info(&pf->pdev->dev,
					 "VSI seid %d XDP Tx ring %d disable timeout\n",
					 vsi->seid, pf_q);
				return err;
			}
		}

		/* Check and wait for the Rx queue */
		err = i40e_pf_rxq_wait(pf, pf_q, false);
		if (err) {
			dev_info(&pf->pdev->dev,
				 "VSI seid %d Rx ring %d disable timeout\n",
				 vsi->seid, pf_q);
			return err;
		}
	}

	return 0;
}
#ifdef CONFIG_I40E_DCB
/**
 * i40e_pf_wait_queues_disabled - Wait for all queues of PF VSIs to be disabled
 * @pf: the PF
 *
 * Waits for the queues of every VSI managed by this PF to reach the
 * disabled state, stopping at the first VSI that fails.
 *
 * Return: 0 on success, otherwise the error from the first failing VSI.
 **/
static int i40e_pf_wait_queues_disabled(struct i40e_pf *pf)
{
	struct i40e_vsi *vsi;
	int v;

	i40e_pf_for_each_vsi(pf, v, vsi) {
		int err = i40e_vsi_wait_queues_disabled(vsi);

		if (err)
			return err;
	}

	return 0;
}
#endif
/** * i40e_get_iscsi_tc_map - Return TC map for iSCSI APP * @pf: pointer to PF * * Get TC map for ISCSI PF type that will include iSCSI TC * and LAN TC.
**/ static u8 i40e_get_iscsi_tc_map(struct i40e_pf *pf)
{ struct i40e_dcb_app_priority_table app; struct i40e_hw *hw = &pf->hw;
u8 enabled_tc = 1; /* TC0 is always enabled */
u8 tc, i; /* Get the iSCSI APP TLV */ struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
for (i = 0; i < dcbcfg->numapps; i++) {
app = dcbcfg->app[i]; if (app.selector == I40E_APP_SEL_TCPIP &&
app.protocolid == I40E_APP_PROTOID_ISCSI) {
tc = dcbcfg->etscfg.prioritytable[app.priority];
enabled_tc |= BIT(tc); break;
}
}
return enabled_tc;
}
/** * i40e_dcb_get_num_tc - Get the number of TCs from DCBx config * @dcbcfg: the corresponding DCBx configuration structure * * Return the number of TCs from given DCBx configuration
**/ static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
{ int i, tc_unused = 0;
u8 num_tc = 0;
u8 ret = 0;
/* Scan the ETS Config Priority Table to find * traffic class enabled for a given priority * and create a bitmask of enabled TCs
*/ for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
num_tc |= BIT(dcbcfg->etscfg.prioritytable[i]);
/* Now scan the bitmask to check for * contiguous TCs starting with TC0
*/ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (num_tc & BIT(i)) { if (!tc_unused) {
ret++;
} else {
pr_err("Non-contiguous TC - Disabling DCB\n"); return 1;
}
} else {
tc_unused = 1;
}
}
/* There is always at least TC0 */ if (!ret)
ret = 1;
return ret;
}
/**
 * i40e_dcb_get_enabled_tc - Get enabled traffic classes
 * @dcbcfg: the corresponding DCBx configuration structure
 *
 * Query the given DCBX config for its TC count and turn that count into a
 * bitmap with the lowest bits set. TC0 is always part of the result.
 *
 * Return: bitmap of enabled traffic classes.
 **/
static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg)
{
	u8 tc_count = i40e_dcb_get_num_tc(dcbcfg);
	u8 tc_map = 1;
	u8 tc = 0;

	while (tc < tc_count) {
		tc_map |= BIT(tc);
		tc++;
	}

	return tc_map;
}
/**
 * i40e_mqprio_get_enabled_tc - Get enabled traffic classes
 * @pf: PF being queried
 *
 * Reads the TC count from the main VSI's MQPRIO options and turns it into a
 * bitmap with the lowest bits set. TC0 is always part of the result.
 *
 * Return: bitmap of enabled traffic classes.
 **/
static u8 i40e_mqprio_get_enabled_tc(struct i40e_pf *pf)
{
	struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf);
	u8 tc_count = main_vsi->mqprio_qopt.qopt.num_tc;
	u8 tc_map = 1;
	u8 tc = 1;

	/* bit 0 is already set, so start at TC1 */
	while (tc < tc_count) {
		tc_map |= BIT(tc);
		tc++;
	}

	return tc_map;
}
/** * i40e_pf_get_num_tc - Get enabled traffic classes for PF * @pf: PF being queried * * Return number of traffic classes enabled for the given PF
**/ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
{
u8 i, enabled_tc = 1;
u8 num_tc = 0;
if (i40e_is_tc_mqprio_enabled(pf)) { struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf);
return vsi->mqprio_qopt.qopt.num_tc;
}
/* If neither MQPRIO nor DCB is enabled, then always use single TC */ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) return 1;
/* SFP mode will be enabled for all TCs on port */ if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags)) return i40e_dcb_get_num_tc(&pf->hw.local_dcbx_config);
/* MFP mode return count of enabled TCs for this PF */ if (pf->hw.func_caps.iscsi)
enabled_tc = i40e_get_iscsi_tc_map(pf); else return 1; /* Only TC0 */
for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i))
num_tc++;
} return num_tc;
}
/**
 * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes
 * @pf: PF being queried
 *
 * Return: a bitmap of the traffic classes enabled for this PF, chosen by
 * mode: MQPRIO map, default TC when DCB is off, DCBX map when MFP is off,
 * and otherwise the iSCSI map (or the default TC for a non-iSCSI PF).
 **/
static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
{
	if (i40e_is_tc_mqprio_enabled(pf))
		return i40e_mqprio_get_enabled_tc(pf);

	/* Neither MQPRIO nor DCB is enabled for this PF: default TC only */
	if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags))
		return I40E_DEFAULT_TRAFFIC_CLASS;

	/* SFP mode we want PF to be enabled for all TCs */
	if (!test_bit(I40E_FLAG_MFP_ENA, pf->flags))
		return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config);

	/* MFP enabled: only an iSCSI PF gets more than the default TC */
	if (!pf->hw.func_caps.iscsi)
		return I40E_DEFAULT_TRAFFIC_CLASS;

	return i40e_get_iscsi_tc_map(pf);
}
/** * i40e_vsi_get_bw_info - Query VSI BW Information * @vsi: the VSI being queried * * Returns 0 on success, negative value on failure
**/ staticint i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
{ struct i40e_aqc_query_vsi_ets_sla_config_resp bw_ets_config = {0}; struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0}; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw;
u32 tc_bw_max; int ret; int i;
/* Get the VSI level BW configuration */
ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL); if (ret) {
dev_info(&pf->pdev->dev, "couldn't get PF vsi bw config, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(pf->hw.aq.asq_last_status)); return -EINVAL;
}
/* Get the VSI level BW configuration per TC */
ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid, &bw_ets_config,
NULL); if (ret) {
dev_info(&pf->pdev->dev, "couldn't get PF vsi ets bw config, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(pf->hw.aq.asq_last_status)); return -EINVAL;
}
if (bw_config.tc_valid_bits != bw_ets_config.tc_valid_bits) {
dev_info(&pf->pdev->dev, "Enabled TCs mismatch from querying VSI BW info 0x%08x 0x%08x\n",
bw_config.tc_valid_bits,
bw_ets_config.tc_valid_bits); /* Still continuing */
}
vsi->bw_limit = le16_to_cpu(bw_config.port_bw_limit);
vsi->bw_max_quanta = bw_config.max_bw;
tc_bw_max = le16_to_cpu(bw_ets_config.tc_bw_max[0]) |
(le16_to_cpu(bw_ets_config.tc_bw_max[1]) << 16); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
vsi->bw_ets_share_credits[i] = bw_ets_config.share_credits[i];
vsi->bw_ets_limit_credits[i] =
le16_to_cpu(bw_ets_config.credits[i]); /* 3 bits out of 4 for each TC */
vsi->bw_ets_max_quanta[i] = (u8)((tc_bw_max >> (i*4)) & 0x7);
}
return 0;
}
/**
 * i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC
 * @vsi: the VSI being configured
 * @enabled_tc: TC bitmap
 * @bw_share: BW shared credits per TC
 *
 * Nothing is done in MQPRIO mode. When neither MQPRIO hardware offload nor
 * DCB is enabled, only the VSI Tx rate limit is reset. Otherwise the per-TC
 * credits are sent to firmware and the returned queue-set handles are stored
 * in vsi->info.qs_handle.
 *
 * Return: 0 on success, negative value on failure
 **/
static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi, u8 enabled_tc,
				       u8 *bw_share)
{
	struct i40e_aqc_configure_vsi_tc_bw_data tc_bw;
	struct i40e_pf *pf = vsi->back;
	int err;
	int tc;

	/* There is no need to reset BW when mqprio mode is on. */
	if (i40e_is_tc_mqprio_enabled(pf))
		return 0;

	if (!vsi->mqprio_qopt.qopt.hw &&
	    !test_bit(I40E_FLAG_DCB_ENA, pf->flags)) {
		err = i40e_set_bw_limit(vsi, vsi->seid, 0);
		if (err)
			dev_info(&pf->pdev->dev,
				 "Failed to reset tx rate for vsi->seid %u\n",
				 vsi->seid);
		return err;
	}

	memset(&tc_bw, 0, sizeof(tc_bw));
	tc_bw.tc_valid_bits = enabled_tc;
	for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++)
		tc_bw.tc_bw_credits[tc] = bw_share[tc];

	err = i40e_aq_config_vsi_tc_bw(&pf->hw, vsi->seid, &tc_bw, NULL);
	if (err) {
		dev_info(&pf->pdev->dev,
			 "AQ command Config VSI BW allocation per TC failed = %d\n",
			 pf->hw.aq.asq_last_status);
		return -EINVAL;
	}

	/* keep the queue-set handles written back into the AQ buffer */
	for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++)
		vsi->info.qs_handle[tc] = tc_bw.qs_handles[tc];

	return 0;
}
/**
 * i40e_vsi_config_netdev_tc - Setup the netdev TC configuration
 * @vsi: the VSI being configured
 * @enabled_tc: TC map to be enabled
 *
 * Mirrors the VSI's stored TC configuration (vsi->tc_config) into its
 * netdev: number of TCs, queue range per TC and, outside MQPRIO mode, the
 * user-priority to TC map. A zero @enabled_tc resets the netdev TC setup.
 * Does nothing for a VSI without a netdev.
 **/
static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc)
{
	struct net_device *netdev = vsi->netdev;
	struct i40e_pf *pf = vsi->back;
	struct i40e_dcbx_config *dcbcfg = &pf->hw.local_dcbx_config;
	int tc, up;

	if (!netdev)
		return;

	if (!enabled_tc) {
		netdev_reset_tc(netdev);
		return;
	}

	/* Set up actual enabled TCs on the VSI */
	if (netdev_set_num_tc(netdev, vsi->tc_config.numtc))
		return;

	/* Set per TC queues for the VSI, but only for enabled TCs.
	 *
	 * e.g. For a VSI that has TC0 and TC3 enabled the
	 * enabled_tc bitmap would be 0x00001001; the driver
	 * will set the numtc for netdev as 2 that will be
	 * referenced by the netdev layer as TC 0 and 1.
	 */
	for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++) {
		if (!(vsi->tc_config.enabled_tc & BIT(tc)))
			continue;

		netdev_set_tc_queue(netdev,
				    vsi->tc_config.tc_info[tc].netdev_tc,
				    vsi->tc_config.tc_info[tc].qcount,
				    vsi->tc_config.tc_info[tc].qoffset);
	}

	if (i40e_is_tc_mqprio_enabled(pf))
		return;

	/* Assign UP2TC map for the VSI */
	for (up = 0; up < I40E_MAX_USER_PRIORITY; up++) {
		/* actual TC# for the UP, then its mapped netdev TC# */
		u8 ets_tc = dcbcfg->etscfg.prioritytable[up];

		netdev_set_prio_tc_map(netdev, up,
				       vsi->tc_config.tc_info[ets_tc].netdev_tc);
	}
}
/**
 * i40e_vsi_update_queue_map - Update our copy of VSI info with new queue map
 * @vsi: the VSI being configured
 * @ctxt: the ctxt buffer returned from AQ VSI update param command
 *
 * Only mapping_flags, queue_mapping and tc_mapping are copied from @ctxt
 * into vsi->info.
 **/
static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi,
				      struct i40e_vsi_context *ctxt)
{
	/* Copy just the sections touched, not the entire info, since
	 * not all sections are valid as returned by update vsi params.
	 */
	memcpy(&vsi->info.queue_mapping, &ctxt->info.queue_mapping,
	       sizeof(vsi->info.queue_mapping));
	memcpy(&vsi->info.tc_mapping, ctxt->info.tc_mapping,
	       sizeof(vsi->info.tc_mapping));
	vsi->info.mapping_flags = ctxt->info.mapping_flags;
}
/** * i40e_update_adq_vsi_queues - update queue mapping for ADq VSI * @vsi: the VSI being reconfigured * @vsi_offset: offset from main VF VSI
*/ int i40e_update_adq_vsi_queues(struct i40e_vsi *vsi, int vsi_offset)
{ struct i40e_vsi_context ctxt = {}; struct i40e_pf *pf; struct i40e_hw *hw; int ret;
if (!vsi) return -EINVAL;
pf = vsi->back;
hw = &pf->hw;
i40e_vsi_setup_queue_map(vsi, &ctxt, vsi->tc_config.enabled_tc, false); if (vsi->reconfig_rss) {
vsi->rss_size = min_t(int, pf->alloc_rss_size,
vsi->num_queue_pairs);
ret = i40e_vsi_config_rss(vsi); if (ret) {
dev_info(&pf->pdev->dev, "Failed to reconfig rss for num_queues\n"); return ret;
}
vsi->reconfig_rss = false;
}
ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "Update vsi config failed, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(hw->aq.asq_last_status)); return ret;
} /* update the local VSI info with updated queue map */
i40e_vsi_update_queue_map(vsi, &ctxt);
vsi->info.valid_sections = 0;
return ret;
}
/**
 * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map
 * @vsi: VSI to be configured
 * @enabled_tc: TC bitmap
 *
 * This configures a particular VSI for TCs that are mapped to the
 * given TC bitmap. It uses default bandwidth share for TCs across
 * VSIs to configure TC for a particular VSI.
 *
 * NOTE:
 * It is expected that the VSI queues have been quiesced before calling
 * this function.
**/ staticint i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
{
u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0}; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_vsi_context ctxt; int ret = 0; int i;
/* Check if enabled_tc is same as existing or new TCs */ if (vsi->tc_config.enabled_tc == enabled_tc &&
vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL) return ret;
/* Enable ETS TCs with equal BW Share for now across all VSIs */ for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i))
bw_share[i] = 1;
}
ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share); if (ret) { struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
dev_info(&pf->pdev->dev, "Failed configuring TC map %d for VSI %d\n",
enabled_tc, vsi->seid);
ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid,
&bw_config, NULL); if (ret) {
dev_info(&pf->pdev->dev, "Failed querying vsi bw info, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(hw->aq.asq_last_status)); goto out;
} if ((bw_config.tc_valid_bits & enabled_tc) != enabled_tc) {
u8 valid_tc = bw_config.tc_valid_bits & enabled_tc;
if (!valid_tc)
valid_tc = bw_config.tc_valid_bits; /* Always enable TC0, no matter what */
valid_tc |= 1;
dev_info(&pf->pdev->dev, "Requested tc 0x%x, but FW reports 0x%x as valid. Attempting to use 0x%x.\n",
enabled_tc, bw_config.tc_valid_bits, valid_tc);
enabled_tc = valid_tc;
}
ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share); if (ret) {
dev_err(&pf->pdev->dev, "Unable to configure TC map %d for VSI %d\n",
enabled_tc, vsi->seid); goto out;
}
}
/* Update Queue Pairs Mapping for currently enabled UPs */
ctxt.seid = vsi->seid;
ctxt.pf_num = vsi->back->hw.pf_id;
ctxt.vf_num = 0;
ctxt.uplink_seid = vsi->uplink_seid;
ctxt.info = vsi->info; if (i40e_is_tc_mqprio_enabled(pf)) {
ret = i40e_vsi_setup_queue_map_mqprio(vsi, &ctxt, enabled_tc); if (ret) goto out;
} else {
i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
}
/* On destroying the qdisc, reset vsi->rss_size, as number of enabled * queues changed.
*/ if (!vsi->mqprio_qopt.qopt.hw && vsi->reconfig_rss) {
vsi->rss_size = min_t(int, vsi->back->alloc_rss_size,
vsi->num_queue_pairs);
ret = i40e_vsi_config_rss(vsi); if (ret) {
dev_info(&vsi->back->pdev->dev, "Failed to reconfig rss for num_queues\n"); return ret;
}
vsi->reconfig_rss = false;
} if (test_bit(I40E_FLAG_IWARP_ENA, vsi->back->flags)) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_QUEUE_OPT_VALID);
ctxt.info.queueing_opt_flags |= I40E_AQ_VSI_QUE_OPT_TCP_ENA;
}
/* Update the VSI after updating the VSI queue-mapping * information
*/
ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "Update vsi tc config failed, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(hw->aq.asq_last_status)); goto out;
} /* update the local VSI info with updated queue map */
i40e_vsi_update_queue_map(vsi, &ctxt);
vsi->info.valid_sections = 0;
/* Update current VSI BW information */
ret = i40e_vsi_get_bw_info(vsi); if (ret) {
dev_info(&pf->pdev->dev, "Failed updating vsi bw info, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(hw->aq.asq_last_status)); goto out;
}
/**
 * i40e_vsi_reconfig_tc - Reconfigure VSI Tx Scheduler for stored TC map
 * @vsi: VSI to be reconfigured
 *
 * This reconfigures a particular VSI for TCs that are mapped to the
 * TC bitmap stored previously for the VSI.
 *
 * Context: It is expected that the VSI queues have been quiesced before
 *          calling this function.
 *
 * Return: 0 on success, negative value on failure
**/ staticint i40e_vsi_reconfig_tc(struct i40e_vsi *vsi)
{
u8 enabled_tc;
/**
 * i40e_get_link_speed - Returns link speed for the interface
 * @vsi: VSI to be configured
 *
 * Translates the PHY's reported link speed enum into a plain number
 * (40000, 25000, 20000, 10000 or 1000).
 *
 * Return: the link speed value, or -EINVAL for any other reported speed.
 **/
static int i40e_get_link_speed(struct i40e_vsi *vsi)
{
	int speed;

	switch (vsi->back->hw.phy.link_info.link_speed) {
	case I40E_LINK_SPEED_40GB:
		speed = 40000;
		break;
	case I40E_LINK_SPEED_25GB:
		speed = 25000;
		break;
	case I40E_LINK_SPEED_20GB:
		speed = 20000;
		break;
	case I40E_LINK_SPEED_10GB:
		speed = 10000;
		break;
	case I40E_LINK_SPEED_1GB:
		speed = 1000;
		break;
	default:
		speed = -EINVAL;
		break;
	}

	return speed;
}
/**
 * i40e_bw_bytes_to_mbits - Convert max_tx_rate from bytes to mbits
 * @vsi: Pointer to vsi structure
 * @max_tx_rate: max TX rate in bytes to be converted into Mbits
 *
 * Helper function to convert units before send to set BW limit. Rates
 * below I40E_BW_MBPS_DIVISOR are not divided; they are replaced by
 * I40E_BW_CREDIT_DIVISOR and a warning is logged.
 *
 * Return: @max_tx_rate divided by I40E_BW_MBPS_DIVISOR, or
 * I40E_BW_CREDIT_DIVISOR for a rate below the divisor.
 **/
static u64 i40e_bw_bytes_to_mbits(struct i40e_vsi *vsi, u64 max_tx_rate)
{
	if (max_tx_rate >= I40E_BW_MBPS_DIVISOR) {
		/* do_div() divides max_tx_rate in place */
		do_div(max_tx_rate, I40E_BW_MBPS_DIVISOR);
		return max_tx_rate;
	}

	dev_warn(&vsi->back->pdev->dev,
		 "Setting max tx rate to minimum usable value of 50Mbps.\n");

	return I40E_BW_CREDIT_DIVISOR;
}
/**
 * i40e_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate
 * @vsi: VSI to be configured
 * @seid: seid of the channel/VSI
 * @max_tx_rate: max TX rate to be configured as BW limit
 *
 * Helper function to set BW limit for a given VSI. A non-zero rate below
 * I40E_BW_CREDIT_DIVISOR is raised to that minimum; a rate of 0 disables
 * the limit.
 *
 * Return: 0 on success, -EINVAL when @max_tx_rate exceeds the link speed,
 * otherwise the error from the admin queue command.
 **/
int i40e_set_bw_limit(struct i40e_vsi *vsi, u16 seid, u64 max_tx_rate)
{
	struct i40e_pf *pf = vsi->back;
	int link_speed = i40e_get_link_speed(vsi);
	u64 credits;
	int err;

	/* NOTE(review): link_speed is a signed int compared against a u64,
	 * so a negative value from i40e_get_link_speed() is converted to a
	 * very large unsigned number and this check then never rejects.
	 */
	if (max_tx_rate > link_speed) {
		dev_err(&pf->pdev->dev,
			"Invalid max tx rate %llu specified for VSI seid %d.",
			max_tx_rate, seid);
		return -EINVAL;
	}

	if (max_tx_rate && max_tx_rate < I40E_BW_CREDIT_DIVISOR) {
		dev_warn(&pf->pdev->dev,
			 "Setting max tx rate to minimum usable value of 50Mbps.\n");
		max_tx_rate = I40E_BW_CREDIT_DIVISOR;
	}

	/* Tx rate credits are in values of 50Mbps, 0 is disabled */
	credits = max_tx_rate;
	do_div(credits, I40E_BW_CREDIT_DIVISOR);

	err = i40e_aq_config_vsi_bw_limit(&pf->hw, seid, credits,
					  I40E_MAX_BW_INACTIVE_ACCUM, NULL);
	if (err)
		dev_err(&pf->pdev->dev,
			"Failed set tx rate (%llu Mbps) for vsi->seid %u, err %pe aq_err %s\n",
			max_tx_rate, seid, ERR_PTR(err),
			libie_aq_str(pf->hw.aq.asq_last_status));

	return err;
}
/** * i40e_remove_queue_channels - Remove queue channels for the TCs * @vsi: VSI to be configured * * Remove queue channels for the TCs
**/ staticvoid i40e_remove_queue_channels(struct i40e_vsi *vsi)
{ struct i40e_cloud_filter *cfilter; enum libie_aq_err last_aq_status; struct i40e_channel *ch, *ch_tmp; struct i40e_pf *pf = vsi->back; struct hlist_node *node; int ret, i;
/* Reset rss size that was stored when reconfiguring rss for * channel VSIs with non-power-of-2 queue count.
*/
vsi->current_rss_size = 0;
/* perform cleanup for channels if they exist */ if (list_empty(&vsi->ch_list)) return;
/* Reset BW configured for this VSI via mqprio */
ret = i40e_set_bw_limit(vsi, ch->seid, 0); if (ret)
dev_info(&vsi->back->pdev->dev, "Failed to reset tx rate for ch->seid %u\n",
ch->seid);
/* delete cloud filters associated with this channel */
hlist_for_each_entry_safe(cfilter, node,
&pf->cloud_filter_list, cloud_node) { if (cfilter->seid != ch->seid) continue;
hash_del(&cfilter->cloud_node); if (cfilter->dst_port)
ret = i40e_add_del_cloud_filter_big_buf(vsi,
cfilter, false); else
ret = i40e_add_del_cloud_filter(vsi, cfilter, false);
last_aq_status = pf->hw.aq.asq_last_status; if (ret)
dev_info(&pf->pdev->dev, "Failed to delete cloud filter, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(last_aq_status));
kfree(cfilter);
}
/* delete VSI from FW */
ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
NULL); if (ret)
dev_err(&vsi->back->pdev->dev, "unable to remove channel (%d) for parent VSI(%d)\n",
ch->seid, p_vsi->seid);
kfree(ch);
}
INIT_LIST_HEAD(&vsi->ch_list);
}
/**
 * i40e_get_max_queues_for_channel - largest queue count among channels
 * @vsi: ptr to VSI to which channels are associated with
 *
 * Helper function which returns max value among the queue counts set on the
 * channels/TCs created. Channels that are not initialized are ignored.
 *
 * Return: the largest num_queue_pairs found, or 0 when there is none.
 **/
static int i40e_get_max_queues_for_channel(struct i40e_vsi *vsi)
{
	struct i40e_channel *ch, *ch_tmp;
	int widest = 0;

	list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) {
		if (ch->initialized && ch->num_queue_pairs > widest)
			widest = ch->num_queue_pairs;
	}

	return widest;
}
/**
 * i40e_validate_num_queues - validate num_queues w.r.t channel
 * @pf: ptr to PF device
 * @num_queues: number of queues
 * @vsi: the parent VSI
 * @reconfig_rss: indicates should the RSS be reconfigured or not
 *
 * This function validates number of queues in the context of new channel
 * which is being established and determines if RSS should be reconfigured
 * or not for parent VSI. *@reconfig_rss is set to true only when
 * @num_queues is not a power of 2 and passes all checks.
 *
 * Return: 0 when @num_queues is acceptable, -EINVAL otherwise (including a
 * NULL @reconfig_rss).
 **/
static int i40e_validate_num_queues(struct i40e_pf *pf, int num_queues,
				    struct i40e_vsi *vsi, bool *reconfig_rss)
{
	bool pow2 = is_power_of_2(num_queues);
	int max_ch_queues;

	if (!reconfig_rss)
		return -EINVAL;

	*reconfig_rss = false;

	/* Once an RSS size has been recorded, a request may not exceed it,
	 * and a smaller request must be a power of 2.
	 */
	if (vsi->current_rss_size) {
		if (num_queues > vsi->current_rss_size) {
			dev_dbg(&pf->pdev->dev,
				"Error: num_queues (%d) > vsi's current_size(%d)\n",
				num_queues, vsi->current_rss_size);
			return -EINVAL;
		}

		if (num_queues < vsi->current_rss_size && !pow2) {
			dev_dbg(&pf->pdev->dev,
				"Error: num_queues (%d) < vsi's current_size(%d), but not power of 2\n",
				num_queues, vsi->current_rss_size);
			return -EINVAL;
		}
	}

	if (pow2)
		return 0;

	/* Find the max num_queues configured for channel if channel
	 * exist.
	 * if channel exist, then enforce 'num_queues' to be more than
	 * max ever queues configured for channel.
	 */
	max_ch_queues = i40e_get_max_queues_for_channel(vsi);
	if (num_queues < max_ch_queues) {
		dev_dbg(&pf->pdev->dev,
			"Error: num_queues (%d) < max queues configured for channel(%d)\n",
			num_queues, max_ch_queues);
		return -EINVAL;
	}

	*reconfig_rss = true;

	return 0;
}
/**
 * i40e_vsi_reconfig_rss - reconfig RSS based on specified rss_size
 * @vsi: the VSI being setup
 * @rss_size: size of RSS, accordingly LUT gets reprogrammed
 *
 * This function reconfigures RSS by reprogramming LUTs using 'rss_size'.
 * Any user configured LUT is ignored; a user configured hash key is kept.
 * On success vsi->current_rss_size records the size in use and
 * vsi->orig_rss_size is set on first use.
 *
 * Return: 0 on success, -EINVAL when the VSI has no RSS size or @rss_size
 * exceeds it, -ENOMEM when the LUT cannot be allocated, otherwise the error
 * from i40e_config_rss().
 **/
static int i40e_vsi_reconfig_rss(struct i40e_vsi *vsi, u16 rss_size)
{
	struct i40e_pf *pf = vsi->back;
	u8 seed[I40E_HKEY_ARRAY_SIZE];
	struct i40e_hw *hw = &pf->hw;
	int local_rss_size;
	u8 *lut;
	int ret;

	if (!vsi->rss_size)
		return -EINVAL;

	if (rss_size > vsi->rss_size)
		return -EINVAL;

	local_rss_size = min_t(int, vsi->rss_size, rss_size);

	/* scratch LUT; freed on every path below */
	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
	if (!lut)
		return -ENOMEM;

	/* Ignoring user configured lut if there is one */
	i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, local_rss_size);

	/* Use user configured hash key if there is one, otherwise
	 * use default.
	 */
	if (vsi->rss_hkey_user)
		memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE);
	else
		netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);

	ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
	kfree(lut);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "Cannot set RSS lut, err %pe aq_err %s\n",
			 ERR_PTR(ret),
			 libie_aq_str(hw->aq.asq_last_status));
		return ret;
	}

	/* Do the update w.r.t. storing rss_size */
	if (!vsi->orig_rss_size)
		vsi->orig_rss_size = vsi->rss_size;
	vsi->current_rss_size = local_rss_size;

	return ret;
}
/** * i40e_channel_setup_queue_map - Setup a channel queue map * @pf: ptr to PF device * @ctxt: VSI context structure * @ch: ptr to channel structure * * Setup queue map for a specific channel
**/ staticvoid i40e_channel_setup_queue_map(struct i40e_pf *pf, struct i40e_vsi_context *ctxt, struct i40e_channel *ch)
{
u16 qcount, qmap, sections = 0;
u8 offset = 0; int pow;
/* Set queue map for a given VSI context */
i40e_channel_setup_queue_map(pf, &ctxt, ch);
/* Now time to create VSI */
ret = i40e_aq_add_vsi(hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "add new vsi failed, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(pf->hw.aq.asq_last_status)); return -ENOENT;
}
/* Success, update channel, set enabled_tc only if the channel * is not a macvlan
*/
ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc;
ch->seid = ctxt.seid;
ch->vsi_number = ctxt.vsi_number;
ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx);
/* copy just the sections touched not the entire info * since not all sections are valid as returned by * update vsi params
*/
ch->info.mapping_flags = ctxt.info.mapping_flags;
memcpy(&ch->info.queue_mapping,
&ctxt.info.queue_mapping, sizeof(ctxt.info.queue_mapping));
memcpy(&ch->info.tc_mapping, ctxt.info.tc_mapping, sizeof(ctxt.info.tc_mapping));
return 0;
}
/**
 * i40e_channel_config_bw - configure per-TC BW credits for a channel
 * @vsi: the VSI the channel belongs to (used to reach the HW and device)
 * @ch: ptr to channel structure
 * @bw_share: BW shared credits per TC
 *
 * Sends the channel's enabled TC bitmap and the per-TC credits to firmware
 * for the channel's seid and stores the returned queue-set handles in
 * ch->info.qs_handle.
 *
 * Return: 0 on success, -EINVAL if the admin queue command fails.
 **/
static int i40e_channel_config_bw(struct i40e_vsi *vsi, struct i40e_channel *ch,
				  u8 *bw_share)
{
	struct i40e_aqc_configure_vsi_tc_bw_data tc_bw;
	int err;
	int tc;

	memset(&tc_bw, 0, sizeof(tc_bw));
	tc_bw.tc_valid_bits = ch->enabled_tc;
	for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++)
		tc_bw.tc_bw_credits[tc] = bw_share[tc];

	err = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, ch->seid,
				       &tc_bw, NULL);
	if (err) {
		dev_info(&vsi->back->pdev->dev,
			 "Config VSI BW allocation per TC failed, aq_err: %d for new_vsi->seid %u\n",
			 vsi->back->hw.aq.asq_last_status, ch->seid);
		return -EINVAL;
	}

	/* keep the queue-set handles written back into the AQ buffer */
	for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++)
		ch->info.qs_handle[tc] = tc_bw.qs_handles[tc];

	return 0;
}
/**
 * i40e_channel_config_tx_ring - config TX ring associated with new channel
 * @pf: ptr to PF device
 * @vsi: the VSI being setup
 * @ch: ptr to channel structure
 *
 * Configures equal BW share for the channel's enabled TCs and then points
 * the parent VSI's Tx and Rx rings in the channel's queue range at @ch,
 * since the channel's queues are taken from the parent VSI.
 *
 * Return: 0 on success, otherwise the error from the BW configuration.
 **/
static int i40e_channel_config_tx_ring(struct i40e_pf *pf,
				       struct i40e_vsi *vsi,
				       struct i40e_channel *ch)
{
	u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
	int err;
	int i;

	/* Enable ETS TCs with equal BW Share for now across all VSIs */
	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
		if (ch->enabled_tc & BIT(i))
			bw_share[i] = 1;

	/* configure BW for new VSI */
	err = i40e_channel_config_bw(vsi, ch, bw_share);
	if (err) {
		dev_info(&vsi->back->pdev->dev,
			 "Failed configuring TC map %d for channel (seid %u)\n",
			 ch->enabled_tc, ch->seid);
		return err;
	}

	for (i = 0; i < ch->num_queue_pairs; i++) {
		u16 pf_q = ch->base_queue + i;

		/* Tag the main VSI's TX ring, for re-setup of the TX
		 * queue context, and the matching RX ring
		 */
		vsi->tx_rings[pf_q]->ch = ch;
		vsi->rx_rings[pf_q]->ch = ch;
	}

	return 0;
}
/** * i40e_setup_hw_channel - setup new channel * @pf: ptr to PF device * @vsi: the VSI being setup * @ch: ptr to channel structure * @uplink_seid: underlying HW switching element (VEB) ID * @type: type of channel to be created (VMDq2/VF) * * Setup new channel (VSI) based on specified type (VMDq2/VF) * and configures TX rings accordingly
**/ staticinlineint i40e_setup_hw_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, struct i40e_channel *ch,
u16 uplink_seid, u8 type)
{ int ret;
/* Proceed with creation of channel (VMDq2) VSI */
ret = i40e_add_channel(pf, uplink_seid, ch); if (ret) {
dev_info(&pf->pdev->dev, "failed to add_channel using uplink_seid %u\n",
uplink_seid); return ret;
}
/* Mark the successful creation of channel */
ch->initialized = true;
/* Reconfigure TX queues using QTX_CTL register */
ret = i40e_channel_config_tx_ring(pf, vsi, ch); if (ret) {
dev_info(&pf->pdev->dev, "failed to configure TX rings for channel %u\n",
ch->seid); return ret;
}
/** * i40e_setup_channel - setup new channel using uplink element * @pf: ptr to PF device * @vsi: pointer to the VSI to set up the channel within * @ch: ptr to channel structure * * Setup new channel (VSI) based on specified type (VMDq2/VF) * and uplink switching element (uplink_seid)
**/ staticbool i40e_setup_channel(struct i40e_pf *pf, struct i40e_vsi *vsi, struct i40e_channel *ch)
{ struct i40e_vsi *main_vsi;
u8 vsi_type;
u16 seid; int ret;
if (vsi->type == I40E_VSI_MAIN) {
vsi_type = I40E_VSI_VMDQ2;
} else {
dev_err(&pf->pdev->dev, "unsupported parent vsi type(%d)\n",
vsi->type); returnfalse;
}
/* create channel (VSI), configure TX rings */
ret = i40e_setup_hw_channel(pf, vsi, ch, seid, vsi_type); if (ret) {
dev_err(&pf->pdev->dev, "failed to setup hw_channel\n"); returnfalse;
}
return ch->initialized ? true : false;
}
/**
 * i40e_validate_and_set_switch_mode - sets up switch mode correctly
 * @vsi: ptr to VSI which has PF backing
 *
 * Re-reads the device capabilities and checks the reported switch mode.
 * If cloud filter mode 2 is already active nothing is changed; any other
 * cloud filter mode is rejected. Otherwise the switch is configured for
 * non-tunneled cloud filter mode with TCP as L4 type.
 *
 * Return: 0 on success, -EINVAL when capabilities cannot be read or the
 * current mode is unsupported, otherwise the error from the switch
 * configuration command.
 **/
static int i40e_validate_and_set_switch_mode(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
	struct i40e_hw *hw = &pf->hw;
	u8 mode;
	int err;

	err = i40e_get_capabilities(pf, i40e_aqc_opc_list_dev_capabilities);
	if (err)
		return -EINVAL;

	if (hw->dev_caps.switch_mode) {
		/* if switch mode is set, support mode2 (non-tunneled for
		 * cloud filter) for now
		 */
		u32 switch_mode = hw->dev_caps.switch_mode &
				  I40E_SWITCH_MODE_MASK;

		if (switch_mode == I40E_CLOUD_FILTER_MODE2 &&
		    switch_mode >= I40E_CLOUD_FILTER_MODE1)
			return 0;

		if (switch_mode >= I40E_CLOUD_FILTER_MODE1) {
			dev_err(&pf->pdev->dev,
				"Invalid switch_mode (%d), only non-tunneled mode for cloud filter is supported\n",
				hw->dev_caps.switch_mode);
			return -EINVAL;
		}
	}

	/* Bit 7 valid, L4 type TCP, non-tunneled cloud filter mode */
	mode = I40E_AQ_SET_SWITCH_BIT7_VALID |
	       I40E_AQ_SET_SWITCH_L4_TYPE_TCP |
	       I40E_AQ_SET_SWITCH_MODE_NON_TUNNEL;

	/* Prep mode field for set_switch_config */
	err = i40e_aq_set_switch_config(hw, pf->last_sw_conf_flags,
					pf->last_sw_conf_valid_flags,
					mode, NULL);
	/* an ESRCH status is returned to the caller without being logged */
	if (err && hw->aq.asq_last_status != LIBIE_AQ_RC_ESRCH)
		dev_err(&pf->pdev->dev,
			"couldn't set switch config bits, err %pe aq_err %s\n",
			ERR_PTR(err), libie_aq_str(hw->aq.asq_last_status));

	return err;
}
/**
 * i40e_create_queue_channel - function to create channel
 * @vsi: VSI to be configured
 * @ch: ptr to channel (it contains channel specific params)
 *
 * This function creates channel (VSI) using num_queues specified by user,
 * reconfigs RSS if needed. It also switches the PF to VEB mode on first
 * use, applies the channel's Tx rate limit when one is requested, and
 * deducts the channel's queues from the parent VSI's available count.
 *
 * Return: 0 on success, -EINVAL on any failure (invalid arguments, failed
 * validation, not enough queues, RSS/channel/BW setup failure).
 **/
int i40e_create_queue_channel(struct i40e_vsi *vsi,
			      struct i40e_channel *ch)
{
	struct i40e_pf *pf = vsi->back;
	bool reconfig_rss;
	int err;

	if (!ch)
		return -EINVAL;

	/* a channel without queues cannot be set up */
	if (!ch->num_queue_pairs) {
		dev_err(&pf->pdev->dev, "Invalid num_queues requested: %d\n",
			ch->num_queue_pairs);
		return -EINVAL;
	}

	/* validate user requested num_queues for channel */
	err = i40e_validate_num_queues(pf, ch->num_queue_pairs, vsi,
				       &reconfig_rss);
	if (err) {
		dev_info(&pf->pdev->dev, "Failed to validate num_queues (%d)\n",
			 ch->num_queue_pairs);
		return -EINVAL;
	}

	/* By default we are in VEPA mode, if this is the first VF/VMDq
	 * VSI to be added switch to VEB mode.
	 */
	if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) {
		set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);

		if (vsi->type == I40E_VSI_MAIN) {
			if (i40e_is_tc_mqprio_enabled(pf))
				i40e_do_reset(pf, I40E_PF_RESET_FLAG, true);
			else
				i40e_do_reset_safe(pf, I40E_PF_RESET_FLAG);
		}
		/* now onwards for main VSI, number of queues will be value
		 * of TC0's queue count
		 */
	}

	/* By this time, vsi->cnt_q_avail shall be set to non-zero and
	 * it should be more than num_queues
	 */
	if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_queue_pairs) {
		dev_dbg(&pf->pdev->dev,
			"Error: cnt_q_avail (%u) less than num_queues %d\n",
			vsi->cnt_q_avail, ch->num_queue_pairs);
		return -EINVAL;
	}

	/* reconfig_rss only if vsi type is MAIN_VSI */
	if (reconfig_rss && (vsi->type == I40E_VSI_MAIN)) {
		err = i40e_vsi_reconfig_rss(vsi, ch->num_queue_pairs);
		if (err) {
			dev_info(&pf->pdev->dev,
				 "Error: unable to reconfig rss for num_queues (%u)\n",
				 ch->num_queue_pairs);
			return -EINVAL;
		}
	}

	if (!i40e_setup_channel(pf, vsi, ch)) {
		dev_info(&pf->pdev->dev, "Failed to setup channel\n");
		return -EINVAL;
	}

	/* configure VSI for BW limit */
	if (ch->max_tx_rate) {
		u64 credits = ch->max_tx_rate;

		if (i40e_set_bw_limit(vsi, ch->seid, ch->max_tx_rate))
			return -EINVAL;

		/* credits is only computed for the debug message below */
		do_div(credits, I40E_BW_CREDIT_DIVISOR);
		dev_dbg(&pf->pdev->dev,
			"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
			ch->max_tx_rate,
			credits,
			ch->seid);
	}

	/* in case of VF, this will be main SRIOV VSI */
	ch->parent_vsi = vsi;

	/* and update main_vsi's count for queue_available to use */
	vsi->cnt_q_avail -= ch->num_queue_pairs;

	return 0;
}
/** * i40e_configure_queue_channels - Add queue channel for the given TCs * @vsi: VSI to be configured * * Configures queue channel mapping to the given TCs
**/ staticint i40e_configure_queue_channels(struct i40e_vsi *vsi)
{ struct i40e_channel *ch;
u64 max_rate = 0; int ret = 0, i;
/* Create app vsi with the TCs. Main VSI with TC0 is already set up */
vsi->tc_seid_map[0] = vsi->seid; for (i = 1; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (vsi->tc_config.enabled_tc & BIT(i)) {
ch = kzalloc(sizeof(*ch), GFP_KERNEL); if (!ch) {
ret = -ENOMEM; goto err_free;
}
/**
 * i40e_veb_config_tc - Configure TCs for given VEB
 * @veb: given VEB
 * @enabled_tc: TC bitmap
 *
 * Configures given TC bitmap for VEB (switching) element, giving every
 * enabled TC an equal relative BW share, and then refreshes the VEB's
 * cached BW information. Nothing is done when @enabled_tc is zero or
 * already matches veb->enabled_tc.
 *
 * Return: 0 on success or when there is nothing to do, otherwise the error
 * from the BW configuration command or from i40e_veb_get_bw_info().
 **/
int i40e_veb_config_tc(struct i40e_veb *veb, u8 enabled_tc)
{
	struct i40e_aqc_configure_switching_comp_bw_config_data bw_data = {0};
	struct i40e_pf *pf = veb->pf;
	int ret = 0;
	int i;

	/* No TCs or already enabled TCs just return */
	if (!enabled_tc || veb->enabled_tc == enabled_tc)
		return ret;

	bw_data.tc_valid_bits = enabled_tc;
	/* bw_data.absolute_credits is not set (relative) */

	/* Enable ETS TCs with equal BW Share for now */
	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
		if (enabled_tc & BIT(i))
			bw_data.tc_bw_share_credits[i] = 1;
	}

	/* Push the new TC/BW configuration to the switching component */
	ret = i40e_aq_config_switch_comp_bw_config(&pf->hw, veb->seid,
						   &bw_data, NULL);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "VEB bw config failed, err %pe aq_err %s\n",
			 ERR_PTR(ret),
			 libie_aq_str(pf->hw.aq.asq_last_status));
		goto out;
	}

	/* Update the BW information */
	ret = i40e_veb_get_bw_info(veb);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "Failed getting veb bw config, err %pe aq_err %s\n",
			 ERR_PTR(ret),
			 libie_aq_str(pf->hw.aq.asq_last_status));
	}

out:
	return ret;
}
#ifdef CONFIG_I40E_DCB
/**
 * i40e_dcb_reconfigure - Reconfigure all VEBs and VSIs
 * @pf: PF struct
 *
 * Applies the PF's current TC map to every VEB and then reprograms every
 * VSI: the main LAN VSI receives the PF TC map, every other VSI is kept on
 * the default traffic class. A failure on one element is logged and the
 * walk carries on with the next one. Nothing is done when the PF TC map is
 * just the default traffic class.
 *
 * It is assumed that the caller has quiesced all the VSIs before calling
 * this function.
 **/
static void i40e_dcb_reconfigure(struct i40e_pf *pf)
{
	struct i40e_veb *veb;
	struct i40e_vsi *vsi;
	u8 tc_bitmap;
	int status;
	int idx;

	tc_bitmap = i40e_pf_get_tc_map(pf);
	if (tc_bitmap == I40E_DEFAULT_TRAFFIC_CLASS)
		return;

	/* Enable the TCs available on PF to all VEBs */
	i40e_pf_for_each_veb(pf, idx, veb) {
		status = i40e_veb_config_tc(veb, tc_bitmap);
		/* On failure keep going: configure as many components as we can */
		if (status)
			dev_info(&pf->pdev->dev, "Failed configuring TC for VEB seid=%d\n",
				 veb->seid);
	}

	/* Update each VSI */
	i40e_pf_for_each_vsi(pf, idx, vsi) {
		/* Only the LAN VSI gets all TCs; the rest stay at TC0 for now */
		tc_bitmap = I40E_DEFAULT_TRAFFIC_CLASS;
		if (vsi->type == I40E_VSI_MAIN)
			tc_bitmap = i40e_pf_get_tc_map(pf);

		status = i40e_vsi_config_tc(vsi, tc_bitmap);
		if (!status) {
			/* Re-configure VSI vectors based on updated TC map */
			i40e_vsi_map_rings_to_vectors(vsi);
			if (vsi->netdev)
				i40e_dcbnl_set_all(vsi);
		} else {
			/* On failure keep going with the remaining VSIs */
			dev_info(&pf->pdev->dev, "Failed configuring TC for VSI seid=%d\n",
				 vsi->seid);
		}
	}
}
/**
 * i40e_resume_port_tx - Resume port Tx
 * @pf: PF struct
 *
 * Asks the firmware to resume the port's Tx. If that fails, the failure is
 * logged and a PF reset is requested through the service task.
 *
 * Returns the status of the resume request (0 on success).
 **/
static int i40e_resume_port_tx(struct i40e_pf *pf)
{
	int status = i40e_aq_resume_port_tx(&pf->hw, NULL);

	if (!status)
		return status;

	dev_info(&pf->pdev->dev, "Resume Port Tx failed, err %pe aq_err %s\n",
		 ERR_PTR(status), libie_aq_str(pf->hw.aq.asq_last_status));

	/* Schedule PF reset to recover */
	set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
	i40e_service_event_schedule(pf);

	return status;
}
/**
 * i40e_suspend_port_tx - Suspend port Tx
 * @pf: PF struct
 *
 * Asks the firmware to suspend Tx on the port identified by pf->mac_seid.
 * If that fails, the failure is logged and a PF reset is requested through
 * the service task.
 *
 * Returns the status of the suspend request (0 on success).
 **/
static int i40e_suspend_port_tx(struct i40e_pf *pf)
{
	int status = i40e_aq_suspend_port_tx(&pf->hw, pf->mac_seid, NULL);

	if (!status)
		return status;

	dev_info(&pf->pdev->dev, "Suspend Port Tx failed, err %pe aq_err %s\n",
		 ERR_PTR(status), libie_aq_str(pf->hw.aq.asq_last_status));

	/* Schedule PF reset to recover */
	set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
	i40e_service_event_schedule(pf);

	return status;
}
/**
 * i40e_hw_set_dcb_config - Program new DCBX settings into HW
 * @pf: PF being configured
 * @new_cfg: New DCBX configuration
 *
 * Quiesces all VSIs, copies @new_cfg into the PF's local DCBX config
 * (mirroring etscfg into etsrec), programs it through
 * i40e_set_dcb_config() and, when that succeeds, reconfigures the
 * VEBs/VSIs. Unless a reset recovery is pending, port Tx is then resumed
 * and the VSIs are unquiesced.
 *
 * Returns 0 on success or when no change is detected, otherwise the status
 * of the failing step.
 **/
static int i40e_hw_set_dcb_config(struct i40e_pf *pf,
				  struct i40e_dcbx_config *new_cfg)
{
	struct i40e_dcbx_config *old_cfg = &pf->hw.local_dcbx_config;
	int ret;

	/* NOTE(review): this compares the two pointer variables, not the
	 * structures they point to, so "no change" is only detected when the
	 * caller passes &pf->hw.local_dcbx_config itself. Left as is on
	 * purpose: i40e_hw_dcb_config() copies *new_cfg into the local config
	 * before calling here, so a content comparison would turn that call
	 * into a no-op. Confirm the intended semantics before changing it.
	 */
	if (memcmp(&new_cfg, &old_cfg, sizeof(new_cfg)) == 0) {
		dev_dbg(&pf->pdev->dev, "No Change in DCB Config required.\n");
		return 0;
	}

	/* Config change disable all VSIs */
	i40e_pf_quiesce_all_vsi(pf);

	/* Copy the new config to the current config */
	*old_cfg = *new_cfg;
	old_cfg->etsrec = old_cfg->etscfg;

	ret = i40e_set_dcb_config(&pf->hw);
	if (ret)
		dev_info(&pf->pdev->dev, "Set DCB Config failed, err %pe aq_err %s\n",
			 ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
	else
		/* Changes in configuration update VEB/VSI */
		i40e_dcb_reconfigure(pf);

	/* In case of reset do not try to resume anything */
	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
		return ret;

	/* Re-start the VSIs if disabled; pointless if Tx could not resume */
	ret = i40e_resume_port_tx(pf);
	if (!ret)
		i40e_pf_unquiesce_all_vsi(pf);

	return ret;
}
/**
 * i40e_hw_dcb_config - Program new DCBX settings into HW
 * @pf: PF being configured
 * @new_cfg: New DCBX configuration
 *
 * Program DCB settings into HW and reconfigure VEB/VSIs on
 * given PF.
 *
 * Returns -EINVAL when @new_cfg contains a TSA type other than ETS or
 * strict priority; otherwise the status of the last step executed.
**/ int i40e_hw_dcb_config(struct i40e_pf *pf, struct i40e_dcbx_config *new_cfg)
{ struct i40e_aqc_configure_switching_comp_ets_data ets_data;
u8 prio_type[I40E_MAX_TRAFFIC_CLASS] = {0};
u32 mfs_tc[I40E_MAX_TRAFFIC_CLASS]; struct i40e_dcbx_config *old_cfg;
u8 mode[I40E_MAX_TRAFFIC_CLASS]; struct i40e_rx_pb_config pb_cfg; struct i40e_hw *hw = &pf->hw;
u8 num_ports = hw->num_ports; bool need_reconfig; int ret = -EINVAL;
u8 lltc_map = 0;
u8 tc_map = 0;
u8 new_numtc;
u8 i;
dev_dbg(&pf->pdev->dev, "Configuring DCB registers directly\n"); /* Un-pack information to Program ETS HW via shared API * numtc, tcmap * LLTC map * ETS/NON-ETS arbiter mode * max exponent (credit refills) * Total number of ports * PFC priority bit-map * Priority Table * BW % per TC * Arbiter mode between UPs sharing same TC * TSA table (ETS or non-ETS) * EEE enabled or not * MFS TC table
*/
new_numtc = i40e_dcb_get_num_tc(new_cfg);
/* Walk the TSA table: build the valid-TC bitmap, the strict-priority
 * bitmap and the per-TC credits. An unknown TSA type bails out with
 * need_reconfig cleared, so "out" skips the resume path and the initial
 * -EINVAL in ret is returned.
 */
memset(&ets_data, 0, sizeof(ets_data)); for (i = 0; i < new_numtc; i++) {
tc_map |= BIT(i); switch (new_cfg->etscfg.tsatable[i]) { case I40E_IEEE_TSA_ETS:
prio_type[i] = I40E_DCB_PRIO_TYPE_ETS;
ets_data.tc_bw_share_credits[i] =
new_cfg->etscfg.tcbwtable[i]; break; case I40E_IEEE_TSA_STRICT:
prio_type[i] = I40E_DCB_PRIO_TYPE_STRICT;
lltc_map |= BIT(i);
ets_data.tc_bw_share_credits[i] =
I40E_DCB_STRICT_PRIO_CREDITS; break; default: /* Invalid TSA type */
need_reconfig = false; goto out;
}
}
old_cfg = &hw->local_dcbx_config; /* Check if need reconfiguration */
need_reconfig = i40e_dcb_need_reconfig(pf, old_cfg, new_cfg);
/* If needed, enable/disable frame tagging, disable all VSIs * and suspend port tx
*/ if (need_reconfig) { /* Enable DCB tagging only when more than one TC */ if (new_numtc > 1)
set_bit(I40E_FLAG_DCB_ENA, pf->flags); else
clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */
i40e_pf_quiesce_all_vsi(pf);
ret = i40e_suspend_port_tx(pf); if (ret) goto err;
}
/* Configure Port ETS Tx Scheduler */
ets_data.tc_valid_bits = tc_map;
ets_data.tc_strict_priority_flags = lltc_map;
ret = i40e_aq_config_switch_comp_ets
(hw, pf->mac_seid, &ets_data,
i40e_aqc_opc_modify_switching_comp_ets, NULL); if (ret) {
dev_info(&pf->pdev->dev, "Modify Port ETS failed, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); goto out;
}
/* NOTE(review): pb_cfg is not written anywhere in this body before the
 * copy below, and mfs_tc, mode and num_ports are never used while
 * prio_type is filled but never read. Confirm that the Rx ETS /
 * packet-buffer programming step that should populate and consume them
 * has not been lost.
 */
/* Update the local Rx Packet buffer config */
pf->pb_cfg = pb_cfg;
/* Inform the FW about changes to DCB configuration */
ret = i40e_aq_dcb_updated(&pf->hw, NULL); if (ret) {
dev_info(&pf->pdev->dev, "DCB Updated failed, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); goto out;
}
/* Update the port DCBx configuration */
*old_cfg = *new_cfg;
/* Changes in configuration update VEB/VSI */
i40e_dcb_reconfigure(pf);
/* Note: when need_reconfig is set, ret is overwritten by the resume
 * status below, so the error code of a step that jumped to "out" is not
 * what the caller sees if the resume succeeds.
 */
out: /* Re-start the VSIs if disabled */ if (need_reconfig) {
ret = i40e_resume_port_tx(pf);
clear_bit(__I40E_PORT_SUSPENDED, pf->state); /* In case of error no point in resuming VSIs */ if (ret) goto err;
/* Wait for the PF's queues to be disabled */
ret = i40e_pf_wait_queues_disabled(pf); if (ret) { /* Schedule PF reset to recover */
set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
i40e_service_event_schedule(pf); goto err;
} else {
i40e_pf_unquiesce_all_vsi(pf);
set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
} /* registers are set, lets apply */ if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps))
ret = i40e_hw_set_dcb_config(pf, new_cfg);
}
err: return ret;
}
/** * i40e_dcb_sw_default_config - Set default DCB configuration when DCB in SW * @pf: PF being queried * * Set default DCB configuration in case DCB is to be done in SW.
**/ int i40e_dcb_sw_default_config(struct i40e_pf *pf)
{ struct i40e_dcbx_config *dcb_cfg = &pf->hw.local_dcbx_config; struct i40e_aqc_configure_switching_comp_ets_data ets_data; struct i40e_hw *hw = &pf->hw; int err;
if (test_bit(I40E_HW_CAP_USE_SET_LLDP_MIB, pf->hw.caps)) { /* Update the local cached instance with TC0 ETS */
memset(&pf->tmp_cfg, 0, sizeof(struct i40e_dcbx_config));
pf->tmp_cfg.etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING;
pf->tmp_cfg.etscfg.maxtcs = 0;
pf->tmp_cfg.etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW;
pf->tmp_cfg.etscfg.tsatable[0] = I40E_IEEE_TSA_ETS;
pf->tmp_cfg.pfc.willing = I40E_IEEE_DEFAULT_PFC_WILLING;
pf->tmp_cfg.pfc.pfccap = I40E_MAX_TRAFFIC_CLASS; /* FW needs one App to configure HW */
pf->tmp_cfg.numapps = I40E_IEEE_DEFAULT_NUM_APPS;
pf->tmp_cfg.app[0].selector = I40E_APP_SEL_ETHTYPE;
pf->tmp_cfg.app[0].priority = I40E_IEEE_DEFAULT_APP_PRIO;
pf->tmp_cfg.app[0].protocolid = I40E_APP_PROTOID_FCOE;
/* Enable ETS on the Physical port */
err = i40e_aq_config_switch_comp_ets
(hw, pf->mac_seid, &ets_data,
i40e_aqc_opc_enable_switching_comp_ets, NULL); if (err) {
dev_info(&pf->pdev->dev, "Enable Port ETS failed, err %pe aq_err %s\n",
ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
err = -ENOENT; goto out;
}
/* Update the local cached instance with TC0 ETS */
dcb_cfg->etscfg.willing = I40E_IEEE_DEFAULT_ETS_WILLING;
dcb_cfg->etscfg.cbs = 0;
dcb_cfg->etscfg.maxtcs = I40E_MAX_TRAFFIC_CLASS;
dcb_cfg->etscfg.tcbwtable[0] = I40E_IEEE_DEFAULT_ETS_TCBW;
out: return err;
}
/** * i40e_init_pf_dcb - Initialize DCB configuration * @pf: PF being configured * * Query the current DCB configuration and cache it * in the hardware structure
**/ staticint i40e_init_pf_dcb(struct i40e_pf *pf)
{ struct i40e_hw *hw = &pf->hw; int err;
/* Do not enable DCB for SW1 and SW2 images even if the FW is capable * Also do not enable DCBx if FW LLDP agent is disabled
*/ if (test_bit(I40E_HW_CAP_NO_DCB_SUPPORT, pf->hw.caps)) {
dev_info(&pf->pdev->dev, "DCB is not supported.\n");
err = -EOPNOTSUPP; goto out;
} if (test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags)) {
dev_info(&pf->pdev->dev, "FW LLDP is disabled, attempting SW DCB\n");
err = i40e_dcb_sw_default_config(pf); if (err) {
dev_info(&pf->pdev->dev, "Could not initialize SW DCB\n"); goto out;
}
dev_info(&pf->pdev->dev, "SW DCB initialization succeeded.\n");
pf->dcbx_cap = DCB_CAP_DCBX_HOST |
DCB_CAP_DCBX_VER_IEEE; /* at init capable but disabled */
set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
clear_bit(I40E_FLAG_DCB_ENA, pf->flags); goto out;
}
err = i40e_init_dcb(hw, true); if (!err) { /* Device/Function is not DCBX capable */ if ((!hw->func_caps.dcb) ||
(hw->dcbx_status == I40E_DCBX_STATUS_DISABLED)) {
dev_info(&pf->pdev->dev, "DCBX offload is not supported or is disabled for this PF.\n");
} else { /* When status is not DISABLED then DCBX in FW */
pf->dcbx_cap = DCB_CAP_DCBX_LLD_MANAGED |
DCB_CAP_DCBX_VER_IEEE;
set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Enable DCB tagging only when more than one TC * or explicitly disable if only one TC
*/ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
set_bit(I40E_FLAG_DCB_ENA, pf->flags); else
clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
dev_dbg(&pf->pdev->dev, "DCBX offload is supported for this PF.\n");
}
} elseif (pf->hw.aq.asq_last_status == LIBIE_AQ_RC_EPERM) {
dev_info(&pf->pdev->dev, "FW LLDP disabled for this PF.\n");
set_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags);
} else {
dev_info(&pf->pdev->dev, "Query for DCB configuration failed, err %pe aq_err %s\n",
ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
}
memzero_explicit(&kedata, sizeof(kedata)); if (vsi->netdev->ethtool_ops->get_eee)
vsi->netdev->ethtool_ops->get_eee(vsi->netdev, &kedata);
if (!linkmode_empty(kedata.supported))
netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Flow Control: %s, EEE: %s\n",
speed, fc,
kedata.eee_enabled ? "Enabled" : "Disabled"); else
netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Flow Control: %s\n",
speed, fc);
}
/**
 * i40e_print_link_message - print link up or down
 * @vsi: the VSI for which link needs a message
 * @isup: true if link is up, false otherwise
 *
 * Logs a link message only when the up/down state or the link speed differs
 * from the values last recorded in @vsi, and records the new values. For
 * 25G links and for the KX X722 device the message also carries the
 * requested FEC, the negotiated FEC and the autoneg state; every other
 * link-up case is delegated to i40e_print_link_message_eee().
 */
void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
{
	enum i40e_aq_link_speed new_speed;
	struct i40e_pf *pf = vsi->back;
	char *speed = "Unknown";
	char *fc = "Unknown";
	char *fec = "";
	char *req_fec = "";
	char *an = "";

	if (isup)
		new_speed = pf->hw.phy.link_info.link_speed;
	else
		new_speed = I40E_LINK_SPEED_UNKNOWN;

	/* Stay quiet when nothing changed since the last message */
	if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
		return;

	vsi->current_isup = isup;
	vsi->current_speed = new_speed;
	if (!isup) {
		netdev_info(vsi->netdev, "NIC Link is Down\n");
		return;
	}

	/* Warn user if link speed on NPAR enabled partition is not at
	 * least 10GB
	 */
	if (pf->hw.func_caps.npar_enable &&
	    (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_1GB ||
	     pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_100MB))
		netdev_warn(vsi->netdev, "The partition detected link speed that is less than 10Gbps\n");

	switch (pf->hw.phy.link_info.link_speed) {
	case I40E_LINK_SPEED_40GB:
		speed = "40 G";
		break;
	case I40E_LINK_SPEED_20GB:
		speed = "20 G";
		break;
	case I40E_LINK_SPEED_25GB:
		speed = "25 G";
		break;
	case I40E_LINK_SPEED_10GB:
		speed = "10 G";
		break;
	case I40E_LINK_SPEED_5GB:
		speed = "5 G";
		break;
	case I40E_LINK_SPEED_2_5GB:
		speed = "2.5 G";
		break;
	case I40E_LINK_SPEED_1GB:
		speed = "1000 M";
		break;
	case I40E_LINK_SPEED_100MB:
		speed = "100 M";
		break;
	default:
		break;
	}

	switch (pf->hw.fc.current_mode) {
	case I40E_FC_FULL:
		fc = "RX/TX";
		break;
	case I40E_FC_TX_PAUSE:
		fc = "TX";
		break;
	case I40E_FC_RX_PAUSE:
		fc = "RX";
		break;
	default:
		fc = "None";
		break;
	}

	if (pf->hw.phy.link_info.link_speed == I40E_LINK_SPEED_25GB) {
		req_fec = "None";
		fec = "None";
		an = "False";

		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
			an = "True";

		/* Report the FEC mode the link actually negotiated; without
		 * this the message would always claim "None" on 25G links.
		 */
		if (pf->hw.phy.link_info.fec_info &
		    I40E_AQ_CONFIG_FEC_KR_ENA)
			fec = "CL74 FC-FEC/BASE-R";
		else if (pf->hw.phy.link_info.fec_info &
			 I40E_AQ_CONFIG_FEC_RS_ENA)
			fec = "CL108 RS-FEC";

		/* 'CL108 RS-FEC' should be displayed when RS is requested, or
		 * both RS and FC are requested
		 */
		if (pf->hw.phy.link_info.req_fec_info &
		    (I40E_AQ_REQUEST_FEC_KR | I40E_AQ_REQUEST_FEC_RS)) {
			if (pf->hw.phy.link_info.req_fec_info &
			    I40E_AQ_REQUEST_FEC_RS)
				req_fec = "CL108 RS-FEC";
			else
				req_fec = "CL74 FC-FEC/BASE-R";
		}
		netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
			    speed, req_fec, fec, an, fc);
	} else if (pf->hw.device_id == I40E_DEV_ID_KX_X722) {
		req_fec = "None";
		fec = "None";
		an = "False";

		if (pf->hw.phy.link_info.an_info & I40E_AQ_AN_COMPLETED)
			an = "True";

		if (pf->hw.phy.link_info.fec_info &
		    I40E_AQ_CONFIG_FEC_KR_ENA)
			fec = "CL74 FC-FEC/BASE-R";

		if (pf->hw.phy.link_info.req_fec_info &
		    I40E_AQ_REQUEST_FEC_KR)
			req_fec = "CL74 FC-FEC/BASE-R";

		netdev_info(vsi->netdev, "NIC Link is Up, %sbps Full Duplex, Requested FEC: %s, Negotiated FEC: %s, Autoneg: %s, Flow Control: %s\n",
			    speed, req_fec, fec, an, fc);
	} else {
		i40e_print_link_message_eee(vsi, speed, fc);
	}
}
/**
 * i40e_up_complete - Finish the last steps of bringing up a connection
 * @vsi: the VSI being configured
 *
 * Configures the VSI's interrupts (MSI-X when I40E_FLAG_MSIX_ENA is set,
 * MSI/legacy otherwise) and asks the service task to notify clients.
 *
 * Returns 0.
 *
 * NOTE(review): err is declared but never used in this body. Confirm that
 * no step between the interrupt configuration and the client notification
 * has been lost.
**/ staticint i40e_up_complete(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back; int err;
if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
i40e_vsi_configure_msix(vsi); else
i40e_configure_msi_and_legacy(vsi);
/* On the next run of the service_task, notify any clients of the new * opened netdev
*/
set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
i40e_service_event_schedule(pf);
return 0;
}
/** * i40e_vsi_reinit_locked - Reset the VSI * @vsi: the VSI being configured * * Rebuild the ring structs after some configuration * has changed, e.g. MTU size.
**/ staticvoid i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back;
while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state))
usleep_range(1000, 2000);
i40e_down(vsi);
/**
 * i40e_force_link_state - Force the link status
 * @pf: board private structure
 * @is_up: whether the link state should be forced up or down
 *
 * Reads the PHY capabilities twice (initial values, then current config),
 * builds a new PHY config from the current one with the PHY type bitmap
 * either fully set or cleared, applies it, refreshes the link info and
 * restarts autonegotiation.
 *
 * Returns 0 on success or when no change is needed, otherwise the status of
 * the failing admin queue command.
 **/
static int i40e_force_link_state(struct i40e_pf *pf, bool is_up)
{
	struct i40e_aq_get_phy_abilities_resp abilities;
	struct i40e_aq_set_phy_config config = {0};
	struct i40e_hw *hw = &pf->hw;
	bool set_phy_types = is_up;
	u8 initial_speed;
	u64 phy_mask;
	int err;

	/* Card might've been put in an unstable state by other drivers
	 * and applications, which causes incorrect speed values being
	 * set on startup. In order to clear speed registers, we call
	 * get_phy_capabilities twice, once to get initial state of
	 * available speeds, and once to get current PHY config.
	 */
	err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL);
	if (err) {
		dev_err(&pf->pdev->dev, "failed to get phy cap., ret = %pe last_status = %s\n",
			ERR_PTR(err), libie_aq_str(hw->aq.asq_last_status));
		return err;
	}
	initial_speed = abilities.link_speed;

	/* Get the current phy config */
	err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL);
	if (err) {
		dev_err(&pf->pdev->dev, "failed to get phy cap., ret = %pe last_status = %s\n",
			ERR_PTR(err), libie_aq_str(hw->aq.asq_last_status));
		return err;
	}

	/* If link needs to go up, but was not forced to go down,
	 * and its speed values are OK, no need for a flap
	 * if set_phy_types was set, still need to force up
	 */
	if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags))
		set_phy_types = true;
	else if (is_up && abilities.phy_type != 0 && abilities.link_speed != 0)
		return 0;

	/* To force link we need to set bits for all supported PHY types,
	 * but there are now more than 32, so we need to split the bitmap
	 * across two fields. Both fields stay zero (from the initializer)
	 * when the PHY types are not to be set.
	 */
	phy_mask = I40E_PHY_TYPES_BITMASK;
	if (set_phy_types) {
		config.phy_type = cpu_to_le32((u32)(phy_mask & 0xffffffff));
		config.phy_type_ext = (u8)((phy_mask >> 32) & 0xff);
	}

	/* Copy the old settings, except of phy_type */
	config.abilities = abilities.abilities;
	if (test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags)) {
		if (is_up)
			config.abilities |= I40E_AQ_PHY_ENABLE_LINK;
		else
			config.abilities &= ~(I40E_AQ_PHY_ENABLE_LINK);
	}

	/* Fall back to the initially reported speed if the current one is 0 */
	config.link_speed = abilities.link_speed != 0 ?
			    abilities.link_speed : initial_speed;
	config.eee_capability = abilities.eee_capability;
	config.eeer = abilities.eeer_val;
	config.low_power_ctrl = abilities.d3_lpan;
	config.fec_config = abilities.fec_cfg_curr_mod_ext_info &
			    I40E_AQ_PHY_FEC_CONFIG_MASK;

	err = i40e_aq_set_phy_config(hw, &config, NULL);
	if (err) {
		dev_err(&pf->pdev->dev, "set phy config ret = %pe last_status = %s\n",
			ERR_PTR(err), libie_aq_str(hw->aq.asq_last_status));
		return err;
	}

	/* Update the link info */
	if (i40e_update_link_info(hw)) {
		/* Wait a little bit (on 40G cards it sometimes takes a really
		 * long time for link to come back from the atomic reset)
		 * and try once more
		 */
		msleep(1000);
		i40e_update_link_info(hw);
	}

	i40e_aq_set_link_restart_an(hw, is_up, NULL);

	return 0;
}
/**
 * i40e_up - Bring the connection back up after being down
 * @vsi: the VSI being configured
 *
 * Configures the VSI and, only if that succeeds, completes the bring-up.
 *
 * Returns 0 on success, otherwise the status of the failing step.
 **/
int i40e_up(struct i40e_vsi *vsi)
{
	int status = i40e_vsi_configure(vsi);

	if (status)
		return status;

	return i40e_up_complete(vsi);
}
/** * i40e_down - Shutdown the connection processing * @vsi: the VSI being stopped
**/ void i40e_down(struct i40e_vsi *vsi)
{ int i;
/* It is assumed that the caller of this function * sets the vsi->state __I40E_VSI_DOWN bit.
*/ if (vsi->netdev) {
netif_carrier_off(vsi->netdev);
netif_tx_disable(vsi->netdev);
}
i40e_vsi_disable_irq(vsi);
i40e_vsi_stop_rings(vsi); if (vsi->type == I40E_VSI_MAIN &&
(test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, vsi->back->flags) ||
test_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, vsi->back->flags)))
i40e_force_link_state(vsi->back, false);
i40e_napi_disable_all(vsi);
for (i = 0; i < vsi->num_queue_pairs; i++) {
i40e_clean_tx_ring(vsi->tx_rings[i]); if (i40e_enabled_xdp_vsi(vsi)) { /* Make sure that in-progress ndo_xdp_xmit and * ndo_xsk_wakeup calls are completed.
*/
synchronize_rcu();
i40e_clean_tx_ring(vsi->xdp_rings[i]);
}
i40e_clean_rx_ring(vsi->rx_rings[i]);
}
}
/**
 * i40e_validate_mqprio_qopt - validate queue mapping info
 * @vsi: the VSI being configured
 * @mqprio_qopt: queue parameters
 *
 * Checks that the TC count is within 1..I40E_MAX_TRAFFIC_CLASS, that the
 * first TC starts at queue 0, that every TC has at least one queue and no
 * min rate, that the TC queue ranges are contiguous, that the last range
 * fits in the VSI's queue pairs and that the summed max rates do not exceed
 * the link speed.
 *
 * Returns 0 when the configuration is acceptable, -EINVAL otherwise.
 **/
static int i40e_validate_mqprio_qopt(struct i40e_vsi *vsi,
				     struct tc_mqprio_qopt_offload *mqprio_qopt)
{
	u64 total_rate = 0;
	int last_tc;
	int tc;

	if (mqprio_qopt->qopt.offset[0] != 0)
		return -EINVAL;
	if (mqprio_qopt->qopt.num_tc < 1)
		return -EINVAL;
	if (mqprio_qopt->qopt.num_tc > I40E_MAX_TRAFFIC_CLASS)
		return -EINVAL;

	last_tc = mqprio_qopt->qopt.num_tc - 1;

	for (tc = 0; tc <= last_tc; tc++) {
		u64 tc_rate;

		if (!mqprio_qopt->qopt.count[tc])
			return -EINVAL;

		if (mqprio_qopt->min_rate[tc]) {
			dev_err(&vsi->back->pdev->dev, "Invalid min tx rate (greater than 0) specified\n");
			return -EINVAL;
		}

		/* do_div() divides in place, so work on a copy of the rate */
		tc_rate = mqprio_qopt->max_rate[tc];
		do_div(tc_rate, I40E_BW_MBPS_DIVISOR);
		total_rate += tc_rate;

		/* Every TC but the last must be followed directly by the next */
		if (tc < last_tc &&
		    mqprio_qopt->qopt.offset[tc + 1] !=
		    (mqprio_qopt->qopt.offset[tc] + mqprio_qopt->qopt.count[tc]))
			return -EINVAL;
	}

	/* The end of the last TC's range is the total number of queues used */
	if (vsi->num_queue_pairs <
	    (mqprio_qopt->qopt.offset[last_tc] +
	     mqprio_qopt->qopt.count[last_tc])) {
		dev_err(&vsi->back->pdev->dev, "Failed to create traffic channel, insufficient number of queues.\n");
		return -EINVAL;
	}

	if (total_rate > i40e_get_link_speed(vsi)) {
		dev_err(&vsi->back->pdev->dev, "Invalid max tx rate specified\n");
		return -EINVAL;
	}

	return 0;
}
/**
 * i40e_vsi_set_default_tc_config - set default values for tc configuration
 * @vsi: the VSI being configured
 *
 * Leaves only TC0 enabled. TC0 gets the smaller of the VSI's allocated
 * queue pairs and the PF's per-TC queue limit; every other TC is pointed at
 * queue offset 0 with a single queue. All TCs map to netdev TC 0.
 **/
static void i40e_vsi_set_default_tc_config(struct i40e_vsi *vsi)
{
	u16 tc0_queues;
	int tc;

	/* Only TC0 is enabled */
	vsi->tc_config.numtc = 1;
	vsi->tc_config.enabled_tc = 1;

	tc0_queues = min_t(int, vsi->alloc_queue_pairs,
			   i40e_pf_get_max_q_per_tc(vsi->back));

	for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++) {
		/* A TC that is not enabled falls back to the default queue
		 * offset and is given exactly one queue.
		 */
		vsi->tc_config.tc_info[tc].qoffset = 0;
		vsi->tc_config.tc_info[tc].qcount = (tc == 0) ? tc0_queues : 1;
		vsi->tc_config.tc_info[tc].netdev_tc = 0;
	}
}
/** * i40e_del_macvlan_filter * @hw: pointer to the HW structure * @seid: seid of the channel VSI * @macaddr: the mac address to apply as a filter * @aq_err: store the admin Q error * * This function deletes a mac filter on the channel VSI which serves as the * macvlan. Returns 0 on success.
**/ staticint i40e_del_macvlan_filter(struct i40e_hw *hw, u16 seid, const u8 *macaddr, int *aq_err)
{ struct i40e_aqc_remove_macvlan_element_data element; int status;
/** * i40e_add_macvlan_filter * @hw: pointer to the HW structure * @seid: seid of the channel VSI * @macaddr: the mac address to apply as a filter * @aq_err: store the admin Q error * * This function adds a mac filter on the channel VSI which serves as the * macvlan. Returns 0 on success.
**/ staticint i40e_add_macvlan_filter(struct i40e_hw *hw, u16 seid, const u8 *macaddr, int *aq_err)
{ struct i40e_aqc_add_macvlan_element_data element;
u16 cmd_flags = 0; int status;
/**
 * i40e_reset_ch_rings - Reset the queue contexts in a channel
 * @vsi: the VSI we want to access
 * @ch: the channel we want to access
 *
 * Clears the channel back-pointer of every Tx and Rx ring in the queue
 * range [ch->base_queue, ch->base_queue + ch->num_queue_pairs).
 */
static void i40e_reset_ch_rings(struct i40e_vsi *vsi, struct i40e_channel *ch)
{
	int n;

	for (n = 0; n < ch->num_queue_pairs; n++) {
		u16 queue = ch->base_queue + n;

		vsi->tx_rings[queue]->ch = NULL;
		vsi->rx_rings[queue]->ch = NULL;
	}
}
/** * i40e_free_macvlan_channels * @vsi: the VSI we want to access * * This function frees the Qs of the channel VSI from * the stack and also deletes the channel VSIs which * serve as macvlans.
*/ staticvoid i40e_free_macvlan_channels(struct i40e_vsi *vsi)
{ struct i40e_channel *ch, *ch_tmp; int ret;
/* remove the VSI */
ret = i40e_aq_delete_element(&vsi->back->hw, ch->seid,
NULL); if (ret)
dev_err(&vsi->back->pdev->dev, "unable to remove channel (%d) for parent VSI(%d)\n",
ch->seid, parent_vsi->seid);
kfree(ch);
}
vsi->macvlan_cnt = 0;
}
/** * i40e_fwd_ring_up - bring the macvlan device up * @vsi: the VSI we want to access * @vdev: macvlan netdevice * @fwd: the private fwd structure
*/ staticint i40e_fwd_ring_up(struct i40e_vsi *vsi, struct net_device *vdev, struct i40e_fwd_adapter *fwd)
{ struct i40e_channel *ch = NULL, *ch_tmp, *iter; int ret = 0, num_tc = 1, i, aq_err; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw;
/* Go through the list and find an available channel */
list_for_each_entry_safe(iter, ch_tmp, &vsi->macvlan_list, list) { if (!i40e_is_channel_macvlan(iter)) {
iter->fwd = fwd; /* record configuration for macvlan interface in vdev */ for (i = 0; i < num_tc; i++)
netdev_bind_sb_channel_queue(vsi->netdev, vdev,
i,
iter->num_queue_pairs,
iter->base_queue); for (i = 0; i < iter->num_queue_pairs; i++) { struct i40e_ring *tx_ring, *rx_ring;
u16 pf_q;
pf_q = iter->base_queue + i;
/* Get to TX ring ptr */
tx_ring = vsi->tx_rings[pf_q];
tx_ring->ch = iter;
/* Get the RX ring ptr */
rx_ring = vsi->rx_rings[pf_q];
rx_ring->ch = iter;
}
ch = iter; break;
}
}
if (!ch) return -EINVAL;
/* Guarantee all rings are updated before we update the * MAC address filter.
*/
wmb();
/* Add a mac filter */
ret = i40e_add_macvlan_filter(hw, ch->seid, vdev->dev_addr, &aq_err); if (ret) { /* if we cannot add the MAC rule then disable the offload */
macvlan_release_l2fw_offload(vdev); for (i = 0; i < ch->num_queue_pairs; i++) { struct i40e_ring *rx_ring;
u16 pf_q;
/* Reconfigure RSS for main VSI with new max queue count */
vsi->rss_size = max_t(u16, num_qps, qcnt);
ret = i40e_vsi_config_rss(vsi); if (ret) {
dev_info(&pf->pdev->dev, "Failed to reconfig RSS for num_queues (%u)\n",
vsi->rss_size); return ret;
}
vsi->reconfig_rss = true;
dev_dbg(&vsi->back->pdev->dev, "Reconfigured RSS with num_queues (%u)\n", vsi->rss_size);
vsi->next_base_queue = num_qps;
vsi->cnt_q_avail = vsi->num_queue_pairs - num_qps;
/* Update the VSI after updating the VSI queue-mapping * information
*/
ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "Update vsi tc config failed, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(hw->aq.asq_last_status)); return ret;
} /* update the local VSI info with updated queue map */
i40e_vsi_update_queue_map(vsi, &ctxt);
vsi->info.valid_sections = 0;
/* Create channels for macvlans */
INIT_LIST_HEAD(&vsi->macvlan_list); for (i = 0; i < macvlan_cnt; i++) {
ch = kzalloc(sizeof(*ch), GFP_KERNEL); if (!ch) {
ret = -ENOMEM; goto err_free;
}
INIT_LIST_HEAD(&ch->list);
ch->num_queue_pairs = qcnt; if (!i40e_setup_channel(pf, vsi, ch)) {
ret = -EINVAL;
kfree(ch); goto err_free;
}
ch->parent_vsi = vsi;
vsi->cnt_q_avail -= ch->num_queue_pairs;
vsi->macvlan_cnt++;
list_add_tail(&ch->list, &vsi->macvlan_list);
}
return ret;
err_free:
dev_info(&pf->pdev->dev, "Failed to setup macvlans\n");
i40e_free_macvlan_channels(vsi);
if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) {
netdev_info(netdev, "Macvlans are not supported when DCB is enabled\n"); return ERR_PTR(-EINVAL);
} if (i40e_is_tc_mqprio_enabled(pf)) {
netdev_info(netdev, "Macvlans are not supported when HW TC offload is on\n"); return ERR_PTR(-EINVAL);
} if (pf->num_lan_msix < I40E_MIN_MACVLAN_VECTORS) {
netdev_info(netdev, "Not enough vectors available to support macvlans\n"); return ERR_PTR(-EINVAL);
}
/* The macvlan device has to be a single Q device so that the * tc_to_txq field can be reused to pick the tx queue.
*/ if (netif_is_multiqueue(vdev)) return ERR_PTR(-ERANGE);
if (!vsi->macvlan_cnt) { /* reserve bit 0 for the pf device */
set_bit(0, vsi->fwd_bitmask);
/* Try to reserve as many queues as possible for macvlans. First * reserve 3/4th of max vectors, then half, then quarter and * calculate Qs per macvlan as you go
*/
vectors = pf->num_lan_msix; if (vectors <= I40E_MAX_MACVLANS && vectors > 64) { /* allocate 4 Qs per macvlan and 32 Qs to the PF*/
q_per_macvlan = 4;
macvlan_cnt = (vectors - 32) / 4;
} elseif (vectors <= 64 && vectors > 32) { /* allocate 2 Qs per macvlan and 16 Qs to the PF*/
q_per_macvlan = 2;
macvlan_cnt = (vectors - 16) / 2;
} elseif (vectors <= 32 && vectors > 16) { /* allocate 1 Q per macvlan and 16 Qs to the PF*/
q_per_macvlan = 1;
macvlan_cnt = vectors - 16;
} elseif (vectors <= 16 && vectors > 8) { /* allocate 1 Q per macvlan and 8 Qs to the PF */
q_per_macvlan = 1;
macvlan_cnt = vectors - 8;
} else { /* allocate 1 Q per macvlan and 1 Q to the PF */
q_per_macvlan = 1;
macvlan_cnt = vectors - 1;
}
if (macvlan_cnt == 0) return ERR_PTR(-EBUSY);
/* Quiesce VSI queues */
i40e_quiesce_vsi(vsi);
/* sets up the macvlans but does not "enable" them */
ret = i40e_setup_macvlans(vsi, macvlan_cnt, q_per_macvlan,
vdev); if (ret) return ERR_PTR(ret);
/* Unquiesce VSI */
i40e_unquiesce_vsi(vsi);
}
avail_macvlan = find_first_zero_bit(vsi->fwd_bitmask,
vsi->macvlan_cnt); if (avail_macvlan >= I40E_MAX_MACVLANS) return ERR_PTR(-EBUSY);
/* create the fwd struct */
fwd = kzalloc(sizeof(*fwd), GFP_KERNEL); if (!fwd) return ERR_PTR(-ENOMEM);
/* Set fwd ring up */
ret = i40e_fwd_ring_up(vsi, vdev, fwd); if (ret) { /* unbind the queues and drop the subordinate channel config */
netdev_unbind_sb_channel(netdev, vdev);
netdev_set_sb_channel(vdev, 0);
kfree(fwd); return ERR_PTR(-EINVAL);
}
return fwd;
}
/** * i40e_del_all_macvlans - Delete all the mac filters on the channels * @vsi: the VSI we want to access
*/ staticvoid i40e_del_all_macvlans(struct i40e_vsi *vsi)
{ struct i40e_channel *ch, *ch_tmp; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; int aq_err, ret = 0;
/* Check if MFP enabled */ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags)) {
netdev_info(netdev, "Configuring TC not supported in MFP mode\n"); return ret;
} switch (mode) { case TC_MQPRIO_MODE_DCB:
clear_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags);
/* Check if DCB enabled to continue */ if (!test_bit(I40E_FLAG_DCB_ENA, pf->flags)) {
netdev_info(netdev, "DCB is not enabled for adapter\n"); return ret;
}
/* Check whether tc count is within enabled limit */ if (num_tc > i40e_pf_get_num_tc(pf)) {
netdev_info(netdev, "TC count greater than enabled on link for adapter\n"); return ret;
} break; case TC_MQPRIO_MODE_CHANNEL: if (test_bit(I40E_FLAG_DCB_ENA, pf->flags)) {
netdev_info(netdev, "Full offload of TC Mqprio options is not supported when DCB is enabled\n"); return ret;
} if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return ret;
ret = i40e_validate_mqprio_qopt(vsi, mqprio_qopt); if (ret) return ret;
memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt));
set_bit(I40E_FLAG_TC_MQPRIO_ENA, pf->flags);
clear_bit(I40E_FLAG_DCB_ENA, pf->flags); break; default: return -EINVAL;
}
config_tc: /* Generate TC map for number of tc requested */ for (i = 0; i < num_tc; i++)
enabled_tc |= BIT(i);
/* Requesting same TC configuration as already enabled */ if (enabled_tc == vsi->tc_config.enabled_tc &&
mode != TC_MQPRIO_MODE_CHANNEL) return 0;
/* Quiesce VSI queues */
i40e_quiesce_vsi(vsi);
if (!hw && !i40e_is_tc_mqprio_enabled(pf))
i40e_remove_queue_channels(vsi);
/* Configure VSI for enabled TCs */
ret = i40e_vsi_config_tc(vsi, enabled_tc); if (ret) {
netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
vsi->seid);
need_reset = true; gotoexit;
} elseif (enabled_tc &&
(!is_power_of_2(vsi->tc_config.tc_info[0].qcount))) {
netdev_info(netdev, "Failed to create channel. Override queues (%u) not power of 2\n",
vsi->tc_config.tc_info[0].qcount);
ret = -EINVAL;
need_reset = true; gotoexit;
}
if (i40e_is_tc_mqprio_enabled(pf)) { if (vsi->mqprio_qopt.max_rate[0]) {
u64 max_tx_rate = i40e_bw_bytes_to_mbits(vsi,
vsi->mqprio_qopt.max_rate[0]);
ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); if (!ret) {
u64 credits = max_tx_rate;
do_div(credits, I40E_BW_CREDIT_DIVISOR);
dev_dbg(&vsi->back->pdev->dev, "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
max_tx_rate,
credits,
vsi->seid);
} else {
need_reset = true; gotoexit;
}
}
ret = i40e_configure_queue_channels(vsi); if (ret) {
vsi->num_queue_pairs = old_queue_pairs;
netdev_info(netdev, "Failed configuring queue channels\n");
need_reset = true; gotoexit;
}
}
exit: /* Reset the configuration data to defaults, only TC0 is enabled */ if (need_reset) {
i40e_vsi_set_default_tc_config(vsi);
need_reset = false;
}
/* Unquiesce VSI */
i40e_unquiesce_vsi(vsi); return ret;
}
/**
 * i40e_set_cld_element - sets cloud filter element data
 * @filter: cloud filter rule
 * @cld: ptr to cloud filter element data
 *
 * This is a helper function to copy data into a cloud filter element.
 *
 * NOTE(review): in this body nothing is written to @cld, and the locals
 * ipa and i are never used; the only statement is an early return that has
 * no effect because the function ends right after it. Confirm that the
 * copy logic has not been lost.
**/ staticinlinevoid
i40e_set_cld_element(struct i40e_cloud_filter *filter, struct i40e_aqc_cloud_filters_element_data *cld)
{
u32 ipa; int i;
/* tenant_id is not supported by FW now, once the support is enabled * fill the cld->tenant_id with cpu_to_le32(filter->tenant_id)
*/ if (filter->tenant_id) return;
}
/**
 * i40e_add_del_cloud_filter - Add/del cloud filter
 * @vsi: pointer to VSI
 * @filter: cloud filter rule
 * @add: if true, add, if false, delete
 *
 * Add or delete a cloud filter for a specific flow spec.
 *
 * Returns 0 if the filter was successfully added or deleted, -EIO when
 * filter->flags is not a valid index into the flag table, otherwise the
 * status returned by the admin queue add/remove call.
 **/
int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
			      struct i40e_cloud_filter *filter, bool add)
{
	struct i40e_aqc_cloud_filters_element_data cld_filter;
	struct i40e_pf *pf = vsi->back;
	int ret;
	static const u16 flag_table[128] = {
		[I40E_CLOUD_FILTER_FLAGS_OMAC] =
			I40E_AQC_ADD_CLOUD_FILTER_OMAC,
		[I40E_CLOUD_FILTER_FLAGS_IMAC] =
			I40E_AQC_ADD_CLOUD_FILTER_IMAC,
		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN] =
			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN,
		[I40E_CLOUD_FILTER_FLAGS_IMAC_TEN_ID] =
			I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID,
		[I40E_CLOUD_FILTER_FLAGS_OMAC_TEN_ID_IMAC] =
			I40E_AQC_ADD_CLOUD_FILTER_OMAC_TEN_ID_IMAC,
		[I40E_CLOUD_FILTER_FLAGS_IMAC_IVLAN_TEN_ID] =
			I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_TEN_ID,
		[I40E_CLOUD_FILTER_FLAGS_IIP] =
			I40E_AQC_ADD_CLOUD_FILTER_IIP,
	};

	/* reject flag combinations the table cannot describe */
	if (filter->flags >= ARRAY_SIZE(flag_table))
		return -EIO;

	memset(&cld_filter, 0, sizeof(cld_filter));

	/* copy element needed to add cloud filter from filter */
	i40e_set_cld_element(filter, &cld_filter);

	if (filter->tunnel_type != I40E_CLOUD_TNL_TYPE_NONE)
		cld_filter.flags = cpu_to_le16(filter->tunnel_type <<
					     I40E_AQC_ADD_CLOUD_TNL_TYPE_SHIFT);

	if (add)
		ret = i40e_aq_add_cloud_filters(&pf->hw, filter->seid,
						&cld_filter, 1);
	else
		ret = i40e_aq_rem_cloud_filters(&pf->hw, filter->seid,
						&cld_filter, 1);

	if (ret)
		/* dst_port is kept in network byte order (the big-buffer
		 * variant converts it with be16_to_cpu() as well), so convert
		 * it before printing to log the real port number.
		 */
		dev_dbg(&pf->pdev->dev, "Failed to %s cloud filter using l4 port %u, err %d aq_err %d\n",
			add ? "add" : "delete", be16_to_cpu(filter->dst_port),
			ret, pf->hw.aq.asq_last_status);
	else
		dev_info(&pf->pdev->dev, "%s cloud filter for VSI: %d\n",
			 add ? "Added" : "Deleted", filter->seid);

	return ret;
}
/** * i40e_add_del_cloud_filter_big_buf - Add/del cloud filter using big_buf * @vsi: pointer to VSI * @filter: cloud filter rule * @add: if true, add, if false, delete * * Add or delete a cloud filter for a specific flow spec using big buffer. * Returns 0 if the filter were successfully added.
**/ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, struct i40e_cloud_filter *filter, bool add)
{ struct i40e_aqc_cloud_filters_element_bb cld_filter; struct i40e_pf *pf = vsi->back; int ret;
/* Both (src/dst) valid mac_addr are not supported */ if ((is_valid_ether_addr(filter->dst_mac) &&
is_valid_ether_addr(filter->src_mac)) ||
(is_multicast_ether_addr(filter->dst_mac) &&
is_multicast_ether_addr(filter->src_mac))) return -EOPNOTSUPP;
/* Big buffer cloud filter needs 'L4 port' to be non-zero. Also, UDP * ports are not supported via big buffer now.
*/ if (!filter->dst_port || filter->ip_proto == IPPROTO_UDP) return -EOPNOTSUPP;
/* adding filter using src_port/src_ip is not supported at this stage */ if (filter->src_port ||
(filter->src_ipv4 && filter->n_proto != ETH_P_IPV6) ||
!ipv6_addr_any(&filter->ip.v6.src_ip6)) return -EOPNOTSUPP;
memset(&cld_filter, 0, sizeof(cld_filter));
/* copy element needed to add cloud filter from filter */
i40e_set_cld_element(filter, &cld_filter.element);
if (is_valid_ether_addr(filter->dst_mac) ||
is_valid_ether_addr(filter->src_mac) ||
is_multicast_ether_addr(filter->dst_mac) ||
is_multicast_ether_addr(filter->src_mac)) { /* MAC + IP : unsupported mode */ if (filter->dst_ipv4) return -EOPNOTSUPP;
/* since we validated that L4 port must be valid before * we get here, start with respective "flags" value * and update if vlan is present or not
*/
cld_filter.element.flags =
cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_PORT);
if (filter->vlan_id) {
cld_filter.element.flags =
cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT);
}
} elseif ((filter->dst_ipv4 && filter->n_proto != ETH_P_IPV6) ||
!ipv6_addr_any(&filter->ip.v6.dst_ip6)) {
cld_filter.element.flags =
cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT); if (filter->n_proto == ETH_P_IPV6)
cld_filter.element.flags |=
cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV6); else
cld_filter.element.flags |=
cpu_to_le16(I40E_AQC_ADD_CLOUD_FLAGS_IPV4);
} else {
dev_err(&pf->pdev->dev, "either mac or ip has to be valid for cloud filter\n"); return -EINVAL;
}
/* Now copy L4 port in Byte 6..7 in general fields */
cld_filter.general_fields[I40E_AQC_ADD_CLOUD_FV_FLU_0X16_WORD0] =
be16_to_cpu(filter->dst_port);
if (add) { /* Validate current device switch mode, change if necessary */
ret = i40e_validate_and_set_switch_mode(vsi); if (ret) {
dev_err(&pf->pdev->dev, "failed to set switch mode, ret %d\n",
ret); return ret;
}
ret = i40e_aq_add_cloud_filters_bb(&pf->hw, filter->seid,
&cld_filter, 1);
} else {
ret = i40e_aq_rem_cloud_filters_bb(&pf->hw, filter->seid,
&cld_filter, 1);
}
if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct flow_match_eth_addrs match;
flow_rule_match_eth_addrs(rule, &match);
/* use is_broadcast and is_zero to check for all 0xf or 0 */ if (!is_zero_ether_addr(match.mask->dst)) { if (is_broadcast_ether_addr(match.mask->dst)) {
field_flags |= I40E_CLOUD_FIELD_OMAC;
} else {
dev_err(&pf->pdev->dev, "Bad ether dest mask %pM\n",
match.mask->dst); return -EIO;
}
}
if (flow_rule_has_control_flags(match.mask->flags,
f->common.extack)) return -EOPNOTSUPP;
}
if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { struct flow_match_ipv4_addrs match;
flow_rule_match_ipv4_addrs(rule, &match); if (match.mask->dst) { if (match.mask->dst == cpu_to_be32(0xffffffff)) {
field_flags |= I40E_CLOUD_FIELD_IIP;
} else {
dev_err(&pf->pdev->dev, "Bad ip dst mask %pI4b\n",
&match.mask->dst); return -EIO;
}
}
if (match.mask->src) { if (match.mask->src == cpu_to_be32(0xffffffff)) {
field_flags |= I40E_CLOUD_FIELD_IIP;
} else {
dev_err(&pf->pdev->dev, "Bad ip src mask %pI4b\n",
&match.mask->src); return -EIO;
}
}
if (field_flags & I40E_CLOUD_FIELD_TEN_ID) {
dev_err(&pf->pdev->dev, "Tenant id not allowed for ip filter\n"); return -EIO;
}
filter->dst_ipv4 = match.key->dst;
filter->src_ipv4 = match.key->src;
}
if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { struct flow_match_ipv6_addrs match;
flow_rule_match_ipv6_addrs(rule, &match);
/* src and dest IPV6 address should not be LOOPBACK * (0:0:0:0:0:0:0:1), which can be represented as ::1
*/ if (ipv6_addr_loopback(&match.key->dst) ||
ipv6_addr_loopback(&match.key->src)) {
dev_err(&pf->pdev->dev, "Bad ipv6, addr is LOOPBACK\n"); return -EIO;
} if (!ipv6_addr_any(&match.mask->dst) ||
!ipv6_addr_any(&match.mask->src))
field_flags |= I40E_CLOUD_FIELD_IIP;
switch (filter->ip_proto) { case IPPROTO_TCP: case IPPROTO_UDP: break; default:
dev_err(&pf->pdev->dev, "Only UDP and TCP transport are supported\n"); return -EINVAL;
}
}
filter->flags = field_flags; return 0;
}
/**
 * i40e_handle_tclass: Forward to a traffic class on the device
 * @vsi: Pointer to VSI
 * @tc: traffic class index on the device
 * @filter: Pointer to cloud filter structure
 *
 * Picks the SEID the filter should be directed to. TC 0 maps to the VSI
 * itself; any other enabled TC needs a destination port and maps to the
 * channel whose SEID is recorded in vsi->tc_seid_map[tc] (filter->seid is
 * only written when such a channel is found on vsi->ch_list).
 *
 * Returns 0 on success, -EINVAL if the TC is not enabled, no destination
 * port was given, or the VSI has no channels.
 **/
static int i40e_handle_tclass(struct i40e_vsi *vsi, u32 tc,
			      struct i40e_cloud_filter *filter)
{
	struct i40e_channel *chan, *next;

	/* direct to a traffic class on the same device */
	if (tc == 0) {
		filter->seid = vsi->seid;
		return 0;
	}

	if (!(vsi->tc_config.enabled_tc & BIT(tc))) {
		dev_err(&vsi->back->pdev->dev, "TC is not enabled\n");
		return -EINVAL;
	}

	if (!filter->dst_port) {
		dev_err(&vsi->back->pdev->dev, "Specify destination port to direct to traffic class that is not default\n");
		return -EINVAL;
	}

	if (list_empty(&vsi->ch_list))
		return -EINVAL;

	list_for_each_entry_safe(chan, next, &vsi->ch_list, list) {
		if (chan->seid == vsi->tc_seid_map[tc])
			filter->seid = chan->seid;
	}

	return 0;
}
/** * i40e_configure_clsflower - Configure tc flower filters * @vsi: Pointer to VSI * @cls_flower: Pointer to struct flow_cls_offload *
**/ staticint i40e_configure_clsflower(struct i40e_vsi *vsi, struct flow_cls_offload *cls_flower)
{ int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); struct i40e_cloud_filter *filter = NULL; struct i40e_pf *pf = vsi->back; int err = 0;
/** * i40e_find_cloud_filter - Find the cloud filter in the list * @vsi: Pointer to VSI * @cookie: filter specific cookie *
**/ staticstruct i40e_cloud_filter *i40e_find_cloud_filter(struct i40e_vsi *vsi, unsignedlong *cookie)
{ struct i40e_cloud_filter *filter = NULL; struct hlist_node *node2;
/** * i40e_open - Called when a network interface is made active * @netdev: network interface device structure * * The open entry point is called when a network interface is made * active by the system (IFF_UP). At this point all resources needed * for transmit and receive operations are allocated, the interrupt * handler is registered with the OS, the netdev watchdog subtask is * enabled, and the stack is notified that the interface is ready. * * Returns 0 on success, negative value on failure
**/ int i40e_open(struct net_device *netdev)
{ struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; int err;
/* disallow open during test or if eeprom is broken */ if (test_bit(__I40E_TESTING, pf->state) ||
test_bit(__I40E_BAD_EEPROM, pf->state)) return -EBUSY;
netif_carrier_off(netdev);
if (i40e_force_link_state(pf, true)) return -EAGAIN;
/** * i40e_netif_set_realnum_tx_rx_queues - Update number of tx/rx queues * @vsi: vsi structure * * This updates netdev's number of tx/rx queues * * Returns status of setting tx/rx queues
**/ staticint i40e_netif_set_realnum_tx_rx_queues(struct i40e_vsi *vsi)
{ int ret;
ret = netif_set_real_num_rx_queues(vsi->netdev,
vsi->num_queue_pairs); if (ret) return ret;
/** * i40e_vsi_open - * @vsi: the VSI to open * * Finish initialization of the VSI. * * Returns 0 on success, negative value on failure * * Note: expects to be called while under rtnl_lock()
**/ int i40e_vsi_open(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back; char int_name[I40E_INT_NAME_STR_LEN]; int err;
/* allocate descriptors */
err = i40e_vsi_setup_tx_resources(vsi); if (err) goto err_setup_tx;
err = i40e_vsi_setup_rx_resources(vsi); if (err) goto err_setup_rx;
err = i40e_vsi_configure(vsi); if (err) goto err_setup_rx;
if (vsi->netdev) {
snprintf(int_name, sizeof(int_name) - 1, "%s-%s",
dev_driver_string(&pf->pdev->dev), vsi->netdev->name);
err = i40e_vsi_request_irq(vsi, int_name); if (err) goto err_setup_rx;
/* Notify the stack of the actual queue counts. */
err = i40e_netif_set_realnum_tx_rx_queues(vsi); if (err) goto err_set_queues;
/** * i40e_fdir_filter_exit - Cleans up the Flow Director accounting * @pf: Pointer to PF * * This function destroys the hlist where all the Flow Director * filters were saved.
**/ staticvoid i40e_fdir_filter_exit(struct i40e_pf *pf)
{ struct i40e_fdir_filter *filter; struct i40e_flex_pit *pit_entry, *tmp; struct hlist_node *node2;
/* Reprogram the default input set for TCP/IPv4 */
i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP,
I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
/* Reprogram the default input set for TCP/IPv6 */
i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV6_TCP,
I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK |
I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
/* Reprogram the default input set for UDP/IPv4 */
i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_UDP,
I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
/* Reprogram the default input set for UDP/IPv6 */
i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV6_UDP,
I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK |
I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
/* Reprogram the default input set for SCTP/IPv4 */
i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_SCTP,
I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
/* Reprogram the default input set for SCTP/IPv6 */
i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV6_SCTP,
I40E_L3_V6_SRC_MASK | I40E_L3_V6_DST_MASK |
I40E_L4_SRC_MASK | I40E_L4_DST_MASK);
/* Reprogram the default input set for Other/IPv4 */
i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_OTHER,
I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
/* Reprogram the default input set for Other/IPv6 */
i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV6_OTHER,
I40E_L3_SRC_MASK | I40E_L3_DST_MASK);
/** * i40e_cloud_filter_exit - Cleans up the cloud filters * @pf: Pointer to PF * * This function destroys the hlist where all the cloud filters * were saved.
**/ staticvoid i40e_cloud_filter_exit(struct i40e_pf *pf)
{ struct i40e_cloud_filter *cfilter; struct hlist_node *node;
/**
 * i40e_close - Disables a network interface
 * @netdev: network interface device structure
 *
 * The close entry point is called when an interface is de-activated
 * by the OS. The hardware is still under the driver's control, but
 * this netdev interface is disabled.
 *
 * Returns 0, this is not allowed to fail
 **/
int i40e_close(struct net_device *netdev)
{
	struct i40e_netdev_priv *priv = netdev_priv(netdev);

	/* close the VSI that backs this netdev */
	i40e_vsi_close(priv->vsi);

	return 0;
}
/** * i40e_do_reset - Start a PF or Core Reset sequence * @pf: board private structure * @reset_flags: which reset is requested * @lock_acquired: indicates whether or not the lock has been acquired * before this function was called. * * The essential difference in resets is that the PF Reset * doesn't clear the packet buffers, doesn't reset the PE * firmware, and doesn't bother the other PFs on the chip.
**/ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
{ struct i40e_vsi *vsi;
u32 val; int i;
/* do the biggest reset indicated */ if (reset_flags & BIT_ULL(__I40E_GLOBAL_RESET_REQUESTED)) {
/* Request a Global Reset * * This will start the chip's countdown to the actual full * chip reset event, and a warning interrupt to be sent * to all PFs, including the requestor. Our handler * for the warning interrupt will deal with the shutdown * and recovery of the switch setup.
*/
dev_dbg(&pf->pdev->dev, "GlobalR requested\n");
val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
val |= I40E_GLGEN_RTRIG_GLOBR_MASK;
wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
/* Request a Core Reset * * Same as Global Reset, except does *not* include the MAC/PHY
*/
dev_dbg(&pf->pdev->dev, "CoreR requested\n");
val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
val |= I40E_GLGEN_RTRIG_CORER_MASK;
wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
i40e_flush(&pf->hw);
} elseif (reset_flags & I40E_PF_RESET_FLAG) {
/* Request a PF Reset * * Resets only the PF-specific registers * * This goes directly to the tear-down and rebuild of * the switch, since we need to do all the recovery as * for the Core Reset.
*/
dev_dbg(&pf->pdev->dev, "PFR requested\n");
i40e_handle_reset_warning(pf, lock_acquired);
} elseif (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) { /* Request a PF Reset * * Resets PF and reinitializes PFs VSI.
*/
i40e_prep_for_reset(pf);
i40e_reset_and_rebuild(pf, true, lock_acquired);
dev_info(&pf->pdev->dev,
test_bit(I40E_FLAG_FW_LLDP_DIS, pf->flags) ? "FW LLDP is disabled\n" : "FW LLDP is enabled\n");
} elseif (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { /* Find the VSI(s) that requested a re-init */
dev_info(&pf->pdev->dev, "VSI reinit requested\n");
i40e_pf_for_each_vsi(pf, i, vsi) { if (test_and_clear_bit(__I40E_VSI_REINIT_REQUESTED,
vsi->state))
i40e_vsi_reinit_locked(vsi);
}
} elseif (reset_flags & BIT_ULL(__I40E_DOWN_REQUESTED)) { /* Find the VSI(s) that needs to be brought down */
dev_info(&pf->pdev->dev, "VSI down requested\n");
/* X710-T*L 2.5G and 5G speeds don't support DCB */ if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
(hw->phy.link_info.link_speed &
~(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB)) &&
!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) /* let firmware decide if the DCB should be disabled */
set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
/* Not DCB capable or capability disabled */ if (!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) return ret;
/* Ignore if event is not for Nearest Bridge */
type = ((mib->type >> I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT)
& I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
dev_dbg(&pf->pdev->dev, "LLDP event mib bridge type 0x%x\n", type); if (type != I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE) return ret;
/* Check MIB Type and return if event for Remote MIB update */
type = mib->type & I40E_AQ_LLDP_MIB_TYPE_MASK;
dev_dbg(&pf->pdev->dev, "LLDP event mib type %s\n", type ? "remote" : "local"); if (type == I40E_AQ_LLDP_MIB_REMOTE) { /* Update the remote cached instance and return */
ret = i40e_aq_get_dcb_config(hw, I40E_AQ_LLDP_MIB_REMOTE,
I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE,
&hw->remote_dcbx_config); gotoexit;
}
/* Store the old configuration */
tmp_dcbx_cfg = hw->local_dcbx_config;
/* Reset the old DCBx configuration data */
memset(&hw->local_dcbx_config, 0, sizeof(hw->local_dcbx_config)); /* Get updated DCBX data from firmware */
ret = i40e_get_dcb_config(&pf->hw); if (ret) { /* X710-T*L 2.5G and 5G speeds don't support DCB */ if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
(hw->phy.link_info.link_speed &
(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) {
dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n");
clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
} else {
dev_info(&pf->pdev->dev, "Failed querying DCB configuration data from firmware, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(pf->hw.aq.asq_last_status));
} gotoexit;
}
/* No change detected in DCBX configs */ if (!memcmp(&tmp_dcbx_cfg, &hw->local_dcbx_config, sizeof(tmp_dcbx_cfg))) {
dev_dbg(&pf->pdev->dev, "No change detected in DCBX configuration.\n"); gotoexit;
}
/* Enable DCB tagging only when more than one TC */ if (i40e_dcb_get_num_tc(&hw->local_dcbx_config) > 1)
set_bit(I40E_FLAG_DCB_ENA, pf->flags); else
clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
set_bit(__I40E_PORT_SUSPENDED, pf->state); /* Reconfiguration needed quiesce all VSIs */
i40e_pf_quiesce_all_vsi(pf);
/* Changes in configuration update VEB/VSI */
i40e_dcb_reconfigure(pf);
ret = i40e_resume_port_tx(pf);
clear_bit(__I40E_PORT_SUSPENDED, pf->state); /* In case of error no point in resuming VSIs */ if (ret) gotoexit;
/* Wait for the PF's queues to be disabled */
ret = i40e_pf_wait_queues_disabled(pf); if (ret) { /* Schedule PF reset to recover */
set_bit(__I40E_PF_RESET_REQUESTED, pf->state);
i40e_service_event_schedule(pf);
} else {
i40e_pf_unquiesce_all_vsi(pf);
set_bit(__I40E_CLIENT_SERVICE_REQUESTED, pf->state);
set_bit(__I40E_CLIENT_L2_CHANGE, pf->state);
}
/**
 * i40e_reenable_fdir_sb - Restore FDir SB capability
 * @pf: board private structure
 *
 * Clears the "SB auto disabled" state bit; if it was set, FD SB is enabled
 * and FD debugging is on, the transition is logged.
 **/
static void i40e_reenable_fdir_sb(struct i40e_pf *pf)
{
	/* nothing to do unless sideband had been auto-disabled */
	if (!test_and_clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state))
		return;

	if (!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags))
		return;

	if (I40E_DEBUG_FD & pf->hw.debug_mask)
		dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n");
}
/**
 * i40e_reenable_fdir_atr - Restore FDir ATR capability
 * @pf: board private structure
 *
 * Clears the "ATR auto disabled" state bit; if it was set, the default
 * TCP/IPv4 input set is written back and, with FD debugging on and ATR
 * enabled, the transition is logged.
 **/
static void i40e_reenable_fdir_atr(struct i40e_pf *pf)
{
	/* nothing to do unless ATR had been auto-disabled */
	if (!test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state))
		return;

	/* ATR uses the same filtering logic as SB rules. It only
	 * functions properly if the input set mask is at the default
	 * settings. It is safe to restore the default input set
	 * because there are no active TCPv4 filter rules.
	 */
	i40e_write_fd_input_set(pf, LIBIE_FILTER_PCTYPE_NONF_IPV4_TCP,
				I40E_L3_SRC_MASK | I40E_L3_DST_MASK |
				I40E_L4_SRC_MASK | I40E_L4_DST_MASK);

	if (!test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags))
		return;

	if (I40E_DEBUG_FD & pf->hw.debug_mask)
		dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n");
}
/* Drop the per-flow-type counter that matches the filter being removed */
switch (filter->flow_type) {
case TCP_V4_FLOW:
	pf->fd_tcp4_filter_cnt--;
	break;
case UDP_V4_FLOW:
	pf->fd_udp4_filter_cnt--;
	break;
case SCTP_V4_FLOW:
	pf->fd_sctp4_filter_cnt--;
	break;
case TCP_V6_FLOW:
	pf->fd_tcp6_filter_cnt--;
	break;
case UDP_V6_FLOW:
	pf->fd_udp6_filter_cnt--;
	break;
case SCTP_V6_FLOW:
	/* SCTP/IPv6 has its own counter; decrementing the UDP/IPv6 one
	 * here would skew both counts.
	 */
	pf->fd_sctp6_filter_cnt--;
	break;
case IP_USER_FLOW:
	/* user-defined IPv4 flow: account by L4 protocol */
	switch (filter->ipl4_proto) {
	case IPPROTO_TCP:
		pf->fd_tcp4_filter_cnt--;
		break;
	case IPPROTO_UDP:
		pf->fd_udp4_filter_cnt--;
		break;
	case IPPROTO_SCTP:
		pf->fd_sctp4_filter_cnt--;
		break;
	case IPPROTO_IP:
		pf->fd_ip4_filter_cnt--;
		break;
	}
	break;
case IPV6_USER_FLOW:
	/* user-defined IPv6 flow: account by L4 protocol */
	switch (filter->ipl4_proto) {
	case IPPROTO_TCP:
		pf->fd_tcp6_filter_cnt--;
		break;
	case IPPROTO_UDP:
		pf->fd_udp6_filter_cnt--;
		break;
	case IPPROTO_SCTP:
		pf->fd_sctp6_filter_cnt--;
		break;
	case IPPROTO_IP:
		pf->fd_ip6_filter_cnt--;
		break;
	}
	break;
}

/* Remove the filter from the list and free memory */
hlist_del(&filter->fdir_node);
kfree(filter);
}
/** * i40e_fdir_check_and_reenable - Function to reenabe FD ATR or SB if disabled * @pf: board private structure
**/ void i40e_fdir_check_and_reenable(struct i40e_pf *pf)
{ struct i40e_fdir_filter *filter;
u32 fcnt_prog, fcnt_avail; struct hlist_node *node;
if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state)) return;
/* Check if we have enough room to re-enable FDir SB capability. */
fcnt_prog = i40e_get_global_fd_count(pf);
fcnt_avail = pf->fdir_pf_filter_count; if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM)) ||
(pf->fd_add_err == 0) ||
(i40e_get_current_atr_cnt(pf) < pf->fd_atr_cnt))
i40e_reenable_fdir_sb(pf);
/* We should wait for even more space before re-enabling ATR. * Additionally, we cannot enable ATR as long as we still have TCP SB * rules active.
*/ if ((fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) &&
pf->fd_tcp4_filter_cnt == 0 && pf->fd_tcp6_filter_cnt == 0)
i40e_reenable_fdir_atr(pf);
/* if hw had a problem adding a filter, delete it */ if (pf->fd_inv > 0) {
hlist_for_each_entry_safe(filter, node,
&pf->fdir_filter_list, fdir_node) if (filter->fd_id == pf->fd_inv)
i40e_delete_invalid_filter(pf, filter);
}
}
#define I40E_MIN_FD_FLUSH_INTERVAL 10 #define I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE 30 /** * i40e_fdir_flush_and_replay - Function to flush all FD filters and replay SB * @pf: board private structure
**/ staticvoid i40e_fdir_flush_and_replay(struct i40e_pf *pf)
{ unsignedlong min_flush_time; int flush_wait_retry = 50; bool disable_atr = false; int fd_room; int reg;
if (!time_after(jiffies, pf->fd_flush_timestamp +
(I40E_MIN_FD_FLUSH_INTERVAL * HZ))) return;
/* If the flush is happening too quick and we have mostly SB rules we * should not re-enable ATR for some time.
*/
min_flush_time = pf->fd_flush_timestamp +
(I40E_MIN_FD_FLUSH_SB_ATR_UNSTABLE * HZ);
fd_room = pf->fdir_pf_filter_count - pf->fdir_pf_active_filters;
if (!(time_after(jiffies, min_flush_time)) &&
(fd_room < I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR)) { if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "ATR disabled, not enough FD filter space.\n");
disable_atr = true;
}
pf->fd_flush_timestamp = jiffies;
set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state); /* flush all filters */
wr32(&pf->hw, I40E_PFQF_CTL_1,
I40E_PFQF_CTL_1_CLEARFDTABLE_MASK);
i40e_flush(&pf->hw);
pf->fd_flush_cnt++;
pf->fd_add_err = 0; do { /* Check FD flush status every 5-6msec */
usleep_range(5000, 6000);
reg = rd32(&pf->hw, I40E_PFQF_CTL_1); if (!(reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK)) break;
} while (flush_wait_retry--); if (reg & I40E_PFQF_CTL_1_CLEARFDTABLE_MASK) {
dev_warn(&pf->pdev->dev, "FD table did not flush, needs more time\n");
} else { /* replay sideband filters */
i40e_fdir_filter_restore(i40e_pf_get_main_vsi(pf)); if (!disable_atr && !pf->fd_tcp4_filter_cnt)
clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
clear_bit(__I40E_FD_FLUSH_REQUESTED, pf->state); if (I40E_DEBUG_FD & pf->hw.debug_mask)
dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n");
}
}
/**
 * i40e_get_current_atr_cnt - Get the count of total FD ATR filters programmed
 * @pf: board private structure
 *
 * Returns the current FD filter count minus the number of active PF
 * (sideband) filters tracked in pf->fdir_pf_active_filters.
 **/
u32 i40e_get_current_atr_cnt(struct i40e_pf *pf)
{
	u32 total = i40e_get_current_fd_count(pf);
	u32 sideband = pf->fdir_pf_active_filters;

	return total - sideband;
}
/* if interface is down do nothing */ if (test_bit(__I40E_DOWN, pf->state)) return;
if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
i40e_fdir_flush_and_replay(pf);
i40e_fdir_check_and_reenable(pf);
}
/**
 * i40e_vsi_link_event - notify VSI of a link event
 * @vsi: vsi to be notified
 * @link_up: link up or down
 *
 * A NULL or down VSI is ignored.
 **/
static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up)
{
	if (!vsi)
		return;

	if (test_bit(__I40E_VSI_DOWN, vsi->state))
		return;

	switch (vsi->type) {
	case I40E_VSI_MAIN:
		if (!vsi->netdev || !vsi->netdev_registered)
			break;
		/* fall through */
	case I40E_VSI_SRIOV:
	case I40E_VSI_VMDQ2:
	case I40E_VSI_CTRL:
	case I40E_VSI_IWARP:
	case I40E_VSI_MIRROR:
	default:
		/* there is no notification for other VSIs */
		break;
	}
}
/**
 * i40e_veb_link_event - notify elements on the veb of a link event
 * @veb: veb to be notified
 * @link_up: link up or down
 *
 * Forwards the event to every VSI of the owning PF whose uplink is this
 * VEB. A NULL VEB, or one without a PF, is ignored.
 **/
static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up)
{
	struct i40e_pf *pf;
	struct i40e_vsi *vsi;
	int idx;

	if (!veb)
		return;

	pf = veb->pf;
	if (!pf)
		return;

	/* Send link event to contained VSIs */
	i40e_pf_for_each_vsi(pf, idx, vsi) {
		if (vsi->uplink_seid == veb->seid)
			i40e_vsi_link_event(vsi, link_up);
	}
}
/* set this to force the get_link_status call to refresh state */
pf->hw.phy.get_link_info = true;
old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP);
status = i40e_get_link_status(&pf->hw, &new_link);
/* On success, disable temp link polling */ if (status == 0) {
clear_bit(__I40E_TEMP_LINK_POLLING, pf->state);
} else { /* Enable link polling temporarily until i40e_get_link_status * returns 0
*/
set_bit(__I40E_TEMP_LINK_POLLING, pf->state);
dev_dbg(&pf->pdev->dev, "couldn't get link state, status: %d\n",
status); return;
}
if (!new_link && old_link)
pf->link_down_events++;
i40e_print_link_message(vsi, new_link);
/* Notify the base of the switch tree connected to * the link. Floating VEBs are not notified.
*/ if (veb)
i40e_veb_link_event(veb, new_link); else
i40e_vsi_link_event(vsi, new_link);
if (pf->vf)
i40e_vc_notify_link_state(pf);
if (test_bit(I40E_FLAG_PTP_ENA, pf->flags))
i40e_ptp_set_increment(pf); #ifdef CONFIG_I40E_DCB if (new_link == old_link) return; /* Not SW DCB so firmware will take care of default settings */ if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) return;
/* We cover here only link down, as after link up in case of SW DCB * SW LLDP agent will take care of setting it up
*/ if (!new_link) {
dev_dbg(&pf->pdev->dev, "Reconfig DCB to single TC as result of Link Down\n");
memset(&pf->tmp_cfg, 0, sizeof(pf->tmp_cfg));
err = i40e_dcb_sw_default_config(pf); if (err) {
clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
} else {
pf->dcbx_cap = DCB_CAP_DCBX_HOST |
DCB_CAP_DCBX_VER_IEEE;
set_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
}
} #endif/* CONFIG_I40E_DCB */
}
/** * i40e_watchdog_subtask - periodic checks not using event driven response * @pf: board private structure
**/ staticvoid i40e_watchdog_subtask(struct i40e_pf *pf)
{ struct i40e_vsi *vsi; struct i40e_veb *veb; int i;
/* if interface is down do nothing */ if (test_bit(__I40E_DOWN, pf->state) ||
test_bit(__I40E_CONFIG_BUSY, pf->state)) return;
/* make sure we don't do these things too often */ if (time_before(jiffies, (pf->service_timer_previous +
pf->service_timer_period))) return;
pf->service_timer_previous = jiffies;
if (test_bit(I40E_FLAG_LINK_POLLING_ENA, pf->flags) ||
test_bit(__I40E_TEMP_LINK_POLLING, pf->state))
i40e_link_event(pf);
/* Update the stats for active netdevs so the network stack * can look at updated numbers whenever it cares to
*/
i40e_pf_for_each_vsi(pf, i, vsi) if (vsi->netdev)
i40e_update_stats(vsi);
if (test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags)) { /* Update the stats for the active switching components */
i40e_pf_for_each_veb(pf, i, veb)
i40e_update_veb_stats(veb);
}
i40e_ptp_rx_hang(pf);
i40e_ptp_tx_hang(pf);
}
/**
 * i40e_reset_subtask - Set up for resetting the device and driver
 * @pf: board private structure
 *
 * Collects (and clears) the pending reset/reinit/down request bits from
 * pf->state, services an already signalled reset interrupt first, and then
 * hands the collected requests to i40e_do_reset() unless the PF is down or
 * busy reconfiguring.
 **/
static void i40e_reset_subtask(struct i40e_pf *pf)
{
	/* request bits, listed in the order they are examined */
	static const int request_bits[] = {
		__I40E_REINIT_REQUESTED,
		__I40E_PF_RESET_REQUESTED,
		__I40E_CORE_RESET_REQUESTED,
		__I40E_GLOBAL_RESET_REQUESTED,
		__I40E_DOWN_REQUESTED,
	};
	u32 pending = 0;
	unsigned int n;

	for (n = 0; n < sizeof(request_bits) / sizeof(request_bits[0]); n++) {
		if (!test_bit(request_bits[n], pf->state))
			continue;
		pending |= BIT(request_bits[n]);
		clear_bit(request_bits[n], pf->state);
	}

	/* If there's a recovery already waiting, it takes
	 * precedence before starting a new reset sequence.
	 */
	if (test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) {
		i40e_prep_for_reset(pf);
		i40e_reset(pf);
		i40e_rebuild(pf, false, false);
	}

	if (!pending)
		return;

	/* If we're already down or resetting, just bail */
	if (test_bit(__I40E_DOWN, pf->state) ||
	    test_bit(__I40E_CONFIG_BUSY, pf->state))
		return;

	i40e_do_reset(pf, pending, false);
}
/* Do a new status request to re-enable LSE reporting * and load new status information into the hw struct * This completely ignores any state information * in the ARQ event info, instead choosing to always * issue the AQ update link status command.
*/
i40e_link_event(pf);
/* Check if module meets thermal requirements */ if (status->phy_type == I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP) {
dev_err(&pf->pdev->dev, "Rx/Tx is disabled on this device because the module does not meet thermal requirements.\n");
dev_err(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
} else { /* check for unqualified module, if link is down, suppress * the message if link was forced to be down.
*/ if ((status->link_info & I40E_AQ_MEDIA_AVAILABLE) &&
(!(status->an_info & I40E_AQ_QUALIFIED_MODULE)) &&
(!(status->link_info & I40E_AQ_LINK_UP)) &&
(!test_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags))) {
dev_err(&pf->pdev->dev, "Rx/Tx is disabled on this device because an unsupported SFP module type was detected.\n");
dev_err(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n");
}
}
}
case i40e_aqc_opc_get_link_status:
rtnl_lock();
i40e_handle_link_event(pf, &event);
rtnl_unlock(); break; case i40e_aqc_opc_send_msg_to_pf:
ret = i40e_vc_process_vf_msg(pf,
le16_to_cpu(event.desc.retval),
le32_to_cpu(event.desc.cookie_high),
le32_to_cpu(event.desc.cookie_low),
event.msg_buf,
event.msg_len); break; case i40e_aqc_opc_lldp_update_mib:
dev_dbg(&pf->pdev->dev, "ARQ: Update LLDP MIB event received\n"); #ifdef CONFIG_I40E_DCB
rtnl_lock();
i40e_handle_lldp_event(pf, &event);
rtnl_unlock(); #endif/* CONFIG_I40E_DCB */ break; case i40e_aqc_opc_event_lan_overflow:
dev_dbg(&pf->pdev->dev, "ARQ LAN queue overflow event received\n");
i40e_handle_lan_overflow_event(pf, &event); break; case i40e_aqc_opc_send_msg_to_peer:
dev_info(&pf->pdev->dev, "ARQ: Msg from other pf\n"); break; case i40e_aqc_opc_nvm_erase: case i40e_aqc_opc_nvm_update: case i40e_aqc_opc_oem_post_update:
i40e_debug(&pf->hw, I40E_DEBUG_NVM, "ARQ NVM operation 0x%04x completed\n",
opcode); break; default:
dev_info(&pf->pdev->dev, "ARQ: Unknown event 0x%04x ignored\n",
opcode); break;
}
} while (i++ < I40E_AQ_WORK_LIMIT);
if (i < I40E_AQ_WORK_LIMIT)
clear_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state);
/* re-enable Admin queue interrupt cause */
val = rd32(hw, I40E_PFINT_ICR0_ENA);
val |= I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
wr32(hw, I40E_PFINT_ICR0_ENA, val);
i40e_flush(hw);
kfree(event.msg_buf);
}
/** * i40e_verify_eeprom - make sure eeprom is good to use * @pf: board private structure
**/ staticvoid i40e_verify_eeprom(struct i40e_pf *pf)
{ int err;
err = i40e_diag_eeprom_test(&pf->hw); if (err) { /* retry in case of garbage read */
err = i40e_diag_eeprom_test(&pf->hw); if (err) {
dev_info(&pf->pdev->dev, "eeprom check failed (%d), Tx/Rx traffic disabled\n",
err);
set_bit(__I40E_BAD_EEPROM, pf->state);
}
}
/** * i40e_enable_pf_switch_lb * @pf: pointer to the PF structure * * enable switch loop back or die - no point in a return value
**/ staticvoid i40e_enable_pf_switch_lb(struct i40e_pf *pf)
{ struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_vsi_context ctxt; int ret;
ctxt.seid = pf->main_vsi_seid;
ctxt.pf_num = pf->hw.pf_id;
ctxt.vf_num = 0;
ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "couldn't get PF vsi config, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); return;
}
ctxt.flags = I40E_AQ_VSI_TYPE_PF;
ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
ctxt.info.switch_id |= cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "update vsi switch failed, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
}
}
/** * i40e_disable_pf_switch_lb * @pf: pointer to the PF structure * * disable switch loop back or die - no point in a return value
**/ staticvoid i40e_disable_pf_switch_lb(struct i40e_pf *pf)
{ struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_vsi_context ctxt; int ret;
ctxt.seid = pf->main_vsi_seid;
ctxt.pf_num = pf->hw.pf_id;
ctxt.vf_num = 0;
ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "couldn't get PF vsi config, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); return;
}
ctxt.flags = I40E_AQ_VSI_TYPE_PF;
ctxt.info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
ctxt.info.switch_id &= ~cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "update vsi switch failed, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
}
}
/** * i40e_config_bridge_mode - Configure the HW bridge mode * @veb: pointer to the bridge instance * * Configure the loop back mode for the LAN VSI that is downlink to the * specified HW bridge instance. It is expected this function is called * when a new HW bridge is instantiated.
**/ staticvoid i40e_config_bridge_mode(struct i40e_veb *veb)
{ struct i40e_pf *pf = veb->pf;
/**
 * i40e_reconstitute_veb - rebuild the VEB and VSIs connected to it
 * @veb: pointer to the VEB instance
 *
 * Re-creates a VEB and every VSI whose veb_idx refers to it. Connections
 * are tracked by the driver's own index numbers because the seid's from
 * the HW could change across the reset.
 *
 * Returns 0 on success, -ENOENT when the VEB's uplink or control VSI is
 * inconsistent, or the error code of the add call that failed.
 **/
static int i40e_reconstitute_veb(struct i40e_veb *veb)
{
	struct i40e_pf *pf = veb->pf;
	struct i40e_vsi *owner = NULL;
	struct i40e_vsi *cur;
	int idx, err;

	/* As we do not maintain PV (port virtualizer) switch element then
	 * there can be only one non-floating VEB that have uplink to MAC SEID
	 * and its control VSI is the main one.
	 */
	if (WARN_ON(veb->uplink_seid && veb->uplink_seid != pf->mac_seid)) {
		dev_err(&pf->pdev->dev,
			"Invalid uplink SEID for VEB %d\n", veb->idx);
		return -ENOENT;
	}

	if (veb->uplink_seid == pf->mac_seid) {
		/* the main VSI must point at this VEB and own it */
		owner = i40e_pf_get_main_vsi(pf);

		if (WARN_ON(owner->veb_idx != veb->idx ||
			    !(owner->flags & I40E_VSI_FLAG_VEB_OWNER))) {
			dev_err(&pf->pdev->dev,
				"Invalid control VSI for VEB %d\n", veb->idx);
			return -ENOENT;
		}

		/* the control VSI goes into the switch before the VEB */
		err = i40e_add_vsi(owner);
		if (err) {
			dev_err(&pf->pdev->dev,
				"Rebuild of owner VSI for VEB %d failed: %d\n",
				veb->idx, err);
			return err;
		}

		i40e_vsi_reset_stats(owner);
	}

	/* create the VEB in the switch and move the VSI onto the VEB */
	err = i40e_add_veb(veb, owner);
	if (err)
		return err;

	if (veb->uplink_seid) {
		veb->bridge_mode = test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags) ?
				   BRIDGE_MODE_VEB : BRIDGE_MODE_VEPA;
		i40e_config_bridge_mode(veb);
	}

	/* now the remaining VSIs that hang off this VEB */
	i40e_pf_for_each_vsi(pf, idx, cur) {
		if (cur == owner)
			continue;

		if (cur->veb_idx != veb->idx)
			continue;

		cur->uplink_seid = veb->seid;
		err = i40e_add_vsi(cur);
		if (err) {
			dev_info(&pf->pdev->dev,
				 "rebuild of vsi_idx %d failed: %d\n",
				 idx, err);
			return err;
		}

		i40e_vsi_reset_stats(cur);
	}

	return err;
}
/** * i40e_get_capabilities - get info about the HW * @pf: the PF struct * @list_type: AQ capability to be queried
**/ staticint i40e_get_capabilities(struct i40e_pf *pf, enum i40e_admin_queue_opc list_type)
{ struct libie_aqc_list_caps_elem *cap_buf;
u16 data_size; int buf_len; int err;
buf_len = 40 * sizeof(struct libie_aqc_list_caps_elem); do {
cap_buf = kzalloc(buf_len, GFP_KERNEL); if (!cap_buf) return -ENOMEM;
/* this loads the data into the hw struct for us */
err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len,
&data_size, list_type,
NULL); /* data loaded, buffer no longer needed */
kfree(cap_buf);
if (pf->hw.aq.asq_last_status == LIBIE_AQ_RC_ENOMEM) { /* retry with a larger buffer */
buf_len = data_size;
} elseif (pf->hw.aq.asq_last_status != LIBIE_AQ_RC_OK || err) {
dev_info(&pf->pdev->dev, "capability discovery failed, err %pe aq_err %s\n",
ERR_PTR(err),
libie_aq_str(pf->hw.aq.asq_last_status)); return -ENODEV;
}
} while (err);
i40e_fdir_filter_exit(pf);
vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); if (vsi)
i40e_vsi_release(vsi);
}
/** * i40e_rebuild_cloud_filters - Rebuilds cloud filters for VSIs * @vsi: PF main vsi * @seid: seid of main or channel VSIs * * Rebuilds cloud filters associated with main VSI and channel VSIs if they * existed before reset
**/ staticint i40e_rebuild_cloud_filters(struct i40e_vsi *vsi, u16 seid)
{ struct i40e_cloud_filter *cfilter; struct i40e_pf *pf = vsi->back; struct hlist_node *node; int ret;
/* Add cloud filters back if they exist */
hlist_for_each_entry_safe(cfilter, node, &pf->cloud_filter_list,
cloud_node) { if (cfilter->seid != seid) continue;
if (cfilter->dst_port)
ret = i40e_add_del_cloud_filter_big_buf(vsi, cfilter, true); else
ret = i40e_add_del_cloud_filter(vsi, cfilter, true);
/**
 * i40e_rebuild_channels - Rebuilds channel VSIs if they existed before reset
 * @vsi: PF main vsi
 *
 * Walks vsi->ch_list and, for each initialized channel, re-adds the channel,
 * reconfigures its TX rings, re-applies its TX rate limit (if one is set)
 * and rebuilds its cloud filters. The walk stops at the first channel that
 * is not initialized.
 *
 * Returns 0 on success (including an empty list), -EINVAL if the rate limit
 * could not be set, or the error code of the step that failed.
 **/
static int i40e_rebuild_channels(struct i40e_vsi *vsi)
{
	struct i40e_channel *chan, *next;
	int err;

	if (list_empty(&vsi->ch_list))
		return 0;

	list_for_each_entry_safe(chan, next, &vsi->ch_list, list) {
		if (!chan->initialized)
			break;

		/* Proceed with creation of channel (VMDq2) VSI */
		err = i40e_add_channel(vsi->back, vsi->uplink_seid, chan);
		if (err) {
			dev_info(&vsi->back->pdev->dev,
				 "failed to rebuild channels using uplink_seid %u\n",
				 vsi->uplink_seid);
			return err;
		}

		/* Reconfigure TX queues using QTX_CTL register */
		err = i40e_channel_config_tx_ring(vsi->back, vsi, chan);
		if (err) {
			dev_info(&vsi->back->pdev->dev,
				 "failed to configure TX rings for channel %u\n",
				 chan->seid);
			return err;
		}

		/* advance 'next_base_queue' past this channel's queues */
		vsi->next_base_queue += chan->num_queue_pairs;

		if (chan->max_tx_rate) {
			u64 credits = chan->max_tx_rate;

			if (i40e_set_bw_limit(vsi, chan->seid,
					      chan->max_tx_rate))
				return -EINVAL;

			/* credits is only computed for the debug message */
			do_div(credits, I40E_BW_CREDIT_DIVISOR);
			dev_dbg(&vsi->back->pdev->dev,
				"Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
				chan->max_tx_rate, credits, chan->seid);
		}

		err = i40e_rebuild_cloud_filters(vsi, chan->seid);
		if (err) {
			dev_dbg(&vsi->back->pdev->dev,
				"Failed to rebuild cloud filters for channel VSI %u\n",
				chan->seid);
			return err;
		}
	}

	return 0;
}
/** * i40e_clean_xps_state - clean xps state for every tx_ring * @vsi: ptr to the VSI
**/ staticvoid i40e_clean_xps_state(struct i40e_vsi *vsi)
{ int i;
if (vsi->tx_rings) for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->tx_rings[i])
clear_bit(__I40E_TX_XPS_INIT_DONE,
vsi->tx_rings[i]->state);
}
/** * i40e_prep_for_reset - prep for the core to reset * @pf: board private structure * * Close up the VFs and other things in prep for PF Reset.
**/ staticvoid i40e_prep_for_reset(struct i40e_pf *pf)
{ struct i40e_hw *hw = &pf->hw; struct i40e_vsi *vsi; int ret = 0;
u32 v;
clear_bit(__I40E_RESET_INTR_RECEIVED, pf->state); if (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) return; if (i40e_check_asq_alive(&pf->hw))
i40e_vc_notify_reset(pf);
dev_dbg(&pf->pdev->dev, "Tearing down internal switch for reset\n");
/* quiesce the VSIs and their queues that are not already DOWN */
i40e_pf_quiesce_all_vsi(pf);
/* Check if pointer to OEM version block is valid. */
i40e_read_nvm_word(hw, I40E_SR_NVM_OEM_VERSION_PTR, &block_offset); if (block_offset == 0xffff) return;
/* Check if OEM version block has correct length. */
i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_LENGTH_OFFSET,
&block_length); if (block_length < I40E_NVM_OEM_LENGTH) return;
/* Check if OEM version format is as expected. */
i40e_read_nvm_word(hw, block_offset + I40E_NVM_OEM_CAPABILITIES_OFFSET,
&capabilities); if ((capabilities & I40E_NVM_OEM_CAPABILITIES_MASK) != 0) return;
/** * i40e_rebuild - rebuild using a saved config * @pf: board private structure * @reinit: if the Main VSI needs to be re-initialized. * @lock_acquired: indicates whether or not the lock has been acquired * before this function was called.
**/ staticvoid i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{ constbool is_recovery_mode_reported = i40e_check_recovery_mode(pf); struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); struct i40e_hw *hw = &pf->hw; struct i40e_veb *veb; int ret;
u32 val; int v;
if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
is_recovery_mode_reported)
i40e_set_ethtool_ops(vsi->netdev);
/* rebuild the basics for the AdminQ, HMC, and initial HW switch */
ret = i40e_init_adminq(&pf->hw); if (ret) {
dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); goto clear_recovery;
}
i40e_get_oem_version(&pf->hw);
if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state)) { /* The following delay is necessary for firmware update. */
mdelay(1000);
}
/* re-verify the eeprom if we just had an EMP reset */ if (test_and_clear_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state))
i40e_verify_eeprom(pf);
/* if we are going out of or into recovery mode we have to act * accordingly with regard to resources initialization * and deinitialization
*/ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { if (i40e_get_capabilities(pf,
i40e_aqc_opc_list_func_capabilities)) goto end_unlock;
if (is_recovery_mode_reported) { /* we're staying in recovery mode so we'll reinitialize * misc vector here
*/ if (i40e_setup_misc_vector_for_recovery_mode(pf)) goto end_unlock;
} else { if (!lock_acquired)
rtnl_lock(); /* we're going out of recovery mode so we'll free * the IRQ allocated specifically for recovery mode * and restore the interrupt scheme
*/
free_irq(pf->pdev->irq, pf);
i40e_clear_interrupt_scheme(pf); if (i40e_restore_interrupt_scheme(pf)) goto end_unlock;
}
/* tell the firmware that we're starting */
i40e_send_version(pf);
/* bail out in case recovery mode was detected, as there is * no need for further configuration.
*/ goto end_unlock;
}
i40e_clear_pxe_mode(hw);
ret = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (ret) goto end_core_reset;
ret = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
hw->func_caps.num_rx_qp, 0, 0); if (ret) {
dev_info(&pf->pdev->dev, "init_lan_hmc failed: %d\n", ret); goto end_core_reset;
}
ret = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY); if (ret) {
dev_info(&pf->pdev->dev, "configure_lan_hmc failed: %d\n", ret); goto end_core_reset;
}
#ifdef CONFIG_I40E_DCB /* Enable FW to write a default DCB config on link-up * unless I40E_FLAG_TC_MQPRIO was enabled or DCB * is not supported with new link speed
*/ if (i40e_is_tc_mqprio_enabled(pf)) {
i40e_aq_set_dcb_parameters(hw, false, NULL);
} else { if (I40E_IS_X710TL_DEVICE(hw->device_id) &&
(hw->phy.link_info.link_speed &
(I40E_LINK_SPEED_2_5GB | I40E_LINK_SPEED_5GB))) {
i40e_aq_set_dcb_parameters(hw, false, NULL);
dev_warn(&pf->pdev->dev, "DCB is not supported for X710-T*L 2.5/5G speeds\n");
clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
} else {
i40e_aq_set_dcb_parameters(hw, true, NULL);
ret = i40e_init_pf_dcb(pf); if (ret) {
dev_info(&pf->pdev->dev, "DCB init failed %d, disabled\n",
ret);
clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags); /* Continue without DCB enabled */
}
}
}
#endif/* CONFIG_I40E_DCB */ if (!lock_acquired)
rtnl_lock();
ret = i40e_setup_pf_switch(pf, reinit, true); if (ret) goto end_unlock;
/* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic.
*/
ret = i40e_aq_set_phy_int_mask(&pf->hw,
~(I40E_AQ_EVENT_LINK_UPDOWN |
I40E_AQ_EVENT_MEDIA_NA |
I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (ret)
dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
/* Rebuild the VSIs and VEBs that existed before reset. * They are still in our local switch element arrays, so only * need to rebuild the switch model in the HW. * * If there were VEBs but the reconstitution failed, we'll try * to recover minimal use by getting the basic PF VSI working.
*/ if (vsi->uplink_seid != pf->mac_seid) {
dev_dbg(&pf->pdev->dev, "attempting to rebuild switch\n");
/* Rebuild VEBs */
i40e_pf_for_each_veb(pf, v, veb) {
ret = i40e_reconstitute_veb(veb); if (!ret) continue;
/* If Main VEB failed, we're in deep doodoo, * so give up rebuilding the switch and set up * for minimal rebuild of PF VSI. * If orphan failed, we'll report the error * but try to keep going.
*/ if (veb->uplink_seid == pf->mac_seid) {
dev_info(&pf->pdev->dev, "rebuild of switch failed: %d, will try to set up simple PF connection\n",
ret);
vsi->uplink_seid = pf->mac_seid; break;
} elseif (veb->uplink_seid == 0) {
dev_info(&pf->pdev->dev, "rebuild of orphan VEB failed: %d\n",
ret);
}
}
}
if (vsi->uplink_seid == pf->mac_seid) {
dev_dbg(&pf->pdev->dev, "attempting to rebuild PF VSI\n"); /* no VEB, so rebuild only the Main VSI */
ret = i40e_add_vsi(vsi); if (ret) {
dev_info(&pf->pdev->dev, "rebuild of Main VSI failed: %d\n", ret); goto end_unlock;
}
}
ret = i40e_set_bw_limit(vsi, vsi->seid, max_tx_rate); if (ret) goto end_unlock;
credits = max_tx_rate;
do_div(credits, I40E_BW_CREDIT_DIVISOR);
dev_dbg(&vsi->back->pdev->dev, "Set tx rate of %llu Mbps (count of 50Mbps %llu) for vsi->seid %u\n",
max_tx_rate,
credits,
vsi->seid);
}
ret = i40e_rebuild_cloud_filters(vsi, vsi->seid); if (ret) goto end_unlock;
/* PF Main VSI is rebuilt by now, go ahead and rebuild channel VSIs * for this main VSI if they exist
*/
ret = i40e_rebuild_channels(vsi); if (ret) goto end_unlock;
/* Reconfigure hardware for allowing smaller MSS in the case * of TSO, so that we avoid the MDD being fired and causing * a reset in the case of small MSS+TSO.
*/ #define I40E_REG_MSS 0x000E64DC #define I40E_REG_MSS_MIN_MASK 0x3FF0000 #define I40E_64BYTE_MSS 0x400000
val = rd32(hw, I40E_REG_MSS); if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
val &= ~I40E_REG_MSS_MIN_MASK;
val |= I40E_64BYTE_MSS;
wr32(hw, I40E_REG_MSS, val);
}
if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) {
msleep(75);
ret = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (ret)
dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(pf->hw.aq.asq_last_status));
} /* reinit the misc interrupt */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
ret = i40e_setup_misc_vector(pf); if (ret) goto end_unlock;
}
/* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out * PAUSE or PFC frames and potentially controlling traffic for other * PF/VF VSIs. * The FW can still send Flow control frames if enabled.
*/
i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
pf->main_vsi_seid);
/* restart the VSIs that were rebuilt and running before the reset */
i40e_pf_unquiesce_all_vsi(pf);
/* Release the RTNL lock before we start resetting VFs */ if (!lock_acquired)
rtnl_unlock();
/* Restore promiscuous settings */
ret = i40e_set_promiscuous(pf, pf->cur_promisc); if (ret)
dev_warn(&pf->pdev->dev, "Failed to restore promiscuous setting: %s, err %pe aq_err %s\n",
pf->cur_promisc ? "on" : "off",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
i40e_reset_all_vfs(pf, true);
/* tell the firmware that we're starting */
i40e_send_version(pf);
/* We've already released the lock, so don't do it again */ goto end_core_reset;
/**
 * i40e_reset_and_rebuild - reset and rebuild using a saved config
 * @pf: board private structure
 * @reinit: if the Main VSI needs to be re-initialized.
 * @lock_acquired: indicates whether or not the lock has been acquired
 * before this function was called.
 *
 * Does nothing while the device is being removed (__I40E_IN_REMOVE set).
 * Otherwise calls i40e_reset() and, only if that succeeds, i40e_rebuild().
 * A failed reset is logged and the rebuild is skipped.
 **/
static void i40e_reset_and_rebuild(struct i40e_pf *pf, bool reinit,
				   bool lock_acquired)
{
	int ret;

	if (test_bit(__I40E_IN_REMOVE, pf->state))
		return;

	/* Now we wait for GRST to settle out.
	 * We don't have to delete the VEBs or VSIs from the hw switch
	 * because the reset will make them disappear.
	 */
	ret = i40e_reset(pf);
	if (ret) {
		/* message is newline-terminated so it is emitted as its own
		 * log record
		 */
		dev_err(&pf->pdev->dev, "%s: i40e_reset() FAILED\n", __func__);
		return;
	}

	i40e_rebuild(pf, reinit, lock_acquired);
}
/**
 * i40e_handle_reset_warning - prep for the PF to reset, reset and rebuild
 * @pf: board private structure
 * @lock_acquired: indicates whether or not the lock has been acquired
 * before this function was called.
 *
 * Runs i40e_prep_for_reset() and then i40e_reset_and_rebuild() with
 * reinit set to false.
 **/
static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
{
	const bool reinit = false;

	i40e_prep_for_reset(pf);
	i40e_reset_and_rebuild(pf, reinit, lock_acquired);
}
/**
 * i40e_print_vfs_mdd_events - print VFs malicious driver detect event
 * @pf: pointer to the PF structure
 *
 * Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
 *
 * Returns early unless __I40E_MDD_VF_PRINT_PENDING was set (the bit is
 * cleared by the check) and the pf->mdd_message_rate_limit allows a print.
 * For each allocated VF, an Rx and/or Tx event is printed when its counter
 * differs from the value recorded at the last print. When something was
 * printed and I40E_FLAG_MDD_AUTO_RESET_VF is not set, a hint about
 * re-enabling the VF is logged as well.
 */
static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
{
	unsigned int i;

	/* check that there are pending MDD events to print */
	if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
		return;

	if (!__ratelimit(&pf->mdd_message_rate_limit))
		return;

	for (i = 0; i < pf->num_alloc_vfs; i++) {
		struct i40e_vf *vf = &pf->vf[i];
		bool is_printed = false;

		/* only print Rx MDD event message if there are new events */
		if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
			vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
			i40e_print_vf_mdd_event(pf, vf, false);
			is_printed = true;
		}

		/* only print Tx MDD event message if there are new events */
		if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
			vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
			i40e_print_vf_mdd_event(pf, vf, true);
			is_printed = true;
		}

		/* i is unsigned, so it is printed with %u */
		if (is_printed &&
		    !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
			dev_info(&pf->pdev->dev,
				 "Use PF Control I/F to re-enable the VF #%u\n",
				 i);
	}
}
/** * i40e_handle_mdd_event * @pf: pointer to the PF structure * * Called from the MDD irq handler to identify possibly malicious vfs
**/ staticvoid i40e_handle_mdd_event(struct i40e_pf *pf)
{ struct i40e_hw *hw = &pf->hw; bool mdd_detected = false; struct i40e_vf *vf;
u32 reg; int i;
if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) { /* Since the VF MDD event logging is rate limited, check if * there are pending MDD events.
*/
i40e_print_vfs_mdd_events(pf); return;
}
/* find what triggered the MDD event */
reg = rd32(hw, I40E_GL_MDET_TX); if (reg & I40E_GL_MDET_TX_VALID_MASK) {
u8 pf_num = FIELD_GET(I40E_GL_MDET_TX_PF_NUM_MASK, reg);
u16 vf_num = FIELD_GET(I40E_GL_MDET_TX_VF_NUM_MASK, reg);
u8 event = FIELD_GET(I40E_GL_MDET_TX_EVENT_MASK, reg);
u16 queue = FIELD_GET(I40E_GL_MDET_TX_QUEUE_MASK, reg) -
pf->hw.func_caps.base_queue; if (netif_msg_tx_err(pf))
dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on TX queue %d PF number 0x%02x VF number 0x%02x\n",
event, queue, pf_num, vf_num);
wr32(hw, I40E_GL_MDET_TX, 0xffffffff);
mdd_detected = true;
}
reg = rd32(hw, I40E_GL_MDET_RX); if (reg & I40E_GL_MDET_RX_VALID_MASK) {
u8 func = FIELD_GET(I40E_GL_MDET_RX_FUNCTION_MASK, reg);
u8 event = FIELD_GET(I40E_GL_MDET_RX_EVENT_MASK, reg);
u16 queue = FIELD_GET(I40E_GL_MDET_RX_QUEUE_MASK, reg) -
pf->hw.func_caps.base_queue; if (netif_msg_rx_err(pf))
dev_info(&pf->pdev->dev, "Malicious Driver Detection event 0x%02x on RX queue %d of function 0x%02x\n",
event, queue, func);
wr32(hw, I40E_GL_MDET_RX, 0xffffffff);
mdd_detected = true;
}
if (mdd_detected) {
reg = rd32(hw, I40E_PF_MDET_TX); if (reg & I40E_PF_MDET_TX_VALID_MASK) {
wr32(hw, I40E_PF_MDET_TX, 0xFFFF);
dev_dbg(&pf->pdev->dev, "TX driver issue detected on PF\n");
}
reg = rd32(hw, I40E_PF_MDET_RX); if (reg & I40E_PF_MDET_RX_VALID_MASK) {
wr32(hw, I40E_PF_MDET_RX, 0xFFFF);
dev_dbg(&pf->pdev->dev, "RX driver issue detected on PF\n");
}
}
/* see if one of the VFs needs its hand slapped */ for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) { bool is_mdd_on_tx = false; bool is_mdd_on_rx = false;
if ((is_mdd_on_tx || is_mdd_on_rx) &&
test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) { /* VF MDD event counters will be cleared by * reset, so print the event prior to reset.
*/ if (is_mdd_on_rx)
i40e_print_vf_mdd_event(pf, vf, false); if (is_mdd_on_tx)
i40e_print_vf_mdd_event(pf, vf, true);
/**
 * i40e_service_task - Run the driver's async subtasks
 * @work: pointer to work_struct containing our data
 *
 * Skipped entirely while a reset recovery is pending, while suspended, or
 * when another instance already holds __I40E_SERVICE_SCHED. In recovery
 * mode only the reset subtask runs; otherwise the full list of subtasks
 * runs. The admin queue is cleaned in both cases, and the task reschedules
 * itself when it overran its period or more events are pending.
 **/
static void i40e_service_task(struct work_struct *work)
{
	struct i40e_pf *pf = container_of(work, struct i40e_pf, service_task);
	unsigned long began = jiffies;

	/* don't bother with service tasks if a reset is in progress */
	if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
	    test_bit(__I40E_SUSPENDED, pf->state))
		return;

	/* only one run of the service task at a time */
	if (test_and_set_bit(__I40E_SERVICE_SCHED, pf->state))
		return;

	if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
		i40e_reset_subtask(pf);
	} else {
		i40e_detect_recover_hung(pf);
		i40e_sync_filters_subtask(pf);
		i40e_reset_subtask(pf);
		i40e_handle_mdd_event(pf);
		i40e_vc_process_vflr_event(pf);
		i40e_watchdog_subtask(pf);
		i40e_fdir_reinit_subtask(pf);

		if (test_and_clear_bit(__I40E_CLIENT_RESET, pf->state)) {
			/* Client subtask will reopen next time through. */
			i40e_notify_client_of_netdev_close(pf, true);
		} else {
			i40e_client_subtask(pf);
			if (test_and_clear_bit(__I40E_CLIENT_L2_CHANGE,
					       pf->state))
				i40e_notify_client_of_l2_param_changes(pf);
		}

		i40e_sync_filters_subtask(pf);
	}

	i40e_clean_adminq_subtask(pf);

	/* flush memory to make sure state is correct before next watchdog */
	smp_mb__before_atomic();
	clear_bit(__I40E_SERVICE_SCHED, pf->state);

	/* If the tasks have taken longer than one timer cycle or there
	 * is more work to be done, reschedule the service task now
	 * rather than wait for the timer to tick again.
	 */
	if (time_after(jiffies, (began + pf->service_timer_period)) ||
	    test_bit(__I40E_ADMINQ_EVENT_PENDING, pf->state) ||
	    test_bit(__I40E_MDD_EVENT_PENDING, pf->state) ||
	    test_bit(__I40E_VFLR_EVENT_PENDING, pf->state))
		i40e_service_event_schedule(pf);
}
/**
 * i40e_set_num_rings_in_vsi - Determine number of rings in the VSI
 * @vsi: the VSI being configured
 *
 * Fills in alloc_queue_pairs and, where applicable, num_q_vectors from the
 * PF's per-type counts. Tx/Rx descriptor counts that are still zero get the
 * default value aligned to I40E_REQ_DESCRIPTOR_MULTIPLE. In a kdump kernel
 * both descriptor counts are forced to I40E_MIN_NUM_DESCRIPTORS.
 *
 * Returns 0 on success, or -ENODATA (after a WARN) for a VSI type that is
 * not handled here.
 **/
static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;

	switch (vsi->type) {
	case I40E_VSI_MAIN:
		vsi->alloc_queue_pairs = pf->num_lan_qps;
		if (!vsi->num_tx_desc)
			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
						 I40E_REQ_DESCRIPTOR_MULTIPLE);
		if (!vsi->num_rx_desc)
			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
						 I40E_REQ_DESCRIPTOR_MULTIPLE);
		if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
			vsi->num_q_vectors = pf->num_lan_msix;
		else
			vsi->num_q_vectors = 1;
		/* must not fall through: the VMDq2 case would overwrite the
		 * queue pair and vector counts chosen above
		 */
		break;

	case I40E_VSI_VMDQ2:
		vsi->alloc_queue_pairs = pf->num_vmdq_qps;
		if (!vsi->num_tx_desc)
			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
						 I40E_REQ_DESCRIPTOR_MULTIPLE);
		if (!vsi->num_rx_desc)
			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
						 I40E_REQ_DESCRIPTOR_MULTIPLE);
		vsi->num_q_vectors = pf->num_vmdq_msix;
		break;

	case I40E_VSI_SRIOV:
		vsi->alloc_queue_pairs = pf->num_vf_qps;
		if (!vsi->num_tx_desc)
			vsi->num_tx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
						 I40E_REQ_DESCRIPTOR_MULTIPLE);
		if (!vsi->num_rx_desc)
			vsi->num_rx_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
						 I40E_REQ_DESCRIPTOR_MULTIPLE);
		break;

	default:
		WARN_ON(1);
		return -ENODATA;
	}

	/* a kdump kernel always uses the minimum ring sizes */
	if (is_kdump_kernel()) {
		vsi->num_tx_desc = I40E_MIN_NUM_DESCRIPTORS;
		vsi->num_rx_desc = I40E_MIN_NUM_DESCRIPTORS;
	}

	return 0;
}
/**
 * i40e_vsi_alloc_arrays - Allocate queue and vector pointer arrays for the vsi
 * @vsi: VSI pointer
 * @alloc_qvectors: a bool to specify if q_vectors need to be allocated.
 *
 * A single zeroed allocation holds the Tx ring pointers, then (when XDP is
 * enabled for the VSI) the XDP Tx ring pointers, then the Rx ring pointers;
 * vsi->xdp_rings and vsi->rx_rings point into that allocation. The q_vector
 * pointer array is a second, optional allocation.
 *
 * On error: returns -ENOMEM with the ring array released and the ring
 * pointers reset to NULL
 * On success: returns 0
 **/
static int i40e_vsi_alloc_arrays(struct i40e_vsi *vsi, bool alloc_qvectors)
{
	struct i40e_ring **next_rings;
	int size;

	/* allocate memory for both Tx, XDP Tx and Rx ring pointers */
	size = sizeof(struct i40e_ring *) * vsi->alloc_queue_pairs *
	       (i40e_enabled_xdp_vsi(vsi) ? 3 : 2);
	vsi->tx_rings = kzalloc(size, GFP_KERNEL);
	if (!vsi->tx_rings)
		return -ENOMEM;

	next_rings = vsi->tx_rings + vsi->alloc_queue_pairs;
	if (i40e_enabled_xdp_vsi(vsi)) {
		vsi->xdp_rings = next_rings;
		next_rings += vsi->alloc_queue_pairs;
	}
	vsi->rx_rings = next_rings;

	if (!alloc_qvectors)
		return 0;

	/* allocate memory for q_vector pointers */
	size = sizeof(struct i40e_q_vector *) * vsi->num_q_vectors;
	vsi->q_vectors = kzalloc(size, GFP_KERNEL);
	if (!vsi->q_vectors)
		goto err_vectors;

	return 0;

err_vectors:
	kfree(vsi->tx_rings);
	/* rx_rings/xdp_rings alias the block just freed; clear all three so
	 * no stale pointer survives, matching i40e_vsi_free_arrays()
	 */
	vsi->tx_rings = NULL;
	vsi->rx_rings = NULL;
	vsi->xdp_rings = NULL;
	return -ENOMEM;
}
/** * i40e_vsi_mem_alloc - Allocates the next available struct vsi in the PF * @pf: board private structure * @type: type of VSI * * On error: returns error code (negative) * On success: returns vsi index in PF (positive)
**/ staticint i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
{ int ret = -ENODEV; struct i40e_vsi *vsi; int vsi_idx; int i;
/* Need to protect the allocation of the VSIs at the PF level */
mutex_lock(&pf->switch_mutex);
/* VSI list may be fragmented if VSI creation/destruction has * been happening. We can afford to do a quick scan to look * for any free VSIs in the list. * * find next empty vsi slot, looping back around if necessary
*/
i = pf->next_vsi; while (i < pf->num_alloc_vsi && pf->vsi[i])
i++; if (i >= pf->num_alloc_vsi) {
i = 0; while (i < pf->next_vsi && pf->vsi[i])
i++;
}
if (i < pf->num_alloc_vsi && !pf->vsi[i]) {
vsi_idx = i; /* Found one! */
} else {
ret = -ENODEV; goto unlock_pf; /* out of VSI slots! */
}
pf->next_vsi = ++i;
/**
 * i40e_vsi_free_arrays - Free queue and vector pointer arrays for the VSI
 * @vsi: VSI pointer
 * @free_qvectors: a bool to specify if q_vectors need to be freed.
 *
 * Releases the q_vector pointer array (only when requested) and the ring
 * pointer array, and resets the corresponding VSI pointers to NULL.
 * Nothing is returned.
 **/
static void i40e_vsi_free_arrays(struct i40e_vsi *vsi, bool free_qvectors)
{
	/* vector container first, and only on request */
	if (free_qvectors) {
		kfree(vsi->q_vectors);
		vsi->q_vectors = NULL;
	}

	/* only tx_rings is passed to kfree(); the rx and xdp pointers are
	 * just cleared
	 */
	kfree(vsi->tx_rings);
	vsi->xdp_rings = NULL;
	vsi->rx_rings = NULL;
	vsi->tx_rings = NULL;
}
/** * i40e_clear_rss_config_user - clear the user configured RSS hash keys * and lookup table * @vsi: Pointer to VSI structure
*/ staticvoid i40e_clear_rss_config_user(struct i40e_vsi *vsi)
{ if (!vsi) return;
/** * i40e_vsi_clear - Deallocate the VSI provided * @vsi: the VSI being un-configured
**/ staticint i40e_vsi_clear(struct i40e_vsi *vsi)
{ struct i40e_pf *pf;
if (!vsi) return 0;
if (!vsi->back) goto free_vsi;
pf = vsi->back;
mutex_lock(&pf->switch_mutex); if (!pf->vsi[vsi->idx]) {
dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](type %d)\n",
vsi->idx, vsi->idx, vsi->type); goto unlock_vsi;
}
if (pf->vsi[vsi->idx] != vsi) {
dev_err(&pf->pdev->dev, "pf->vsi[%d](type %d) != vsi[%d](type %d): no free!\n",
pf->vsi[vsi->idx]->idx,
pf->vsi[vsi->idx]->type,
vsi->idx, vsi->type); goto unlock_vsi;
}
/* updates the PF for this cleared vsi */
i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
/**
 * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI
 * @vsi: the VSI being cleaned
 *
 * Does nothing unless the ring array exists and its first Tx slot is
 * populated. Otherwise each of the alloc_queue_pairs Tx entries is handed to
 * kfree_rcu() and the matching Tx, Rx and (if present) XDP slots are
 * cleared.
 **/
static void i40e_vsi_clear_rings(struct i40e_vsi *vsi)
{
	int q;

	if (!vsi->tx_rings || !vsi->tx_rings[0])
		return;

	for (q = 0; q < vsi->alloc_queue_pairs; q++) {
		/* only the Tx entry is freed; the Rx/XDP slots are cleared */
		kfree_rcu(vsi->tx_rings[q], rcu);
		WRITE_ONCE(vsi->tx_rings[q], NULL);
		WRITE_ONCE(vsi->rx_rings[q], NULL);

		if (!vsi->xdp_rings)
			continue;

		WRITE_ONCE(vsi->xdp_rings[q], NULL);
	}
}
/** * i40e_alloc_rings - Allocates the Rx and Tx rings for the provided VSI * @vsi: the VSI being configured
**/ staticint i40e_alloc_rings(struct i40e_vsi *vsi)
{ int i, qpv = i40e_enabled_xdp_vsi(vsi) ? 3 : 2; struct i40e_pf *pf = vsi->back; struct i40e_ring *ring;
/* Set basic values in the rings to be used later during open() */ for (i = 0; i < vsi->alloc_queue_pairs; i++) { /* allocate space for both Tx and Rx in one shot */
ring = kcalloc(qpv, sizeof(struct i40e_ring), GFP_KERNEL); if (!ring) goto err_out;
/**
 * i40e_reserve_msix_vectors - Reserve MSI-X vectors in the kernel
 * @pf: board private structure
 * @vectors: the number of MSI-X vectors to request
 *
 * Asks pci_enable_msix_range() for between I40E_MIN_MSIX and @vectors
 * vectors using pf->msix_entries.
 *
 * Returns the number of vectors reserved; a failure is logged and reported
 * as 0.
 **/
static int i40e_reserve_msix_vectors(struct i40e_pf *pf, int vectors)
{
	int granted;

	granted = pci_enable_msix_range(pf->pdev, pf->msix_entries,
					I40E_MIN_MSIX, vectors);
	if (granted >= 0)
		return granted;

	dev_info(&pf->pdev->dev,
		 "MSI-X vector reservation failed: %d\n", granted);
	return 0;
}
/** * i40e_init_msix - Setup the MSIX capability * @pf: board private structure * * Work with the OS to set up the MSIX vectors needed. * * Returns the number of vectors reserved or negative on failure
**/ staticint i40e_init_msix(struct i40e_pf *pf)
{ struct i40e_hw *hw = &pf->hw; int cpus, extra_vectors; int vectors_left; int v_budget, i; int v_actual; int iwarp_requested = 0;
if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) return -ENODEV;
/* The number of vectors we'll request will be comprised of: * - Add 1 for "other" cause for Admin Queue events, etc. * - The number of LAN queue pairs * - Queues being used for RSS. * We don't need as many as max_rss_size vectors. * use rss_size instead in the calculation since that * is governed by number of cpus in the system. * - assumes symmetric Tx/Rx pairing * - The number of VMDq pairs * - The CPU count within the NUMA node if iWARP is enabled * Once we count this up, try the request. * * If we can't get what we want, we'll simplify to nearly nothing * and try again. If that still fails, we punt.
*/
vectors_left = hw->func_caps.num_msix_vectors;
v_budget = 0;
/* reserve one vector for miscellaneous handler */ if (vectors_left) {
v_budget++;
vectors_left--;
}
/* reserve some vectors for the main PF traffic queues. Initially we * only reserve at most 50% of the available vectors, in the case that * the number of online CPUs is large. This ensures that we can enable * extra features as well. Once we've enabled the other features, we * will use any remaining vectors to reach as close as we can to the * number of online CPUs.
*/
cpus = num_online_cpus();
pf->num_lan_msix = min_t(int, cpus, vectors_left / 2);
vectors_left -= pf->num_lan_msix;
/* reserve one vector for sideband flow director */ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { if (vectors_left) {
pf->num_fdsb_msix = 1;
v_budget++;
vectors_left--;
} else {
pf->num_fdsb_msix = 0;
}
}
/* can we reserve enough for iWARP? */ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
iwarp_requested = pf->num_iwarp_msix;
/* any vectors left over go for VMDq support */ if (test_bit(I40E_FLAG_VMDQ_ENA, pf->flags)) { if (!vectors_left) {
pf->num_vmdq_msix = 0;
pf->num_vmdq_qps = 0;
} else { int vmdq_vecs_wanted =
pf->num_vmdq_vsis * pf->num_vmdq_qps; int vmdq_vecs =
min_t(int, vectors_left, vmdq_vecs_wanted);
/* if we're short on vectors for what's desired, we limit * the queues per vmdq. If this is still more than are * available, the user will need to change the number of * queues/vectors used by the PF later with the ethtool * channels command
*/ if (vectors_left < vmdq_vecs_wanted) {
pf->num_vmdq_qps = 1;
vmdq_vecs_wanted = pf->num_vmdq_vsis;
vmdq_vecs = min_t(int,
vectors_left,
vmdq_vecs_wanted);
}
pf->num_vmdq_msix = pf->num_vmdq_qps;
/* On systems with a large number of SMP cores, we previously limited * the number of vectors for num_lan_msix to be at most 50% of the * available vectors, to allow for other features. Now, we add back * the remaining vectors. However, we ensure that the total * num_lan_msix will not exceed num_online_cpus(). To do this, we * calculate the number of vectors we can add without going over the * cap of CPUs. For systems with a small number of CPUs this will be * zero.
*/
extra_vectors = min_t(int, cpus - pf->num_lan_msix, vectors_left);
pf->num_lan_msix += extra_vectors;
vectors_left -= extra_vectors;
WARN(vectors_left < 0, "Calculation of remaining vectors underflowed. This is an accounting bug when determining total MSI-X vectors.\n");
} elseif (v_actual != v_budget) { /* If we have limited resources, we will start with no vectors * for the special features and then allocate vectors to some * of these features based on the policy and at the end disable * the features that did not get any vectors.
*/ int vec;
dev_info(&pf->pdev->dev, "MSI-X vector limit reached with %d, wanted %d, attempting to redistribute vectors\n",
v_actual, v_budget); /* reserve the misc vector */
vec = v_actual - 1;
/* Scale vector usage down */
pf->num_vmdq_msix = 1; /* force VMDqs to only one vector */
pf->num_vmdq_vsis = 1;
pf->num_vmdq_qps = 1;
/** * i40e_vsi_alloc_q_vector - Allocate memory for a single interrupt vector * @vsi: the VSI being configured * @v_idx: index of the vector in the vsi struct * * We allocate one q_vector. If allocation fails we return -ENOMEM.
**/ staticint i40e_vsi_alloc_q_vector(struct i40e_vsi *vsi, int v_idx)
{ struct i40e_q_vector *q_vector;
if (vsi->netdev)
netif_napi_add(vsi->netdev, &q_vector->napi, i40e_napi_poll);
/* tie q_vector and vsi together */
vsi->q_vectors[v_idx] = q_vector;
return 0;
}
/** * i40e_vsi_alloc_q_vectors - Allocate memory for interrupt vectors * @vsi: the VSI being configured * * We allocate one q_vector per queue interrupt. If allocation fails we * return -ENOMEM.
**/ staticint i40e_vsi_alloc_q_vectors(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back; int err, v_idx, num_q_vectors;
/* if not MSIX, give the one vector only to the LAN VSI */ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
num_q_vectors = vsi->num_q_vectors; elseif (vsi->type == I40E_VSI_MAIN)
num_q_vectors = 1; else return -EINVAL;
for (v_idx = 0; v_idx < num_q_vectors; v_idx++) {
err = i40e_vsi_alloc_q_vector(vsi, v_idx); if (err) goto err_out;
}
return 0;
err_out: while (v_idx--)
i40e_free_q_vector(vsi, v_idx);
/* rework the queue expectations without MSIX */
i40e_determine_queue_usage(pf);
}
}
if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags) &&
test_bit(I40E_FLAG_MSI_ENA, pf->flags)) {
dev_info(&pf->pdev->dev, "MSI-X not available, trying MSI\n");
vectors = pci_enable_msi(pf->pdev); if (vectors < 0) {
dev_info(&pf->pdev->dev, "MSI init failed - %d\n",
vectors);
clear_bit(I40E_FLAG_MSI_ENA, pf->flags);
}
vectors = 1; /* one MSI or Legacy vector */
}
if (!test_bit(I40E_FLAG_MSI_ENA, pf->flags) &&
!test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
dev_info(&pf->pdev->dev, "MSI-X and MSI not available, falling back to Legacy IRQ\n");
/* set up vector assignment tracking */
size = sizeof(struct i40e_lump_tracking) + (sizeof(u16) * vectors);
pf->irq_pile = kzalloc(size, GFP_KERNEL); if (!pf->irq_pile) return -ENOMEM;
pf->irq_pile->num_entries = vectors;
/* track first vector for misc interrupts, ignore return */
(void)i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT - 1);
return 0;
}
/**
 * i40e_restore_interrupt_scheme - Restore the interrupt scheme
 * @pf: private board data structure
 *
 * Rebuilds the interrupt scheme that was cleared when the device was
 * suspended. Intended for the resume path: the q_vectors are allocated
 * again and the IRQs are reacquired.
 *
 * Returns 0 on success, otherwise the error reported by the step that
 * failed (q_vectors allocated so far are released first).
 */
static int i40e_restore_interrupt_scheme(struct i40e_pf *pf)
{
	struct i40e_vsi *vsi;
	int idx;
	int ret;

	/* Tearing down the old scheme cleared both the MSI and the MSI-X
	 * flag, so set them again before trying to re-acquire vectors.
	 */
	set_bit(I40E_FLAG_MSI_ENA, pf->flags);
	set_bit(I40E_FLAG_MSIX_ENA, pf->flags);

	ret = i40e_init_interrupt_scheme(pf);
	if (ret)
		return ret;

	/* With IRQs back in hand, give every VSI its vectors and map the
	 * rings onto them again.
	 */
	i40e_pf_for_each_vsi(pf, idx, vsi) {
		ret = i40e_vsi_alloc_q_vectors(vsi);
		if (ret)
			goto err_unwind;

		i40e_vsi_map_rings_to_vectors(vsi);
	}

	ret = i40e_setup_misc_vector(pf);
	if (ret)
		goto err_unwind;

	if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags))
		i40e_client_update_msix_info(pf);

	return 0;

err_unwind:
	/* walk back over the VSI slots below the one that failed */
	for (idx--; idx >= 0; idx--) {
		if (!pf->vsi[idx])
			continue;

		i40e_vsi_free_q_vectors(pf->vsi[idx]);
	}

	return ret;
}
/** * i40e_setup_misc_vector_for_recovery_mode - Setup the misc vector to handle * non queue events in recovery mode * @pf: board private structure * * This sets up the handler for MSIX 0 or MSI/legacy, which is used to manage * the non-queue interrupts, e.g. AdminQ and errors in recovery mode. * This is handled differently than in recovery mode since no Tx/Rx resources * are being allocated.
**/ staticint i40e_setup_misc_vector_for_recovery_mode(struct i40e_pf *pf)
{ int err;
if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
err = i40e_setup_misc_vector(pf);
/**
 * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
 * @pf: board private structure
 *
 * Installs i40e_intr() on the vector in pf->msix_entries[0] (MSIX 0), which
 * serves the non-queue interrupts, e.g. AdminQ and errors. This is not used
 * when in MSI or Legacy interrupt mode.
 *
 * Returns 0 on success, -EFAULT if the IRQ cannot be requested.
 **/
static int i40e_setup_misc_vector(struct i40e_pf *pf)
{
	struct i40e_hw *hw = &pf->hw;
	int irq_err;

	/* The state bit makes sure the IRQ is requested on the first pass
	 * only; later calls just reprogram the hardware below.
	 */
	if (!test_and_set_bit(__I40E_MISC_IRQ_REQUESTED, pf->state)) {
		irq_err = request_irq(pf->msix_entries[0].vector, i40e_intr, 0,
				      pf->int_name, pf);
		if (irq_err) {
			/* let a later call retry the request */
			clear_bit(__I40E_MISC_IRQ_REQUESTED, pf->state);
			dev_info(&pf->pdev->dev,
				 "request_irq for %s failed: %d\n",
				 pf->int_name, irq_err);
			return -EFAULT;
		}
	}

	i40e_enable_misc_int_causes(pf);

	/* associate no queues to the misc vector */
	wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST);
	wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K >> 1);

	i40e_flush(hw);

	i40e_irq_dynamic_enable_icr0(pf);

	return 0;
}
/**
 * i40e_get_rss_aq - Get RSS keys and lut by using AQ commands
 * @vsi: Pointer to vsi structure
 * @seed: Buffer to store the hash keys, may be NULL to skip the key
 * @lut: Buffer to store the lookup table entries, may be NULL to skip the lut
 * @lut_size: Size of buffer to store the lookup table entries
 *
 * Return 0 on success, negative on failure
 */
static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed,
			   u8 *lut, u16 lut_size)
{
	struct i40e_pf *pf = vsi->back;
	struct i40e_hw *hw = &pf->hw;
	int ret = 0;

	if (seed) {
		ret = i40e_aq_get_rss_key(hw, vsi->id,
			(struct i40e_aqc_get_set_rss_key_data *)seed);
		if (ret) {
			dev_info(&pf->pdev->dev,
				 "Cannot get RSS key, err %pe aq_err %s\n",
				 ERR_PTR(ret),
				 libie_aq_str(pf->hw.aq.asq_last_status));
			return ret;
		}
	}

	if (lut) {
		/* only the main VSI uses the PF-wide lookup table */
		bool pf_lut = vsi->type == I40E_VSI_MAIN;

		ret = i40e_aq_get_rss_lut(hw, vsi->id, pf_lut, lut, lut_size);
		if (ret) {
			dev_info(&pf->pdev->dev,
				 "Cannot get RSS lut, err %pe aq_err %s\n",
				 ERR_PTR(ret),
				 libie_aq_str(pf->hw.aq.asq_last_status));
			return ret;
		}
	}

	return ret;
}

/**
 * i40e_config_rss_reg - Configure RSS keys and lut by writing registers
 * @vsi: Pointer to vsi structure
 * @seed: RSS hash seed, may be NULL to leave the key untouched
 * @lut: Lookup table, may be NULL to leave the table untouched
 * @lut_size: Lookup table size
 *
 * Only main and SR-IOV VSIs have registers to program here; any other VSI
 * type is reported with dev_err() and otherwise ignored.
 *
 * Returns 0 on success, -EINVAL if @lut_size does not match the table size
 * expected for the VSI type.
 */
static int i40e_config_rss_reg(struct i40e_vsi *vsi, const u8 *seed,
			       const u8 *lut, u16 lut_size)
{
	struct i40e_pf *pf = vsi->back;
	struct i40e_hw *hw = &pf->hw;
	u16 vf_id = vsi->vf_id;
	u8 i;

	/* Fill out hash function seed */
	if (seed) {
		const u32 *seed_dw = (const u32 *)seed;

		if (vsi->type == I40E_VSI_MAIN) {
			for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
				wr32(hw, I40E_PFQF_HKEY(i), seed_dw[i]);
		} else if (vsi->type == I40E_VSI_SRIOV) {
			for (i = 0; i <= I40E_VFQF_HKEY1_MAX_INDEX; i++)
				wr32(hw, I40E_VFQF_HKEY1(i, vf_id), seed_dw[i]);
		} else {
			dev_err(&pf->pdev->dev, "Cannot set RSS seed - invalid VSI type\n");
		}
	}

	if (lut) {
		const u32 *lut_dw = (const u32 *)lut;

		if (vsi->type == I40E_VSI_MAIN) {
			if (lut_size != I40E_HLUT_ARRAY_SIZE)
				return -EINVAL;
			for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
				wr32(hw, I40E_PFQF_HLUT(i), lut_dw[i]);
		} else if (vsi->type == I40E_VSI_SRIOV) {
			if (lut_size != I40E_VF_HLUT_ARRAY_SIZE)
				return -EINVAL;
			for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
				wr32(hw, I40E_VFQF_HLUT1(i, vf_id), lut_dw[i]);
		} else {
			dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
		}
	}
	i40e_flush(hw);

	return 0;
}
/**
 * i40e_get_rss_reg - Get the RSS keys and lut by reading registers
 * @vsi: Pointer to VSI structure
 * @seed: Buffer to store the keys, may be NULL to skip the key
 * @lut: Buffer to store the lookup table entries, may be NULL to skip the lut
 * @lut_size: Size of buffer to store the lookup table entries
 *
 * The key is copied out before @lut_size is validated, so @seed may already
 * be filled in when -EINVAL is returned.
 *
 * Returns 0 on success, negative on failure
 */
static int i40e_get_rss_reg(struct i40e_vsi *vsi, u8 *seed,
			    u8 *lut, u16 lut_size)
{
	struct i40e_hw *hw = &vsi->back->hw;
	u32 *key_words = (u32 *)seed;
	u32 *lut_words = (u32 *)lut;
	u16 idx;

	/* hash key: one PFQF_HKEY register per 32-bit word */
	if (key_words) {
		for (idx = 0; idx <= I40E_PFQF_HKEY_MAX_INDEX; idx++)
			key_words[idx] = i40e_read_rx_ctl(hw,
							  I40E_PFQF_HKEY(idx));
	}

	if (!lut_words)
		return 0;

	if (lut_size != I40E_HLUT_ARRAY_SIZE)
		return -EINVAL;

	/* lookup table: one PFQF_HLUT register per 32-bit word */
	for (idx = 0; idx <= I40E_PFQF_HLUT_MAX_INDEX; idx++)
		lut_words[idx] = rd32(hw, I40E_PFQF_HLUT(idx));

	return 0;
}
/** * i40e_config_rss - Configure RSS keys and lut * @vsi: Pointer to VSI structure * @seed: RSS hash seed * @lut: Lookup table * @lut_size: Lookup table size * * Returns 0 on success, negative on failure
*/ int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
{ struct i40e_pf *pf = vsi->back;
/** * i40e_get_rss - Get RSS keys and lut * @vsi: Pointer to VSI structure * @seed: Buffer to store the keys * @lut: Buffer to store the lookup table entries * @lut_size: Size of buffer to store the lookup table entries * * Returns 0 on success, negative on failure
*/ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
{ struct i40e_pf *pf = vsi->back;
/* Determine the RSS table size based on the hardware capabilities */
reg_val = i40e_read_rx_ctl(hw, I40E_PFQF_CTL_0);
reg_val = (pf->rss_table_size == 512) ?
(reg_val | I40E_PFQF_CTL_0_HASHLUTSIZE_512) :
(reg_val & ~I40E_PFQF_CTL_0_HASHLUTSIZE_512);
i40e_write_rx_ctl(hw, I40E_PFQF_CTL_0, reg_val);
/* Determine the RSS size of the VSI */ if (!vsi->rss_size) {
u16 qcount; /* If the firmware does something weird during VSI init, we * could end up with zero TCs. Check for that to avoid * divide-by-zero. It probably won't pass traffic, but it also * won't panic.
*/
qcount = vsi->num_queue_pairs /
(vsi->tc_config.numtc ? vsi->tc_config.numtc : 1);
vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
} if (!vsi->rss_size) return -EINVAL;
lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) return -ENOMEM;
/* Use user configured lut if there is one, otherwise use default */ if (vsi->rss_lut_user)
memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); else
i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size);
/* Use user configured hash key if there is one, otherwise * use default.
*/ if (vsi->rss_hkey_user)
memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); else
netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE);
ret = i40e_config_rss(vsi, seed, lut, vsi->rss_table_size);
kfree(lut);
return ret;
}
/** * i40e_reconfig_rss_queues - change number of queues for rss and rebuild * @pf: board private structure * @queue_count: the requested queue count for rss. * * returns 0 if rss is not enabled, if enabled returns the final rss queue * count which may be different from the requested queue count. * Note: expects to be called while under rtnl_lock()
**/ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
{ struct i40e_vsi *vsi = i40e_pf_get_main_vsi(pf); int new_rss_size;
if (!test_bit(I40E_FLAG_RSS_ENA, pf->flags)) return 0;
if (queue_count != vsi->num_queue_pairs) {
u16 qcount;
vsi->req_queue_pairs = queue_count;
i40e_prep_for_reset(pf); if (test_bit(__I40E_IN_REMOVE, pf->state)) return pf->alloc_rss_size;
pf->alloc_rss_size = new_rss_size;
i40e_reset_and_rebuild(pf, true, true);
/* Discard the user configured hash keys and lut, if less * queues are enabled.
*/ if (queue_count < vsi->rss_size) {
i40e_clear_rss_config_user(vsi);
dev_dbg(&pf->pdev->dev, "discard user configured hash keys and lut\n");
}
/* Reset vsi->rss_size, as number of enabled queues changed */
qcount = vsi->num_queue_pairs / vsi->tc_config.numtc;
vsi->rss_size = min_t(int, pf->alloc_rss_size, qcount);
/**
 * i40e_get_partition_bw_setting - Retrieve BW settings for this PF partition
 * @pf: board private structure
 *
 * Reads the min/max bandwidth via i40e_read_bw_from_alt_ram() and caches in
 * @pf only the values that the read flagged as valid.
 *
 * Returns the status of the read (0 on success).
 **/
int i40e_get_partition_bw_setting(struct i40e_pf *pf)
{
	bool have_min, have_max;
	u32 bw_max, bw_min;
	int rc;

	rc = i40e_read_bw_from_alt_ram(&pf->hw, &bw_max, &bw_min,
				       &have_min, &have_max);
	if (rc)
		return rc;

	if (have_min)
		pf->min_bw = bw_min;

	if (have_max)
		pf->max_bw = bw_max;

	return 0;
}
/** * i40e_set_partition_bw_setting - Set BW settings for this PF partition * @pf: board private structure
**/ int i40e_set_partition_bw_setting(struct i40e_pf *pf)
{ struct i40e_aqc_configure_partition_bw_data bw_data; int status;
memset(&bw_data, 0, sizeof(bw_data));
/* Set the valid bit for this PF */
bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id));
bw_data.max_bw[pf->hw.pf_id] = pf->max_bw & I40E_ALT_BW_VALUE_MASK;
bw_data.min_bw[pf->hw.pf_id] = pf->min_bw & I40E_ALT_BW_VALUE_MASK;
/* Set the new bandwidths */
status = i40e_aq_configure_partition_bw(&pf->hw, &bw_data, NULL);
return status;
}
/** * i40e_is_total_port_shutdown_enabled - read NVM and return value * if total port shutdown feature is enabled for this PF * @pf: board private structure
**/ staticbool i40e_is_total_port_shutdown_enabled(struct i40e_pf *pf)
{ #define I40E_TOTAL_PORT_SHUTDOWN_ENABLED BIT(4) #define I40E_FEATURES_ENABLE_PTR 0x2A #define I40E_CURRENT_SETTING_PTR 0x2B #define I40E_LINK_BEHAVIOR_WORD_OFFSET 0x2D #define I40E_LINK_BEHAVIOR_WORD_LENGTH 0x1 #define I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED BIT(0) #define I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH 4
u16 sr_emp_sr_settings_ptr = 0;
u16 features_enable = 0;
u16 link_behavior = 0; int read_status = 0; bool ret = false;
read_status = i40e_read_nvm_word(&pf->hw,
I40E_SR_EMP_SR_SETTINGS_PTR,
&sr_emp_sr_settings_ptr); if (read_status) goto err_nvm;
read_status = i40e_read_nvm_word(&pf->hw,
sr_emp_sr_settings_ptr +
I40E_FEATURES_ENABLE_PTR,
&features_enable); if (read_status) goto err_nvm; if (I40E_TOTAL_PORT_SHUTDOWN_ENABLED & features_enable) {
read_status = i40e_read_nvm_module_data(&pf->hw,
I40E_SR_EMP_SR_SETTINGS_PTR,
I40E_CURRENT_SETTING_PTR,
I40E_LINK_BEHAVIOR_WORD_OFFSET,
I40E_LINK_BEHAVIOR_WORD_LENGTH,
&link_behavior); if (read_status) goto err_nvm;
link_behavior >>= (pf->hw.port * I40E_LINK_BEHAVIOR_PORT_BIT_LENGTH);
ret = I40E_LINK_BEHAVIOR_OS_FORCED_ENABLED & link_behavior;
} return ret;
err_nvm:
dev_warn(&pf->pdev->dev, "total-port-shutdown feature is off due to read nvm error: %pe\n",
ERR_PTR(read_status)); return ret;
}
/**
 * i40e_sw_init - Initialize general software structures (struct i40e_pf)
 * @pf: board private structure to initialize
 *
 * Sets up the RSS sizing fields from the function capabilities and the
 * online CPU count, enables iWARP when the hardware offers it and more than
 * one CPU is online, adjusts the FW LLDP and total-port-shutdown flags, and
 * initializes the switch mutex.
 *
 * Returns 0; nothing in this function can fail.
 **/
static int i40e_sw_init(struct i40e_pf *pf)
{
	u16 pow;

	/* Depending on PF configurations, it is possible that the RSS
	 * maximum might end up larger than the available queues
	 */
	pf->rss_size_max = BIT(pf->hw.func_caps.rss_table_entry_width);
	pf->alloc_rss_size = 1;
	pf->rss_table_size = pf->hw.func_caps.rss_table_size;
	pf->rss_size_max = min_t(int, pf->rss_size_max,
				 pf->hw.func_caps.num_tx_qp);

	/* find the next higher power-of-2 of num cpus */
	pow = roundup_pow_of_two(num_online_cpus());
	pf->rss_size_max = min_t(int, pf->rss_size_max, pow);

	if (pf->hw.func_caps.iwarp && num_online_cpus() != 1) {
		set_bit(I40E_FLAG_IWARP_ENA, pf->flags);
		/* IWARP needs one extra vector for CQP just like MISC.*/
		pf->num_iwarp_msix = (int)num_online_cpus() + 1;
	}

	/* Stopping FW LLDP engine is supported on XL710 and X722
	 * starting from FW versions determined in i40e_init_adminq.
	 * Stopping the FW LLDP engine is not supported on XL710
	 * if NPAR is functioning so unset this hw flag in this case.
	 */
	if (pf->hw.mac.type == I40E_MAC_XL710 &&
	    pf->hw.func_caps.npar_enable)
		clear_bit(I40E_HW_CAP_FW_LLDP_STOPPABLE, pf->hw.caps);

	if (pf->hw.mac.type != I40E_MAC_X722 &&
	    i40e_is_total_port_shutdown_enabled(pf)) {
		/* Link down on close must be on when total port shutdown
		 * is enabled for a given port
		 */
		set_bit(I40E_FLAG_TOTAL_PORT_SHUTDOWN_ENA, pf->flags);
		set_bit(I40E_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags);
		dev_info(&pf->pdev->dev,
			 "total-port-shutdown was enabled, link-down-on-close is forced on\n");
	}

	mutex_init(&pf->switch_mutex);

	return 0;
}
/**
 * i40e_set_ntuple - set the ntuple feature flag and take action
 * @pf: board private structure to initialize
 * @features: the feature set that the stack is suggesting
 *
 * A reset is needed whenever the requested NETIF_F_NTUPLE state differs from
 * the current Flow Director sideband (FD_SB) enable flag.
 *
 * returns a bool to indicate if reset needs to happen
 **/
bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features)
{
	/* sample the current state before any flag is touched below */
	bool fd_sb_was_on = test_bit(I40E_FLAG_FD_SB_ENA, pf->flags);

	if (features & NETIF_F_NTUPLE) {
		/* enable FD_SB only if there is MSI-X vector and no cloud
		 * filters exist
		 */
		if (pf->num_fdsb_msix > 0 && !pf->num_cloud_filters) {
			set_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
			clear_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
		}

		/* switching on from "off" is a state change */
		return !fd_sb_was_on;
	}

	/* turn off filters and clear SW filter list */
	if (fd_sb_was_on)
		i40e_fdir_filter_exit(pf);

	clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
	clear_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state);
	set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);

	/* reset fd counters */
	pf->fd_add_err = 0;
	pf->fd_atr_cnt = 0;

	/* if ATR was auto disabled it can be re-enabled. */
	if (test_and_clear_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state) &&
	    test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
	    (I40E_DEBUG_FD & pf->hw.debug_mask))
		dev_info(&pf->pdev->dev, "ATR re-enabled.\n");

	/* switching off from "on" is a state change */
	return fd_sb_was_on;
}
/** * i40e_clear_rss_lut - clear the rx hash lookup table * @vsi: the VSI being configured
**/ staticvoid i40e_clear_rss_lut(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw;
u16 vf_id = vsi->vf_id;
u8 i;
if (vsi->type == I40E_VSI_MAIN) { for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++)
wr32(hw, I40E_PFQF_HLUT(i), 0);
} elseif (vsi->type == I40E_VSI_SRIOV) { for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++)
i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0);
} else {
dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n");
}
}
/**
 * i40e_set_loopback - turn on/off loopback mode on underlying PF
 * @vsi: ptr to VSI
 * @ena: flag to indicate the on/off setting
 *
 * If the netdev is running and this call is the one that sets the VSI-down
 * state bit, the VSI is brought down around the change and back up after it.
 *
 * Returns the status of i40e_aq_set_mac_loopback().
 */
static int i40e_set_loopback(struct i40e_vsi *vsi, bool ena)
{
	bool bounce = false;
	int err;

	/* only bounce the VSI when we are the ones marking it down */
	if (netif_running(vsi->netdev))
		bounce = !test_and_set_bit(__I40E_VSI_DOWN, vsi->state);

	if (bounce)
		i40e_down(vsi);

	err = i40e_aq_set_mac_loopback(&vsi->back->hw, ena, NULL);
	if (err)
		netdev_err(vsi->netdev, "Failed to toggle loopback state\n");

	if (bounce)
		i40e_up(vsi);

	return err;
}
/** * i40e_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted * @features: the feature set that the stack is suggesting * Note: expects to be called while under rtnl_lock()
**/ staticint i40e_set_features(struct net_device *netdev,
netdev_features_t features)
{ struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; bool need_reset;
/**
 * i40e_ndo_fdb_add - add an entry to the hardware database
 * @ndm: the input from the stack
 * @tb: pointer to array of nladdr (unused)
 * @dev: the net device pointer
 * @addr: the MAC address entry being added
 * @vid: VLAN ID
 * @flags: instructions from stack about fdb operation
 * @notified: whether notification was emitted
 * @extack: netlink extended ack, unused currently
 *
 * Returns 0 on success, -EOPNOTSUPP when SR-IOV is not enabled, -EINVAL for
 * a VLAN-tagged, non-permanent or unclassifiable address, or the error from
 * adding the address to the device's address list.
 */
static int i40e_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
			    struct net_device *dev,
			    const unsigned char *addr, u16 vid,
			    u16 flags, bool *notified,
			    struct netlink_ext_ack *extack)
{
	struct i40e_netdev_priv *np = netdev_priv(dev);
	struct i40e_pf *pf = np->vsi->back;
	int err = 0;

	if (!test_bit(I40E_FLAG_SRIOV_ENA, pf->flags))
		return -EOPNOTSUPP;

	if (vid) {
		pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
		return -EINVAL;
	}

	/* Hardware does not support aging addresses so if a
	 * ndm_state is given only allow permanent addresses
	 */
	if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) {
		netdev_info(dev, "FDB only supports static addresses\n");
		return -EINVAL;
	}

	/* Add the address to the matching device list; without this step
	 * the entry is never installed and err can never become -EEXIST.
	 */
	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
		err = dev_uc_add_excl(dev, addr);
	else if (is_multicast_ether_addr(addr))
		err = dev_mc_add_excl(dev, addr);
	else
		err = -EINVAL;

	/* Only return duplicate errors if NLM_F_EXCL is set */
	if (err == -EEXIST && !(flags & NLM_F_EXCL))
		err = 0;

	return err;
}
/** * i40e_ndo_bridge_setlink - Set the hardware bridge mode * @dev: the netdev being configured * @nlh: RTNL message * @flags: bridge flags * @extack: netlink extended ack * * Inserts a new hardware bridge if not already created and * enables the bridging mode requested (VEB or VEPA). If the * hardware bridge has already been inserted and the request * is to change the mode then that requires a PF reset to * allow rebuild of the components with required hardware * bridge mode enabled. * * Note: expects to be called while under rtnl_lock()
**/ staticint i40e_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
u16 flags, struct netlink_ext_ack *extack)
{ struct i40e_netdev_priv *np = netdev_priv(dev); struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; struct nlattr *attr, *br_spec; struct i40e_veb *veb; int rem;
/* Only for PF VSI for now */ if (vsi->type != I40E_VSI_MAIN) return -EOPNOTSUPP;
/* Find the HW bridge for PF VSI */
veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid);
br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); if (!br_spec) return -EINVAL;
/**
 * i40e_features_check - Validate encapsulated packet conforms to limits
 * @skb: skb buff
 * @dev: This physical port's netdev
 * @features: Offload features that the stack believes apply
 *
 * Returns @features unchanged for frames that need no offload, @features
 * without GSO when the MSS is below 64 bytes, and @features without both
 * checksum and GSO offload when a header length fails one of the checks.
 **/
static netdev_features_t i40e_features_check(struct sk_buff *skb,
					     struct net_device *dev,
					     netdev_features_t features)
{
	netdev_features_t no_offload;
	size_t hdr_len;

	/* Neither checksum nor GSO is being requested unless the frame is
	 * CHECKSUM_PARTIAL, so there is nothing to validate otherwise.
	 */
	if (skb->ip_summed != CHECKSUM_PARTIAL)
		return features;

	/* GSO cannot be supported with an MSS under 64 bytes; drop it. */
	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
		features &= ~NETIF_F_GSO_MASK;

	/* what is handed back when any header length check below fails */
	no_offload = features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);

	/* MACLEN can support at most 63 words */
	hdr_len = skb_network_offset(skb);
	if (hdr_len & ~(63 * 2))
		return no_offload;

	/* IPLEN and EIPLEN can support at most 127 dwords */
	hdr_len = skb_network_header_len(skb);
	if (hdr_len & ~(127 * 4))
		return no_offload;

	/* the remaining checks apply to tunnelled frames only */
	if (!skb->encapsulation)
		return features;

	/* L4TUNLEN can support 127 words */
	hdr_len = skb_inner_network_header(skb) - skb_transport_header(skb);
	if (hdr_len & ~(127 * 2))
		return no_offload;

	/* IPLEN can support at most 127 dwords */
	hdr_len = skb_inner_transport_header(skb) -
		  skb_inner_network_header(skb);
	if (hdr_len & ~(127 * 4))
		return no_offload;

	/* No need to validate L4LEN as TCP is the only protocol with a
	 * flexible value and we support all possible values supported
	 * by TCP, which is at most 15 dwords
	 */
	return features;
}
/** * i40e_xdp_setup - add/remove an XDP program * @vsi: VSI to changed * @prog: XDP program * @extack: netlink extended ack
**/ staticint i40e_xdp_setup(struct i40e_vsi *vsi, struct bpf_prog *prog, struct netlink_ext_ack *extack)
{ int frame_size = i40e_max_vsi_frame_size(vsi, prog); struct i40e_pf *pf = vsi->back; struct bpf_prog *old_prog; bool need_reset; int i;
/* VSI shall be deleted in a moment, block loading new programs */ if (prog && test_bit(__I40E_IN_REMOVE, pf->state)) return -EINVAL;
/* Don't allow frames that span over multiple buffers */ if (vsi->netdev->mtu > frame_size - I40E_PACKET_HDR_PAD) {
NL_SET_ERR_MSG_MOD(extack, "MTU too large for linear frames and XDP prog does not support frags"); return -EINVAL;
}
/* When turning XDP on->off/off->on we reset and rebuild the rings. */
need_reset = (i40e_enabled_xdp_vsi(vsi) != !!prog); if (need_reset)
i40e_prep_for_reset(pf);
old_prog = xchg(&vsi->xdp_prog, prog);
if (need_reset) { if (!prog) {
xdp_features_clear_redirect_target(vsi->netdev); /* Wait until ndo_xsk_wakeup completes. */
synchronize_rcu();
}
i40e_reset_and_rebuild(pf, true, true);
}
if (!i40e_enabled_xdp_vsi(vsi) && prog) { if (i40e_realloc_rx_bi_zc(vsi, true)) return -ENOMEM;
} elseif (i40e_enabled_xdp_vsi(vsi) && !prog) { if (i40e_realloc_rx_bi_zc(vsi, false)) return -ENOMEM;
}
for (i = 0; i < vsi->num_queue_pairs; i++)
WRITE_ONCE(vsi->rx_rings[i]->xdp_prog, vsi->xdp_prog);
if (old_prog)
bpf_prog_put(old_prog);
/* Kick start the NAPI context if there is an AF_XDP socket open * on that queue id. This so that receiving will start.
*/ if (need_reset && prog) { for (i = 0; i < vsi->num_queue_pairs; i++) if (vsi->xdp_rings[i]->xsk_pool)
(void)i40e_xsk_wakeup(vsi->netdev, i,
XDP_WAKEUP_RX);
xdp_features_set_redirect_target(vsi->netdev, true);
}
return 0;
}
/** * i40e_enter_busy_conf - Enters busy config state * @vsi: vsi * * Returns 0 on success, <0 for failure.
**/ staticint i40e_enter_busy_conf(struct i40e_vsi *vsi)
{ struct i40e_pf *pf = vsi->back; int timeout = 50;
while (test_and_set_bit(__I40E_CONFIG_BUSY, pf->state)) {
timeout--; if (!timeout) return -EBUSY;
usleep_range(1000, 2000);
}
/** * i40e_queue_pair_reset_stats - Resets all statistics for a queue pair * @vsi: vsi * @queue_pair: queue pair
**/ staticvoid i40e_queue_pair_reset_stats(struct i40e_vsi *vsi, int queue_pair)
{
memset(&vsi->rx_rings[queue_pair]->rx_stats, 0, sizeof(vsi->rx_rings[queue_pair]->rx_stats));
memset(&vsi->tx_rings[queue_pair]->stats, 0, sizeof(vsi->tx_rings[queue_pair]->stats)); if (i40e_enabled_xdp_vsi(vsi)) {
memset(&vsi->xdp_rings[queue_pair]->stats, 0, sizeof(vsi->xdp_rings[queue_pair]->stats));
}
}
/**
 * i40e_queue_pair_clean_rings - Cleans all the rings of a queue pair
 * @vsi: vsi
 * @queue_pair: queue pair
 *
 * Order: Tx ring, then the XDP Tx ring (only when XDP is enabled on the
 * VSI), then the Rx ring.
 **/
static void i40e_queue_pair_clean_rings(struct i40e_vsi *vsi, int queue_pair)
{
	i40e_clean_tx_ring(vsi->tx_rings[queue_pair]);

	if (!i40e_enabled_xdp_vsi(vsi))
		goto clean_rx;

	/* Make sure that in-progress ndo_xdp_xmit calls are
	 * completed.
	 */
	synchronize_rcu();
	i40e_clean_tx_ring(vsi->xdp_rings[queue_pair]);

clean_rx:
	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
}
/**
 * i40e_queue_pair_toggle_napi - Enables/disables NAPI for a queue pair
 * @vsi: vsi
 * @queue_pair: queue pair
 * @enable: true for enable, false for disable
 *
 * Does nothing when the VSI has no netdev or when the q_vector has neither
 * an Rx nor a Tx ring attached.
 **/
static void i40e_queue_pair_toggle_napi(struct i40e_vsi *vsi, int queue_pair,
					bool enable)
{
	/* All rings in a qp belong to the same qvector, so the Rx ring's
	 * q_vector stands for the whole pair.
	 */
	struct i40e_q_vector *qv = vsi->rx_rings[queue_pair]->q_vector;

	if (!vsi->netdev)
		return;

	if (!qv->rx.ring && !qv->tx.ring)
		return;

	if (enable)
		napi_enable(&qv->napi);
	else
		napi_disable(&qv->napi);
}
/** * i40e_queue_pair_toggle_rings - Enables/disables all rings for a queue pair * @vsi: vsi * @queue_pair: queue pair * @enable: true for enable, false for disable * * Returns 0 on success, <0 on failure.
**/ staticint i40e_queue_pair_toggle_rings(struct i40e_vsi *vsi, int queue_pair, bool enable)
{ struct i40e_pf *pf = vsi->back; int pf_q, ret = 0;
/**
 * i40e_queue_pair_enable_irq - Enables interrupts for a queue pair
 * @vsi: vsi
 * @queue_pair: queue_pair
 *
 * With MSI-X the pair's own vector is enabled; otherwise the shared ICR0
 * interrupt is enabled instead.
 **/
static void i40e_queue_pair_enable_irq(struct i40e_vsi *vsi, int queue_pair)
{
	struct i40e_ring *rx_ring = vsi->rx_rings[queue_pair];
	struct i40e_pf *pf = vsi->back;

	/* All rings in a qp belong to the same qvector. */
	if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
		i40e_irq_dynamic_enable_icr0(pf);
	else
		i40e_irq_dynamic_enable(vsi, rx_ring->q_vector->v_idx);

	i40e_flush(&pf->hw);
}
/** * i40e_queue_pair_disable_irq - Disables interrupts for a queue pair * @vsi: vsi * @queue_pair: queue_pair
**/ staticvoid i40e_queue_pair_disable_irq(struct i40e_vsi *vsi, int queue_pair)
{ struct i40e_ring *rxr = vsi->rx_rings[queue_pair]; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw;
/* For simplicity, instead of removing the qp interrupt causes * from the interrupt linked list, we simply disable the interrupt, and * leave the list intact. * * All rings in a qp belong to the same qvector.
*/ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
u32 intpf = vsi->base_vector + rxr->q_vector->v_idx;
/**
 * i40e_queue_pair_disable - Disables a queue pair
 * @vsi: vsi
 * @queue_pair: queue pair
 *
 * The rings are cleaned and the statistics reset even when switching the
 * rings off reports an error; that error is what gets returned.
 *
 * Returns 0 on success, <0 on failure.
 **/
int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair)
{
	int ret = i40e_enter_busy_conf(vsi);

	if (ret)
		return ret;

	/* quiesce: interrupt first, then NAPI, then the rings themselves */
	i40e_queue_pair_disable_irq(vsi, queue_pair);
	i40e_queue_pair_toggle_napi(vsi, queue_pair, false /* off */);
	ret = i40e_queue_pair_toggle_rings(vsi, queue_pair, false /* off */);

	/* drop whatever is still sitting on the rings, then the counters */
	i40e_clean_rx_ring(vsi->rx_rings[queue_pair]);
	i40e_queue_pair_clean_rings(vsi, queue_pair);
	i40e_queue_pair_reset_stats(vsi, queue_pair);

	return ret;
}
/** * i40e_queue_pair_enable - Enables a queue pair * @vsi: vsi * @queue_pair: queue pair * * Returns 0 on success, <0 on failure.
**/ int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair)
{ int err;
err = i40e_configure_tx_ring(vsi->tx_rings[queue_pair]); if (err) return err;
if (i40e_enabled_xdp_vsi(vsi)) {
err = i40e_configure_tx_ring(vsi->xdp_rings[queue_pair]); if (err) return err;
}
err = i40e_configure_rx_ring(vsi->rx_rings[queue_pair]); if (err) return err;
err = i40e_queue_pair_toggle_rings(vsi, queue_pair, true/* on */);
i40e_queue_pair_toggle_napi(vsi, queue_pair, true/* on */);
i40e_queue_pair_enable_irq(vsi, queue_pair);
if (vsi->type == I40E_VSI_MAIN) {
SET_NETDEV_DEV(netdev, &pf->pdev->dev);
ether_addr_copy(mac_addr, hw->mac.perm_addr); /* The following steps are necessary for two reasons. First, * some older NVM configurations load a default MAC-VLAN * filter that will accept any tagged packet, and we want to * replace this with a normal filter. Additionally, it is * possible our MAC address was provided by the platform using * Open Firmware or similar. * * Thus, we need to remove the default filter and install one * specific to the MAC address.
*/
i40e_rm_default_mac_filter(vsi, mac_addr);
spin_lock_bh(&vsi->mac_filter_hash_lock);
i40e_add_mac_filter(vsi, mac_addr);
spin_unlock_bh(&vsi->mac_filter_hash_lock);
netdev->xdp_features = NETDEV_XDP_ACT_BASIC |
NETDEV_XDP_ACT_REDIRECT |
NETDEV_XDP_ACT_XSK_ZEROCOPY |
NETDEV_XDP_ACT_RX_SG;
netdev->xdp_zc_max_segs = I40E_MAX_BUFFER_TXD;
} else { /* Relate the VSI_VMDQ name to the VSI_MAIN name. Note that we * are still limited by IFNAMSIZ, but we're adding 'v%d\0' to * the end, which is 4 bytes long, so force truncation of the * original name by IFNAMSIZ - 4
*/ struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf);
/* Add the broadcast filter so that we initially will receive * broadcast packets. Note that when a new VLAN is first added the * driver will convert all filters marked I40E_VLAN_ANY into VLAN * specific filters as part of transitioning into "vlan" operation. * When more VLANs are added, the driver will copy each existing MAC * filter and add it for the new VLAN. * * Broadcast filters are handled specially by * i40e_sync_filters_subtask, as the driver must to set the broadcast * promiscuous bit instead of adding this directly as a MAC/VLAN * filter. The subtask will update the correct broadcast promiscuous * bits as VLANs become active or inactive.
*/
eth_broadcast_addr(broadcast);
spin_lock_bh(&vsi->mac_filter_hash_lock);
i40e_add_mac_filter(vsi, broadcast);
spin_unlock_bh(&vsi->mac_filter_hash_lock);
/**
 * i40e_vsi_delete - Delete a VSI from the switch
 * @vsi: the VSI being removed
 *
 * Does not return a value. A request to delete the PF's default VSI is
 * ignored.
**/ staticvoid i40e_vsi_delete(struct i40e_vsi *vsi)
{ /* remove default VSI is not allowed */ if (vsi == vsi->back->vsi[vsi->back->lan_vsi]) return;
/**
 * i40e_is_vsi_uplink_mode_veb - Check if the VSI's uplink bridge mode is VEB
 * @vsi: the VSI being queried
 *
 * Returns 1 if the uplink is not a bridge or the bridge is in VEB mode,
 * 0 if the bridge is in VEPA mode, and -ENOENT if the VSI points at a VEB
 * slot that holds no VEB.
 *
 * Callers that only test the result for non-zero will treat the -ENOENT
 * case the same as VEB mode.
 **/
int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
	struct i40e_veb *veb;

	/* Uplink is not a bridge so default to VEB */
	if (vsi->veb_idx >= I40E_MAX_VEB)
		return 1;

	veb = pf->veb[vsi->veb_idx];
	if (!veb) {
		dev_info(&pf->pdev->dev,
			 "There is no veb associated with the bridge\n");
		return -ENOENT;
	}

	/* A bridge flagged VEPA reports 0; any other bridge is in VEB mode */
	return (veb->bridge_mode & BRIDGE_MODE_VEPA) ? 0 : 1;
}
/** * i40e_add_vsi - Add a VSI to the switch * @vsi: the VSI being configured * * This initializes a VSI context depending on the VSI type to be added and * passes it down to the add_vsi aq command.
**/ staticint i40e_add_vsi(struct i40e_vsi *vsi)
{ int ret = -ENODEV; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; struct i40e_vsi_context ctxt; struct i40e_mac_filter *f; struct hlist_node *h; int bkt;
memset(&ctxt, 0, sizeof(ctxt)); switch (vsi->type) { case I40E_VSI_MAIN: /* The PF's main VSI is already setup as part of the * device initialization, so we'll not bother with * the add_vsi call, but we will retrieve the current * VSI context.
*/
ctxt.seid = pf->main_vsi_seid;
ctxt.pf_num = pf->hw.pf_id;
ctxt.vf_num = 0;
ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
ctxt.flags = I40E_AQ_VSI_TYPE_PF; if (ret) {
dev_info(&pf->pdev->dev, "couldn't get PF vsi config, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(pf->hw.aq.asq_last_status)); return -ENOENT;
}
vsi->info = ctxt.info;
vsi->info.valid_sections = 0;
vsi->seid = ctxt.seid;
vsi->id = ctxt.vsi_number;
enabled_tc = i40e_pf_get_tc_map(pf);
/* Source pruning is enabled by default, so the flag is * negative logic - if it's set, we need to fiddle with * the VSI to disable source pruning.
*/ if (test_bit(I40E_FLAG_SOURCE_PRUNING_DIS, pf->flags)) {
memset(&ctxt, 0, sizeof(ctxt));
ctxt.seid = pf->main_vsi_seid;
ctxt.pf_num = pf->hw.pf_id;
ctxt.vf_num = 0;
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
ctxt.info.switch_id =
cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "update vsi failed, err %d aq_err %s\n",
ret,
libie_aq_str(pf->hw.aq.asq_last_status));
ret = -ENOENT; goto err;
}
}
/* MFP mode setup queue map and update VSI */ if (test_bit(I40E_FLAG_MFP_ENA, pf->flags) &&
!(pf->hw.func_caps.iscsi)) { /* NIC type PF */
memset(&ctxt, 0, sizeof(ctxt));
ctxt.seid = pf->main_vsi_seid;
ctxt.pf_num = pf->hw.pf_id;
ctxt.vf_num = 0;
i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL); if (ret) {
dev_info(&pf->pdev->dev, "update vsi failed, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(pf->hw.aq.asq_last_status));
ret = -ENOENT; goto err;
} /* update the local VSI info queue map */
i40e_vsi_update_queue_map(vsi, &ctxt);
vsi->info.valid_sections = 0;
} else { /* Default/Main VSI is only enabled for TC0 * reconfigure it to enable all TCs that are * available on the port in SFP mode. * For MFP case the iSCSI PF would use this * flow to enable LAN+iSCSI TC.
*/
ret = i40e_vsi_config_tc(vsi, enabled_tc); if (ret) { /* Single TC condition is not fatal, * message and continue
*/
dev_info(&pf->pdev->dev, "failed to configure TCs for main VSI tc_map 0x%08x, err %pe aq_err %s\n",
enabled_tc,
ERR_PTR(ret),
libie_aq_str(pf->hw.aq.asq_last_status));
}
} break;
/* This VSI is connected to VEB so the switch_id * should be set to zero by default.
*/ if (i40e_is_vsi_uplink_mode_veb(vsi)) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
ctxt.info.switch_id =
cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
}
/* Setup the VSI tx/rx queue map for TC0 only for now */
i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true); break;
/* This VSI is connected to VEB so the switch_id * should be set to zero by default.
*/ if (i40e_is_vsi_uplink_mode_veb(vsi)) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
ctxt.info.switch_id =
cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
}
ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL; if (pf->vf[vsi->vf_id].spoofchk) {
ctxt.info.valid_sections |=
cpu_to_le16(I40E_AQ_VSI_PROP_SECURITY_VALID);
ctxt.info.sec_flags |=
(I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK |
I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK);
} /* Setup the VSI tx/rx queue map for TC0 only for now */
i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true); break;
case I40E_VSI_IWARP: /* send down message to iWARP */ break;
default: return -ENODEV;
}
if (vsi->type != I40E_VSI_MAIN) {
ret = i40e_aq_add_vsi(hw, &ctxt, NULL); if (ret) {
dev_info(&vsi->back->pdev->dev, "add vsi failed, err %pe aq_err %s\n",
ERR_PTR(ret),
libie_aq_str(pf->hw.aq.asq_last_status));
ret = -ENOENT; goto err;
}
vsi->info = ctxt.info;
vsi->info.valid_sections = 0;
vsi->seid = ctxt.seid;
vsi->id = ctxt.vsi_number;
}
spin_lock_bh(&vsi->mac_filter_hash_lock);
vsi->active_filters = 0; /* If macvlan filters already exist, force them to get loaded */
hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
f->state = I40E_FILTER_NEW;
f_count++;
}
spin_unlock_bh(&vsi->mac_filter_hash_lock);
clear_bit(__I40E_VSI_OVERFLOW_PROMISC, vsi->state);
if (f_count) {
vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
}
/* Update VSI BW information */
ret = i40e_vsi_get_bw_info(vsi); if (ret) {
dev_info(&pf->pdev->dev, "couldn't get vsi bw info, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); /* VSI is already added so not tearing that up */
ret = 0;
}
err: return ret;
}
/**
 * i40e_vsi_release - Delete a VSI and free its resources
 * @vsi: the VSI being removed
 *
 * Refuses to release a VSI that owns a VEB, and refuses to release the main
 * VSI unless the PF is marked down. Otherwise the VSI is flagged as
 * releasing, its netdev is unregistered (or the VSI is closed), its MAC
 * filters are marked for deletion, and the VEB it was attached to is
 * released when no other non-owner VSI remains on it.
 *
 * Returns 0 on success or < 0 on error
 **/
int i40e_vsi_release(struct i40e_vsi *vsi)
{
	struct i40e_mac_filter *f;
	struct i40e_vsi *vsi_it;
	struct hlist_node *h;
	struct i40e_veb *veb;
	struct i40e_pf *pf;
	u16 uplink_seid;
	int i, n, bkt;

	pf = vsi->back;

	/* release of a VEB-owner or last VSI is not allowed */
	if (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) {
		dev_info(&pf->pdev->dev, "VSI %d has existing VEB %d\n",
			 vsi->seid, vsi->uplink_seid);
		return -ENODEV;
	}
	if (vsi->type == I40E_VSI_MAIN && !test_bit(__I40E_DOWN, pf->state)) {
		dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
		return -ENODEV;
	}
	set_bit(__I40E_VSI_RELEASING, vsi->state);

	/* remember the uplink now; it is needed for the VEB check below */
	uplink_seid = vsi->uplink_seid;

	if (vsi->type != I40E_VSI_SRIOV) {
		if (vsi->netdev_registered) {
			vsi->netdev_registered = false;
			if (vsi->netdev) {
				/* results in a call to i40e_close() */
				unregister_netdev(vsi->netdev);
			}
		} else {
			i40e_vsi_close(vsi);
		}
		i40e_vsi_disable_irq(vsi);
	}

	if (vsi->type == I40E_VSI_MAIN)
		i40e_devlink_destroy_port(pf);

	spin_lock_bh(&vsi->mac_filter_hash_lock);

	/* clear the sync flag on all filters */
	if (vsi->netdev) {
		__dev_uc_unsync(vsi->netdev, NULL);
		__dev_mc_unsync(vsi->netdev, NULL);
	}

	/* make sure any remaining filters are marked for deletion */
	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
		__i40e_del_filter(vsi, f);

	/* Drop the filter lock before touching the switch topology; the
	 * function must not return with it held.
	 */
	spin_unlock_bh(&vsi->mac_filter_hash_lock);

	/* If this was the last thing on the VEB, except for the
	 * controlling VSI, remove the VEB, which puts the controlling
	 * VSI onto the uplink port.
	 *
	 * Well, okay, there's one more exception here: don't remove
	 * the floating VEBs yet. We'll wait for an explicit remove request
	 * from up the network stack.
	 */
	veb = i40e_pf_get_veb_by_seid(pf, uplink_seid);
	if (veb && veb->uplink_seid) {
		n = 0;

		/* Count non-controlling VSIs present on the VEB */
		i40e_pf_for_each_vsi(pf, i, vsi_it)
			if (vsi_it->uplink_seid == uplink_seid &&
			    (vsi_it->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
				n++;

		/* If there is no VSI except the control one then release
		 * the VEB and put the control VSI onto VEB uplink.
		 */
		if (!n)
			i40e_veb_release(veb);
	}

	return 0;
}
/**
 * i40e_vsi_setup_vectors - Set up the q_vectors for the given VSI
 * @vsi: ptr to the VSI
 *
 * Must only be called after i40e_vsi_mem_alloc(), which allocates the
 * matching SW VSI structure and initializes num_queue_pairs for the newly
 * allocated VSI.
 *
 * Returns 0 on success, -EEXIST if the VSI already has q_vectors or a base
 * vector, the allocator's error if q_vector allocation fails, or -ENOENT if
 * no vector range could be tracked.
 **/
static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi)
{
	struct i40e_pf *pf = vsi->back;
	struct device *dev = &pf->pdev->dev;
	int err;

	/* A VSI that already carries vectors must not be set up twice */
	if (vsi->q_vectors[0]) {
		dev_info(dev, "VSI %d has existing q_vectors\n",
			 vsi->seid);
		return -EEXIST;
	}

	if (vsi->base_vector) {
		dev_info(dev, "VSI %d has non-zero base vector %d\n",
			 vsi->seid, vsi->base_vector);
		return -EEXIST;
	}

	err = i40e_vsi_alloc_q_vectors(vsi);
	if (err) {
		dev_info(dev,
			 "failed to allocate %d q_vector for VSI %d, ret=%d\n",
			 vsi->num_q_vectors, vsi->seid, err);
		vsi->num_q_vectors = 0;
		return err;
	}

	/* In Legacy mode, we do not have to get any other vector since we
	 * piggyback on the misc/ICR0 for queue interrupts.
	 */
	if (!test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
		return 0;

	if (vsi->num_q_vectors)
		vsi->base_vector = i40e_get_lump(pf, pf->irq_pile,
						 vsi->num_q_vectors, vsi->idx);

	if (vsi->base_vector >= 0)
		return 0;

	/* No room in the irq pile: undo the q_vector allocation */
	dev_info(dev,
		 "failed to get tracking for %d vectors for VSI %d, err=%d\n",
		 vsi->num_q_vectors, vsi->seid, vsi->base_vector);
	i40e_vsi_free_q_vectors(vsi);
	return -ENOENT;
}
/** * i40e_vsi_reinit_setup - return and reallocate resources for a VSI * @vsi: pointer to the vsi. * * This re-allocates a vsi's queue resources. * * Returns pointer to the successfully allocated and configured VSI sw struct * on success, otherwise returns NULL on failure.
**/ staticstruct i40e_vsi *i40e_vsi_reinit_setup(struct i40e_vsi *vsi)
{ struct i40e_vsi *main_vsi;
u16 alloc_queue_pairs; struct i40e_pf *pf; int ret;
ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx); if (ret < 0) {
dev_info(&pf->pdev->dev, "failed to get tracking for %d queues for VSI %d err %d\n",
alloc_queue_pairs, vsi->seid, ret); goto err_vsi;
}
vsi->base_queue = ret;
/* Update the FW view of the VSI. Force a reset of TC and queue * layout configurations.
*/
main_vsi = i40e_pf_get_main_vsi(pf);
main_vsi->seid = pf->main_vsi_seid;
i40e_vsi_reconfig_tc(main_vsi);
if (vsi->type == I40E_VSI_MAIN)
i40e_rm_default_mac_filter(vsi, pf->hw.mac.perm_addr);
/* assign it some queues */
ret = i40e_alloc_rings(vsi); if (ret) goto err_rings;
/* map all of the rings to the q_vectors */
i40e_vsi_map_rings_to_vectors(vsi); return vsi;
/** * i40e_vsi_setup - Set up a VSI by a given type * @pf: board private structure * @type: VSI type * @uplink_seid: the switch element to link to * @param1: usage depends upon VSI type. For VF types, indicates VF id * * This allocates the sw VSI structure and its queue resources, then add a VSI * to the identified VEB. * * Returns pointer to the successfully allocated and configure VSI sw struct on * success, otherwise returns NULL on failure.
**/ struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
u16 uplink_seid, u32 param1)
{ struct i40e_vsi *vsi = NULL; struct i40e_veb *veb = NULL;
u16 alloc_queue_pairs; int v_idx; int ret;
/* The requested uplink_seid must be either * - the PF's port seid * no VEB is needed because this is the PF * or this is a Flow Director special case VSI * - seid of an existing VEB * - seid of a VSI that owns an existing VEB * - seid of a VSI that doesn't own a VEB * a new VEB is created and the VSI becomes the owner * - seid of the PF VSI, which is what creates the first VEB * this is a special case of the previous * * Find which uplink_seid we were given and create a new VEB if needed
*/
veb = i40e_pf_get_veb_by_seid(pf, uplink_seid); if (!veb && uplink_seid != pf->mac_seid) {
vsi = i40e_pf_get_vsi_by_seid(pf, uplink_seid); if (!vsi) {
dev_info(&pf->pdev->dev, "no such uplink_seid %d\n",
uplink_seid); return NULL;
}
if (vsi->uplink_seid == pf->mac_seid)
veb = i40e_veb_setup(pf, pf->mac_seid, vsi->seid,
vsi->tc_config.enabled_tc); elseif ((vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
veb = i40e_veb_setup(pf, vsi->uplink_seid, vsi->seid,
vsi->tc_config.enabled_tc); if (veb) { if (vsi->type != I40E_VSI_MAIN) {
dev_info(&vsi->back->pdev->dev, "New VSI creation error, uplink seid of LAN VSI expected.\n"); return NULL;
} /* We come up by default in VEPA mode if SRIOV is not * already enabled, in which case we can't force VEPA * mode.
*/ if (!test_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags)) {
veb->bridge_mode = BRIDGE_MODE_VEPA;
clear_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
}
i40e_config_bridge_mode(veb);
}
veb = i40e_pf_get_veb_by_seid(pf, vsi->uplink_seid); if (!veb) {
dev_info(&pf->pdev->dev, "couldn't add VEB\n"); return NULL;
}
/* get vsi sw struct */
v_idx = i40e_vsi_mem_alloc(pf, type); if (v_idx < 0) goto err_alloc;
vsi = pf->vsi[v_idx]; if (!vsi) goto err_alloc;
vsi->type = type;
vsi->veb_idx = (veb ? veb->idx : I40E_NO_VEB);
if (type == I40E_VSI_MAIN)
pf->lan_vsi = v_idx; elseif (type == I40E_VSI_SRIOV)
vsi->vf_id = param1; /* assign it some queues */
alloc_queue_pairs = vsi->alloc_queue_pairs *
(i40e_enabled_xdp_vsi(vsi) ? 2 : 1);
ret = i40e_get_lump(pf, pf->qp_pile, alloc_queue_pairs, vsi->idx); if (ret < 0) {
dev_info(&pf->pdev->dev, "failed to get tracking for %d queues for VSI %d err=%d\n",
alloc_queue_pairs, vsi->seid, ret); goto err_vsi;
}
vsi->base_queue = ret;
/* get a VSI from the hardware */
vsi->uplink_seid = uplink_seid;
ret = i40e_add_vsi(vsi); if (ret) goto err_vsi;
switch (vsi->type) { /* setup the netdev if needed */ case I40E_VSI_MAIN: case I40E_VSI_VMDQ2:
ret = i40e_config_netdev(vsi); if (ret) goto err_netdev;
ret = i40e_netif_set_realnum_tx_rx_queues(vsi); if (ret) goto err_netdev; if (vsi->type == I40E_VSI_MAIN) {
ret = i40e_devlink_create_port(pf); if (ret) goto err_netdev;
SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port);
}
ret = register_netdev(vsi->netdev); if (ret) goto err_dl_port;
vsi->netdev_registered = true;
netif_carrier_off(vsi->netdev); #ifdef CONFIG_I40E_DCB /* Setup DCB netlink interface */
i40e_dcbnl_setup(vsi); #endif/* CONFIG_I40E_DCB */
fallthrough; case I40E_VSI_FDIR: /* set up vectors and rings if needed */
ret = i40e_vsi_setup_vectors(vsi); if (ret) goto err_msix;
ret = i40e_alloc_rings(vsi); if (ret) goto err_rings;
/* map all of the rings to the q_vectors */
i40e_vsi_map_rings_to_vectors(vsi);
i40e_vsi_reset_stats(vsi); break; default: /* no netdev or rings for the other VSI types */ break;
}
if (test_bit(I40E_HW_CAP_RSS_AQ, pf->hw.caps) &&
vsi->type == I40E_VSI_VMDQ2) {
ret = i40e_vsi_config_rss(vsi); if (ret) goto err_config;
} return vsi;
/** * i40e_veb_get_bw_info - Query VEB BW information * @veb: the veb to query * * Query the Tx scheduler BW configuration data for given VEB
**/ staticint i40e_veb_get_bw_info(struct i40e_veb *veb)
{ struct i40e_aqc_query_switching_comp_ets_config_resp ets_data; struct i40e_aqc_query_switching_comp_bw_config_resp bw_data; struct i40e_pf *pf = veb->pf; struct i40e_hw *hw = &pf->hw;
u32 tc_bw_max; int ret = 0; int i;
ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid,
&bw_data, NULL); if (ret) {
dev_info(&pf->pdev->dev, "query veb bw config failed, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(hw->aq.asq_last_status)); goto out;
}
ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid,
&ets_data, NULL); if (ret) {
dev_info(&pf->pdev->dev, "query veb bw ets config failed, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(hw->aq.asq_last_status)); goto out;
}
/**
 * i40e_veb_mem_alloc - Allocates the next available struct veb in the PF
 * @pf: board private structure
 *
 * On error: returns error code (negative)
 * On success: returns veb index in PF (zero or positive)
**/ staticint i40e_veb_mem_alloc(struct i40e_pf *pf)
{ int ret = -ENOENT; struct i40e_veb *veb; int i;
/* Need to protect the allocation of switch elements at the PF level */
mutex_lock(&pf->switch_mutex);
/* VEB list may be fragmented if VEB creation/destruction has * been happening. We can afford to do a quick scan to look * for any free slots in the list. * * find next empty veb slot, looping back around if necessary
*/
i = 0; while ((i < I40E_MAX_VEB) && (pf->veb[i] != NULL))
i++; if (i >= I40E_MAX_VEB) {
ret = -ENOMEM; goto err_alloc_veb; /* out of VEB slots! */
}
veb = kzalloc(sizeof(*veb), GFP_KERNEL); if (!veb) {
ret = -ENOMEM; goto err_alloc_veb;
}
veb->pf = pf;
veb->idx = i;
veb->enabled_tc = 1;
/** * i40e_switch_branch_release - Delete a branch of the switch tree * @branch: where to start deleting * * This uses recursion to find the tips of the branch to be * removed, deleting until we get back to and can delete this VEB.
**/ staticvoid i40e_switch_branch_release(struct i40e_veb *branch)
{ struct i40e_pf *pf = branch->pf;
u16 branch_seid = branch->seid;
u16 veb_idx = branch->idx; struct i40e_vsi *vsi; struct i40e_veb *veb; int i;
/* release any VEBs on this VEB - RECURSION */
i40e_pf_for_each_veb(pf, i, veb) if (veb->uplink_seid == branch->seid)
i40e_switch_branch_release(veb);
/* Release the VSIs on this VEB, but not the owner VSI. * * NOTE: Removing the last VSI on a VEB has the SIDE EFFECT of removing * the VEB itself, so don't use (*branch) after this loop.
*/
i40e_pf_for_each_vsi(pf, i, vsi) if (vsi->uplink_seid == branch_seid &&
(vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
i40e_vsi_release(vsi);
/* There's one corner case where the VEB might not have been * removed, so double check it here and remove it if needed. * This case happens if the veb was created from the debugfs * commands and no VSIs were added to it.
*/ if (pf->veb[veb_idx])
i40e_veb_release(pf->veb[veb_idx]);
}
/**
 * i40e_veb_clear - remove veb struct
 * @veb: the veb to remove (may be NULL, in which case nothing happens)
 *
 * Clears the PF's table entry for @veb, if the entry still points at it,
 * and frees the structure.
 **/
static void i40e_veb_clear(struct i40e_veb *veb)
{
	struct i40e_pf *owner;

	if (!veb)
		return;

	owner = veb->pf;
	if (owner) {
		struct i40e_veb **slot;

		mutex_lock(&owner->switch_mutex);
		slot = &owner->veb[veb->idx];
		/* Only clear the slot when it still refers to this VEB */
		if (*slot == veb)
			*slot = NULL;
		mutex_unlock(&owner->switch_mutex);
	}

	kfree(veb);
}
/** * i40e_veb_release - Delete a VEB and free its resources * @veb: the VEB being removed
**/ void i40e_veb_release(struct i40e_veb *veb)
{ struct i40e_vsi *vsi, *vsi_it; struct i40e_pf *pf; int i, n = 0;
pf = veb->pf;
/* find the remaining VSI and check for extras */
i40e_pf_for_each_vsi(pf, i, vsi_it) if (vsi_it->uplink_seid == veb->seid) { if (vsi_it->flags & I40E_VSI_FLAG_VEB_OWNER)
vsi = vsi_it;
n++;
}
/* Floating VEB has to be empty and regular one must have * single owner VSI.
*/ if ((veb->uplink_seid && n != 1) || (!veb->uplink_seid && n != 0)) {
dev_info(&pf->pdev->dev, "can't remove VEB %d with %d VSIs left\n",
veb->seid, n); return;
}
/* For regular VEB move the owner VSI to uplink port */ if (veb->uplink_seid) {
vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER;
vsi->uplink_seid = veb->uplink_seid;
vsi->veb_idx = I40E_NO_VEB;
}
/** * i40e_add_veb - create the VEB in the switch * @veb: the VEB to be instantiated * @vsi: the controlling VSI
**/ staticint i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
{ struct i40e_pf *pf = veb->pf; bool enable_stats = !!test_bit(I40E_FLAG_VEB_STATS_ENA, pf->flags); int ret;
ret = i40e_aq_add_veb(&pf->hw, veb->uplink_seid, vsi ? vsi->seid : 0,
veb->enabled_tc, vsi ? false : true,
&veb->seid, enable_stats, NULL);
/* get a VEB from the hardware */ if (ret) {
dev_info(&pf->pdev->dev, "couldn't add VEB, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); return -EPERM;
}
/* get statistics counter */
ret = i40e_aq_get_veb_parameters(&pf->hw, veb->seid, NULL, NULL,
&veb->stats_idx, NULL, NULL, NULL); if (ret) {
dev_info(&pf->pdev->dev, "couldn't get VEB statistics idx, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status)); return -EPERM;
}
ret = i40e_veb_get_bw_info(veb); if (ret) {
dev_info(&pf->pdev->dev, "couldn't get VEB bw info, err %pe aq_err %s\n",
ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
i40e_aq_delete_element(&pf->hw, veb->seid, NULL); return -ENOENT;
}
/** * i40e_veb_setup - Set up a VEB * @pf: board private structure * @uplink_seid: the switch element to link to * @vsi_seid: the initial VSI seid * @enabled_tc: Enabled TC bit-map * * This allocates the sw VEB structure and links it into the switch * It is possible and legal for this to be a duplicate of an already * existing VEB. It is also possible for both uplink and vsi seids * to be zero, in order to create a floating VEB. * * Returns pointer to the successfully allocated VEB sw struct on * success, otherwise returns NULL on failure.
**/ struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 uplink_seid,
u16 vsi_seid, u8 enabled_tc)
{ struct i40e_vsi *vsi = NULL; struct i40e_veb *veb; int veb_idx; int ret;
/* if one seid is 0, the other must be 0 to create a floating relay */ if ((uplink_seid == 0 || vsi_seid == 0) &&
(uplink_seid + vsi_seid != 0)) {
dev_info(&pf->pdev->dev, "one, not both seid's are 0: uplink=%d vsi=%d\n",
uplink_seid, vsi_seid); return NULL;
}
/* make sure there is such a vsi and uplink */ if (vsi_seid) {
vsi = i40e_pf_get_vsi_by_seid(pf, vsi_seid); if (!vsi) {
dev_err(&pf->pdev->dev, "vsi seid %d not found\n",
vsi_seid); return NULL;
}
}
/* get veb sw struct */
veb_idx = i40e_veb_mem_alloc(pf); if (veb_idx < 0) goto err_alloc;
veb = pf->veb[veb_idx];
veb->uplink_seid = uplink_seid;
veb->enabled_tc = (enabled_tc ? enabled_tc : 0x1);
/* create the VEB in the switch */
ret = i40e_add_veb(veb, vsi); if (ret) goto err_veb;
if (vsi && vsi->idx == pf->lan_vsi)
pf->lan_veb = veb->idx;
/**
 * i40e_setup_pf_switch_element - set PF vars based on switch type
 * @pf: board private structure
 * @ele: element we are building info from
 * @num_reported: total number of elements
 * @printconfig: should we print the contents
 *
 * Helper that pulls a few useful SEID values out of one reported switch
 * element: the MAC seid, the main VEB (slot, seid and uplink) and, when the
 * element is the only one reported, the main VSI seid.
 **/
static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
				struct i40e_aqc_switch_config_element_resp *ele,
				u16 num_reported, bool printconfig)
{
	u16 downlink_seid = le16_to_cpu(ele->downlink_seid);
	u16 uplink_seid = le16_to_cpu(ele->uplink_seid);
	u8 element_type = ele->element_type;
	u16 seid = le16_to_cpu(ele->seid);
	struct i40e_veb *main_veb;

	switch (element_type) {
	case I40E_SWITCH_ELEMENT_TYPE_MAC:
		pf->mac_seid = seid;
		break;

	case I40E_SWITCH_ELEMENT_TYPE_VEB:
		/* VEBs whose uplink is not the MAC are ignored here */
		if (uplink_seid != pf->mac_seid)
			break;

		if (!i40e_pf_get_main_veb(pf)) {
			/* find existing or else empty VEB */
			struct i40e_veb *known;
			int slot;

			known = i40e_pf_get_veb_by_seid(pf, seid);
			if (known) {
				pf->lan_veb = known->idx;
			} else {
				slot = i40e_veb_mem_alloc(pf);
				if (slot < 0)
					break;
				pf->lan_veb = slot;
			}
		}

		/* Try to get again main VEB as pf->lan_veb may have changed */
		main_veb = i40e_pf_get_main_veb(pf);
		if (!main_veb)
			break;

		main_veb->seid = seid;
		main_veb->uplink_seid = pf->mac_seid;
		main_veb->pf = pf;
		break;

	case I40E_SWITCH_ELEMENT_TYPE_VSI:
		/* This is immediately after a reset so we can assume this is
		 * the PF's VSI - but only when it is the sole element
		 */
		if (num_reported == 1) {
			pf->mac_seid = uplink_seid;
			pf->main_vsi_seid = seid;
			if (printconfig)
				dev_info(&pf->pdev->dev,
					 "pf_seid=%d main_vsi_seid=%d\n",
					 downlink_seid, pf->main_vsi_seid);
		}
		break;

	case I40E_SWITCH_ELEMENT_TYPE_PF:
	case I40E_SWITCH_ELEMENT_TYPE_VF:
	case I40E_SWITCH_ELEMENT_TYPE_EMP:
	case I40E_SWITCH_ELEMENT_TYPE_BMC:
	case I40E_SWITCH_ELEMENT_TYPE_PE:
	case I40E_SWITCH_ELEMENT_TYPE_PA:
		/* ignore these for now */
		break;

	default:
		dev_info(&pf->pdev->dev, "unknown element type=%d seid=%d\n",
			 element_type, seid);
		break;
	}
}
/**
 * i40e_fetch_switch_configuration - Get switch config from firmware
 * @pf: board private structure
 * @printconfig: should we print the contents
 *
 * Get the current switch configuration from the device and
 * extract a few useful SEID values. The configuration is read in chunks;
 * the loop continues for as long as firmware hands back a non-zero
 * continuation seid.
 *
 * Returns 0 on success, -ENOMEM if the response buffer cannot be allocated,
 * or the admin queue error if a query fails.
 **/
int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig)
{
	struct i40e_aqc_get_switch_config_resp *sw_config;
	u16 next_seid = 0;
	int ret = 0;
	int i;

	sw_config = kzalloc(I40E_AQ_LARGE_BUF, GFP_KERNEL);
	if (!sw_config)
		return -ENOMEM;

	do {
		u16 num_reported, num_total;

		/* Fill the buffer with the next chunk; next_seid is updated
		 * by the call and tells us whether more chunks follow.
		 */
		ret = i40e_aq_get_switch_config(&pf->hw, sw_config,
						I40E_AQ_LARGE_BUF,
						&next_seid, NULL);
		if (ret) {
			dev_info(&pf->pdev->dev,
				 "get switch config failed, err %pe aq_err %s\n",
				 ERR_PTR(ret),
				 libie_aq_str(pf->hw.aq.asq_last_status));
			break;
		}

		/* The counts come from the response header, little endian */
		num_reported = le16_to_cpu(sw_config->header.num_reported);
		num_total = le16_to_cpu(sw_config->header.num_total);

		if (printconfig)
			dev_info(&pf->pdev->dev,
				 "header: %d reported %d total\n",
				 num_reported, num_total);

		for (i = 0; i < num_reported; i++) {
			struct i40e_aqc_switch_config_element_resp *ele =
				&sw_config->element[i];

			i40e_setup_pf_switch_element(pf, ele, num_reported,
						     printconfig);
		}
	} while (next_seid != 0);

	kfree(sw_config);
	return ret;
}
/**
 * i40e_setup_pf_switch - Setup the HW switch on startup or after reset
 * @pf: board private structure
 * @reinit: if the Main VSI needs to re-initialized.
 * @lock_acquired: indicates whether or not the lock has been acquired
 *
 * Reads the switch configuration, programs the switch promiscuous
 * configuration, creates or re-initializes the main VSI, and then brings up
 * the dependent features (flow director sideband, filter control, RSS, link
 * events, PTP and the UDP tunnel port table).
 *
 * Returns 0 on success, negative value on failure
 **/
static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{
	u16 valid_flags = I40E_AQ_SET_SWITCH_CFG_PROMISC;
	struct i40e_vsi *main_vsi;
	u16 flags = 0;
	int ret;

	/* find out what's out there already */
	ret = i40e_fetch_switch_configuration(pf, false);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "couldn't fetch switch config, err %pe aq_err %s\n",
			 ERR_PTR(ret), libie_aq_str(pf->hw.aq.asq_last_status));
		return ret;
	}
	i40e_pf_reset_stats(pf);

	/* set the switch config bit for the whole device to
	 * support limited promisc or true promisc
	 * when user requests promisc. The default is limited
	 * promisc.
	 *
	 * NOTE(review): the reviewed text closed a scope here that was never
	 * opened and used valid_flags without declaring it, so a guarding
	 * condition may have been lost (the setting is device-wide) -
	 * confirm whether this should be limited to a single PF.
	 */
	ret = i40e_aq_set_switch_config(&pf->hw, flags, valid_flags, 0,
					NULL);
	if (ret && pf->hw.aq.asq_last_status != LIBIE_AQ_RC_ESRCH) {
		dev_info(&pf->pdev->dev,
			 "couldn't set switch config bits, err %pe aq_err %s\n",
			 ERR_PTR(ret),
			 libie_aq_str(pf->hw.aq.asq_last_status));
		/* not a fatal problem, just keep going */
	}
	pf->last_sw_conf_valid_flags = valid_flags;

	/* first time setup */
	main_vsi = i40e_pf_get_main_vsi(pf);
	if (!main_vsi || reinit) {
		struct i40e_veb *veb;
		u16 uplink_seid;

		/* Set up the PF VSI associated with the PF's main VSI
		 * that is already in the HW switch
		 */
		veb = i40e_pf_get_main_veb(pf);
		if (veb)
			uplink_seid = veb->seid;
		else
			uplink_seid = pf->mac_seid;

		if (!main_vsi)
			main_vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN,
						  uplink_seid, 0);
		else if (reinit)
			main_vsi = i40e_vsi_reinit_setup(main_vsi);

		if (!main_vsi) {
			dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
			i40e_cloud_filter_exit(pf);
			i40e_fdir_teardown(pf);
			return -EAGAIN;
		}
	} else {
		/* force a reset of TC and queue layout configurations */
		main_vsi->seid = pf->main_vsi_seid;
		i40e_vsi_reconfig_tc(main_vsi);
	}
	i40e_vlan_stripping_disable(main_vsi);

	i40e_fdir_sb_setup(pf);

	/* Setup static PF queue filter control settings */
	ret = i40e_setup_pf_filter_control(pf);
	if (ret) {
		dev_info(&pf->pdev->dev, "setup_pf_filter_control failed: %d\n",
			 ret);
		/* Failure here should not stop continuing other steps */
	}

	/* enable RSS in the HW, even for only one queue, as the stack can use
	 * the hash
	 */
	if (test_bit(I40E_FLAG_RSS_ENA, pf->flags))
		i40e_pf_config_rss(pf);

	/* fill in link information and enable LSE reporting */
	i40e_link_event(pf);

	i40e_ptp_init(pf);

	/* Take the RTNL lock around the notification unless the caller
	 * already holds it.
	 */
	if (!lock_acquired)
		rtnl_lock();

	/* repopulate tunnel port filters */
	udp_tunnel_nic_reset_ntf(main_vsi->netdev);

	if (!lock_acquired)
		rtnl_unlock();

	/* ret still carries the filter control result from above */
	return ret;
}
/** * i40e_determine_queue_usage - Work out queue distribution * @pf: board private structure
**/ staticvoid i40e_determine_queue_usage(struct i40e_pf *pf)
{ int queues_left; int q_max;
pf->num_lan_qps = 0;
/* Find the max queues to be put into basic use. We'll always be * using TC0, whether or not DCB is running, and TC0 will get the * big RSS set.
*/
queues_left = pf->hw.func_caps.num_tx_qp;
if ((queues_left == 1) ||
!test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) { /* one qp for PF, no queues for anything else */
queues_left = 0;
pf->alloc_rss_size = pf->num_lan_qps = 1;
/* make sure all the fancies are disabled */
clear_bit(I40E_FLAG_RSS_ENA, pf->flags);
clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags);
clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags);
clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags);
set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
} elseif (!test_bit(I40E_FLAG_RSS_ENA, pf->flags) &&
!test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) &&
!test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags) &&
!test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags)) { /* one qp for PF */
pf->alloc_rss_size = pf->num_lan_qps = 1;
queues_left -= pf->num_lan_qps;
clear_bit(I40E_FLAG_RSS_ENA, pf->flags);
clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
clear_bit(I40E_FLAG_FD_ATR_ENA, pf->flags);
clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
clear_bit(I40E_FLAG_VMDQ_ENA, pf->flags);
set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
} else { /* Not enough queues for all TCs */ if (test_bit(I40E_FLAG_DCB_CAPABLE, pf->flags) &&
queues_left < I40E_MAX_TRAFFIC_CLASS) {
clear_bit(I40E_FLAG_DCB_CAPABLE, pf->flags);
clear_bit(I40E_FLAG_DCB_ENA, pf->flags);
dev_info(&pf->pdev->dev, "not enough queues for DCB. DCB is disabled.\n");
}
/* limit lan qps to the smaller of qps, cpus or msix */
q_max = max_t(int, pf->rss_size_max, num_online_cpus());
q_max = min_t(int, q_max, pf->hw.func_caps.num_tx_qp);
q_max = min_t(int, q_max, pf->hw.func_caps.num_msix_vectors);
pf->num_lan_qps = q_max;
queues_left -= pf->num_lan_qps;
}
if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags)) { if (queues_left > 1) {
queues_left -= 1; /* save 1 queue for FD */
} else {
clear_bit(I40E_FLAG_FD_SB_ENA, pf->flags);
set_bit(I40E_FLAG_FD_SB_INACTIVE, pf->flags);
dev_info(&pf->pdev->dev, "not enough queues for Flow Director. Flow Director feature is disabled\n");
}
}
/** * i40e_setup_pf_filter_control - Setup PF static filter control * @pf: PF to be setup * * i40e_setup_pf_filter_control sets up a PF's initial filter control * settings. If PE/FCoE are enabled then it will also set the per PF * based filter sizes required for them. It also enables Flow director, * ethertype and macvlan type filter settings for the pf. * * Returns 0 on success, negative on failure
**/ staticint i40e_setup_pf_filter_control(struct i40e_pf *pf)
{ struct i40e_filter_control_settings *settings = &pf->filter_settings;
settings->hash_lut_size = I40E_HASH_LUT_SIZE_128;
/* Flow Director is enabled */ if (test_bit(I40E_FLAG_FD_SB_ENA, pf->flags) ||
test_bit(I40E_FLAG_FD_ATR_ENA, pf->flags))
settings->enable_fdir = true;
/* Ethtype and MACVLAN filters enabled for PF */
settings->enable_ethtype = true;
settings->enable_macvlan = true;
if (i40e_set_filter_control(&pf->hw, settings)) return -ENOENT;
/** * i40e_get_platform_mac_addr - get platform-specific MAC address * @pdev: PCI device information struct * @pf: board private structure * * Look up the MAC address for the device. First we'll try * eth_platform_get_mac_address, which will check Open Firmware, or arch * specific fallback. Otherwise, we'll default to the stored value in * firmware.
**/ staticvoid i40e_get_platform_mac_addr(struct pci_dev *pdev, struct i40e_pf *pf)
{ if (eth_platform_get_mac_address(&pdev->dev, pf->hw.mac.addr))
i40e_get_mac_addr(&pf->hw, pf->hw.mac.addr);
}
/**
 * i40e_set_fec_in_flags - helper function for setting FEC options in flags
 * @fec_cfg: FEC option to set in flags
 * @flags: ptr to flags in which we set FEC option
 *
 * The checks run in a fixed order (auto, RS, KR), so when several option
 * bits are present the later check decides the final flag state. A zero
 * @fec_cfg clears both FEC flags.
 **/
void i40e_set_fec_in_flags(u8 fec_cfg, unsigned long *flags)
{
	/* No FEC option at all: none of the checks below can match */
	if (!fec_cfg) {
		clear_bit(I40E_FLAG_RS_FEC, flags);
		clear_bit(I40E_FLAG_BASE_R_FEC, flags);
		return;
	}

	if (fec_cfg & I40E_AQ_SET_FEC_AUTO) {
		set_bit(I40E_FLAG_RS_FEC, flags);
		set_bit(I40E_FLAG_BASE_R_FEC, flags);
	}

	/* RS requested or advertised: RS on, BASE-R off */
	if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_RS |
		       I40E_AQ_SET_FEC_ABILITY_RS)) {
		set_bit(I40E_FLAG_RS_FEC, flags);
		clear_bit(I40E_FLAG_BASE_R_FEC, flags);
	}

	/* KR requested or advertised: BASE-R on, RS off */
	if (fec_cfg & (I40E_AQ_SET_FEC_REQUEST_KR |
		       I40E_AQ_SET_FEC_ABILITY_KR)) {
		set_bit(I40E_FLAG_BASE_R_FEC, flags);
		clear_bit(I40E_FLAG_RS_FEC, flags);
	}
}
/** * i40e_check_recovery_mode - check if we are running transition firmware * @pf: board private structure * * Check registers indicating the firmware runs in recovery mode. Sets the * appropriate driver state. * * Returns true if the recovery mode was detected, false otherwise
**/ staticbool i40e_check_recovery_mode(struct i40e_pf *pf)
{
u32 val = rd32(&pf->hw, I40E_GL_FWSTS);
if (val & I40E_GL_FWSTS_FWS1B_MASK) {
dev_crit(&pf->pdev->dev, "Firmware recovery mode detected. Limiting functionality.\n");
dev_crit(&pf->pdev->dev, "Refer to the Intel(R) Ethernet Adapters and Devices User Guide for details on firmware recovery mode.\n");
set_bit(__I40E_RECOVERY_MODE, pf->state);
returntrue;
} if (test_bit(__I40E_RECOVERY_MODE, pf->state))
dev_info(&pf->pdev->dev, "Please do Power-On Reset to initialize adapter in normal mode with full functionality.\n");
returnfalse;
}
/**
 * i40e_pf_loop_reset - perform reset in a loop.
 * @pf: board private structure
 *
 * This function is useful when a NIC is about to enter recovery mode.
 * When a NIC's internal data structures are corrupted the NIC's
 * firmware is going to enter recovery mode.
 * Right after a POR it takes about 7 minutes for firmware to enter
 * recovery mode. Until that time a NIC is in some kind of intermediate
 * state. After that time period the NIC almost surely enters
 * recovery mode. The only way for a driver to detect intermediate
 * state is to issue a series of pf-resets and check a return value.
 * If a PF reset returns success then the firmware could be in recovery
 * mode so the caller of this code needs to check for recovery mode
 * if this function returns success. There is a little chance that
 * firmware will hang in intermediate state forever.
 * Since waiting 7 minutes is quite a lot of time this function waits
 * 10 seconds and then gives up by returning an error.
 *
 * The visible loop calls i40e_pf_reset() once and then retries it, sleeping
 * 10-20 ms between attempts, for as long as it keeps failing and the
 * 10 second deadline has not passed.
 *
 * Return 0 on success, negative on failure.
 *
 * NOTE(review): the statements after the retry loop (including the return)
 * and the closing brace are not visible in this copy of the file.
 **/ staticint i40e_pf_loop_reset(struct i40e_pf *pf)
{ /* wait max 10 seconds for PF reset to succeed */ constunsignedlong time_end = jiffies + 10 * HZ; struct i40e_hw *hw = &pf->hw; int ret;
/* first attempt is unconditional; retry only while it fails and time remains */
ret = i40e_pf_reset(hw); while (ret != 0 && time_before(jiffies, time_end)) {
usleep_range(10000, 20000);
ret = i40e_pf_reset(hw);
}
/** * i40e_check_fw_empr - check if FW issued unexpected EMP Reset * @pf: board private structure * * Check FW registers to determine if FW issued unexpected EMP Reset. * Every time when unexpected EMP Reset occurs the FW increments * a counter of unexpected EMP Resets. When the counter reaches 10 * the FW should enter the Recovery mode * * Returns true if FW issued unexpected EMP Reset
**/ staticbool i40e_check_fw_empr(struct i40e_pf *pf)
{ const u32 fw_sts = rd32(&pf->hw, I40E_GL_FWSTS) &
I40E_GL_FWSTS_FWS1B_MASK; return (fw_sts > I40E_GL_FWSTS_FWS1B_EMPR_0) &&
(fw_sts <= I40E_GL_FWSTS_FWS1B_EMPR_10);
}
/** * i40e_handle_resets - handle EMP resets and PF resets * @pf: board private structure * * Handle both EMP resets and PF resets and conclude whether there are * any issues regarding these resets. If there are any issues then * generate log entry. * * Return 0 if NIC is healthy or negative value when there are issues * with resets
**/ staticint i40e_handle_resets(struct i40e_pf *pf)
{ constint pfr = i40e_pf_loop_reset(pf); constbool is_empr = i40e_check_fw_empr(pf);
if (is_empr || pfr != 0)
dev_crit(&pf->pdev->dev, "Entering recovery mode due to repeated FW resets. This may take several minutes. Refer to the Intel(R) Ethernet Adapters and Devices User Guide.\n");
return is_empr ? -EIO : pfr;
}
/**
 * i40e_init_recovery_mode - initialize subsystems needed in recovery mode
 * @pf: board private structure
 * @hw: ptr to the hardware info
 *
 * This function does a minimal setup of all subsystems needed for running
 * recovery mode. In order, the visible statements: initialise the interrupt
 * scheme, size and allocate the pf->vsi pointer array, allocate one
 * I40E_VSI_MAIN VSI with a single queue pair, configure and register its
 * netdev, call i40e_dbg_pf_init(), set up the recovery-mode misc vector,
 * send the driver version to firmware and arm the service timer.
 *
 * Returns 0 on success, negative on failure
 *
 * NOTE(review): every error path jumps to err_switch_setup, but that label,
 * the success return and the closing brace are not visible in this copy of
 * the file - confirm against the original.
 * NOTE(review): @hw is not referenced by the visible statements, which use
 * pf->hw instead.
 **/ staticint i40e_init_recovery_mode(struct i40e_pf *pf, struct i40e_hw *hw)
{ struct i40e_vsi *vsi; int err; int v_idx;
err = i40e_init_interrupt_scheme(pf); if (err) goto err_switch_setup;
/* The number of VSIs reported by the FW is the minimum guaranteed * to us; HW supports far more and we share the remaining pool with * the other PFs. We allocate space for more than the guarantee with * the understanding that we might not get them all later.
*/ if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; else
pf->num_alloc_vsi = pf->hw.func_caps.num_vsis;
/* Set up the vsi struct and our local tracking of the MAIN PF vsi. */
pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
GFP_KERNEL); if (!pf->vsi) {
err = -ENOMEM; goto err_switch_setup;
}
/* We allocate one VSI which is needed as absolute minimum * in order to register the netdev
*/
v_idx = i40e_vsi_mem_alloc(pf, I40E_VSI_MAIN); if (v_idx < 0) {
err = v_idx; goto err_switch_setup;
}
pf->lan_vsi = v_idx;
vsi = pf->vsi[v_idx]; if (!vsi) {
err = -EFAULT; goto err_switch_setup;
}
vsi->alloc_queue_pairs = 1;
err = i40e_config_netdev(vsi); if (err) goto err_switch_setup;
err = register_netdev(vsi->netdev); if (err) goto err_switch_setup;
/* remember the registration so it can be told apart from a merely configured netdev */
vsi->netdev_registered = true;
i40e_dbg_pf_init(pf);
err = i40e_setup_misc_vector_for_recovery_mode(pf); if (err) goto err_switch_setup;
/* tell the firmware that we're starting */
i40e_send_version(pf);
/* since everything's happy, start the service_task timer */
mod_timer(&pf->service_timer,
round_jiffies(jiffies + pf->service_timer_period));
/**
 * i40e_set_subsystem_device_id - set subsystem device id
 * @hw: pointer to the hardware info
 *
 * Set PCI subsystem device id either from a pci_dev structure or
 * a specific FW register.
 *
 * NOTE(review): only the lookup of the owning PF is visible in this copy of
 * the file; the statements that choose the id source and the closing brace
 * are missing - confirm against the original.
 **/ staticinlinevoid i40e_set_subsystem_device_id(struct i40e_hw *hw)
{ struct i40e_pf *pf = i40e_hw_to_pf(hw);
/**
 * i40e_probe - Device initialization routine
 * @pdev: PCI device information struct
 * @ent: entry in i40e_pci_tbl
 *
 * i40e_probe initializes a PF identified by a pci_dev structure.
 * The OS initialization, configuring of the PF private structure,
 * and a hardware reset occur.
 *
 * Error handling uses a ladder of labels at the end of the function; each
 * label undoes the steps that had completed before the failing one.
 *
 * Returns 0 on success, negative on failure
 *
 * NOTE(review): this copy of the function appears to have lost statements:
 * the "if (err) {" opened after i40e_init_adminq() is never closed, and the
 * labels err_configure_lan_hmc, err_init_lan_hmc and err_sw_init have no
 * visible goto. Confirm against the original before changing any code here.
 **/ staticint i40e_probe(struct pci_dev *pdev, conststruct pci_device_id *ent)
{ struct i40e_aq_get_phy_abilities_resp abilities; #ifdef CONFIG_I40E_DCB enum i40e_get_fw_lldp_status_resp lldp_status; #endif/* CONFIG_I40E_DCB */ struct i40e_vsi *vsi; struct i40e_pf *pf; struct i40e_hw *hw;
u16 wol_nvm_bits; char nvm_ver[32];
u16 link_status; #ifdef CONFIG_I40E_DCB int status; #endif/* CONFIG_I40E_DCB */ int err;
u32 val;
err = pci_enable_device_mem(pdev); if (err) return err;
/* set up for high or low dma */
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (err) {
/* NOTE(review): err is printed with 0x%x here, while the other messages in this function print it with %d */
dev_err(&pdev->dev, "DMA configuration failed: 0x%x\n", err); goto err_dma;
}
/* set up pci connections */
err = pci_request_mem_regions(pdev, i40e_driver_name); if (err) {
/* NOTE(review): the message names pci_request_selected_regions, but the call above is pci_request_mem_regions */
dev_info(&pdev->dev, "pci_request_selected_regions failed %d\n", err); goto err_pci_reg;
}
pci_set_master(pdev);
/* Now that we have a PCI connection, we need to do the * low level device setup. This is primarily setting up * the Admin Queue structures and then querying for the * device's current profile information.
*/
pf = i40e_alloc_pf(&pdev->dev); if (!pf) {
err = -ENOMEM; goto err_pf_alloc;
}
pf->next_vsi = 0;
pf->pdev = pdev;
/* the PF stays marked DOWN until the clear_bit() further below */
set_bit(__I40E_DOWN, pf->state);
hw = &pf->hw;
pf->ioremap_len = min_t(int, pci_resource_len(pdev, 0),
I40E_MAX_CSR_SPACE); /* We believe that the highest register to read is * I40E_GLGEN_STAT_CLEAR, so we check if the BAR size * is not less than that before mapping to prevent a * kernel panic.
*/ if (pf->ioremap_len < I40E_GLGEN_STAT_CLEAR) {
dev_err(&pdev->dev, "Cannot map registers, bar size 0x%X too small, aborting\n",
pf->ioremap_len);
err = -ENOMEM; goto err_ioremap;
}
hw->hw_addr = ioremap(pci_resource_start(pdev, 0), pf->ioremap_len); if (!hw->hw_addr) {
err = -EIO;
dev_info(&pdev->dev, "ioremap(0x%04x, 0x%04x) failed: 0x%x\n",
(unsignedint)pci_resource_start(pdev, 0),
pf->ioremap_len, err); goto err_ioremap;
}
/* mirror the PCI identification of the device into the shared-code hw structure */
hw->vendor_id = pdev->vendor;
hw->device_id = pdev->device;
pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
hw->subsystem_vendor_id = pdev->subsystem_vendor;
i40e_set_subsystem_device_id(hw);
hw->bus.device = PCI_SLOT(pdev->devfn);
hw->bus.func = PCI_FUNC(pdev->devfn);
hw->bus.bus_id = pdev->bus->number;
/* Select something other than the 802.1ad ethertype for the * switch to use internally and drop on ingress.
*/
hw->switch_tag = 0xffff;
hw->first_tag = ETH_P_8021AD;
hw->second_tag = ETH_P_8021Q;
/* set up the locks for the AQ, do this only once in probe * and destroy them only once in remove
*/
mutex_init(&hw->aq.asq_mutex);
mutex_init(&hw->aq.arq_mutex);
/* do a special CORER for clearing PXE mode once at init */ if (hw->revision_id == 0 &&
(rd32(hw, I40E_GLLAN_RCTL_0) & I40E_GLLAN_RCTL_0_PXE_MODE_MASK)) {
wr32(hw, I40E_GLGEN_RTRIG, I40E_GLGEN_RTRIG_CORER_MASK);
i40e_flush(hw);
msleep(200);
pf->corer_count++;
i40e_clear_pxe_mode(hw);
}
/* Reset here to make sure all is clean and to define PF 'n' */
i40e_clear_hw(hw);
err = i40e_set_mac_type(hw); if (err) {
dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
err); goto err_pf_reset;
}
err = i40e_handle_resets(pf); if (err) goto err_pf_reset;
err = i40e_init_shared_code(hw); if (err) {
dev_warn(&pdev->dev, "unidentified MAC or BLANK NVM: %d\n",
err); goto err_pf_reset;
}
/* set up a default setting for link flow control */
pf->hw.fc.requested_mode = I40E_FC_NONE;
err = i40e_init_adminq(hw); if (err) { if (err == -EIO)
dev_info(&pdev->dev, "The driver for the device stopped because the NVM image v%u.%u is newer than expected v%u.%u. You must install the most recent version of the network driver.\n",
hw->aq.api_maj_ver,
hw->aq.api_min_ver,
I40E_FW_API_VERSION_MAJOR,
I40E_FW_MINOR_VERSION(hw)); else
dev_info(&pdev->dev, "The driver for the device stopped because the device firmware failed to init. Try updating your NVM image.\n");
/* NOTE(review): the "if (err) {" opened for i40e_init_adminq() above has no visible goto or closing brace before the next statement - lines appear to be missing here */
/* provide nvm, fw, api versions, vendor:device id, subsys vendor:device id */
i40e_nvm_version_str(hw, nvm_ver, sizeof(nvm_ver));
dev_info(&pdev->dev, "fw %d.%d.%05d api %d.%d nvm %s [%04x:%04x] [%04x:%04x]\n",
hw->aq.fw_maj_ver, hw->aq.fw_min_ver, hw->aq.fw_build,
hw->aq.api_maj_ver, hw->aq.api_min_ver, nvm_ver,
hw->vendor_id, hw->device_id, hw->subsystem_vendor_id,
hw->subsystem_device_id);
if (i40e_is_aq_api_ver_ge(hw, I40E_FW_API_VERSION_MAJOR,
I40E_FW_MINOR_VERSION(hw) + 1))
dev_dbg(&pdev->dev, "The driver for the device detected a newer version of the NVM image v%u.%u than v%u.%u.\n",
hw->aq.api_maj_ver,
hw->aq.api_min_ver,
I40E_FW_API_VERSION_MAJOR,
I40E_FW_MINOR_VERSION(hw)); elseif (i40e_is_aq_api_ver_lt(hw, 1, 4))
dev_info(&pdev->dev, "The driver for the device detected an older version of the NVM image v%u.%u than expected v%u.%u. Please update the NVM image.\n",
hw->aq.api_maj_ver,
hw->aq.api_min_ver,
I40E_FW_API_VERSION_MAJOR,
I40E_FW_MINOR_VERSION(hw));
i40e_verify_eeprom(pf);
/* Rev 0 hardware was never productized */ if (hw->revision_id < 1)
dev_warn(&pdev->dev, "This device is a pre-production adapter/LOM. Please be aware there may be issues with your hardware. If you are experiencing problems please contact your Intel or hardware representative who provided you with this hardware.\n");
i40e_clear_pxe_mode(hw);
err = i40e_get_capabilities(pf, i40e_aqc_opc_list_func_capabilities); if (err) goto err_adminq_setup;
/* Disable LLDP for NICs that have firmware versions lower than v4.3. * Ignore error return codes because if it was already disabled via * hardware settings this will fail
*/ if (test_bit(I40E_HW_CAP_STOP_FW_LLDP, pf->hw.caps)) {
dev_info(&pdev->dev, "Stopping firmware LLDP agent.\n");
i40e_aq_stop_lldp(hw, true, false, NULL);
}
/* allow a platform config to override the HW addr */
i40e_get_platform_mac_addr(pdev, pf);
if (!is_valid_ether_addr(hw->mac.addr)) {
dev_info(&pdev->dev, "invalid MAC address %pM\n", hw->mac.addr);
err = -EIO; goto err_mac_addr;
}
dev_info(&pdev->dev, "MAC address: %pM\n", hw->mac.addr);
ether_addr_copy(hw->mac.perm_addr, hw->mac.addr);
i40e_get_port_mac_addr(hw, hw->mac.port_addr); if (is_valid_ether_addr(hw->mac.port_addr))
set_bit(I40E_HW_CAP_PORT_ID_VALID, pf->hw.caps);
/* NVM bit on means WoL disabled for the port */
i40e_read_nvm_word(hw, I40E_SR_NVM_WAKE_ON_LAN, &wol_nvm_bits); if (BIT (hw->port) & wol_nvm_bits || hw->partition_id != 1)
pf->wol_en = false; else
pf->wol_en = true;
device_set_wakeup_enable(&pf->pdev->dev, pf->wol_en);
/* set up the main switch operations */
i40e_determine_queue_usage(pf);
err = i40e_init_interrupt_scheme(pf); if (err) goto err_switch_setup;
/* Reduce Tx and Rx pairs for kdump * When MSI-X is enabled, it's not allowed to use more TC queue * pairs than MSI-X vectors (pf->num_lan_msix) exist. Thus * vsi->num_queue_pairs will be equal to pf->num_lan_msix, i.e., 1.
*/ if (is_kdump_kernel())
pf->num_lan_msix = 1;
/* The number of VSIs reported by the FW is the minimum guaranteed * to us; HW supports far more and we share the remaining pool with * the other PFs. We allocate space for more than the guarantee with * the understanding that we might not get them all later.
*/ if (pf->hw.func_caps.num_vsis < I40E_MIN_VSI_ALLOC)
pf->num_alloc_vsi = I40E_MIN_VSI_ALLOC; else
pf->num_alloc_vsi = pf->hw.func_caps.num_vsis; if (pf->num_alloc_vsi > UDP_TUNNEL_NIC_MAX_SHARING_DEVICES) {
dev_warn(&pf->pdev->dev, "limiting the VSI count due to UDP tunnel limitation %d > %d\n",
pf->num_alloc_vsi, UDP_TUNNEL_NIC_MAX_SHARING_DEVICES);
pf->num_alloc_vsi = UDP_TUNNEL_NIC_MAX_SHARING_DEVICES;
}
/* Set up the *vsi struct and our local tracking of the MAIN PF vsi. */
pf->vsi = kcalloc(pf->num_alloc_vsi, sizeof(struct i40e_vsi *),
GFP_KERNEL); if (!pf->vsi) {
err = -ENOMEM; goto err_switch_setup;
}
#ifdef CONFIG_PCI_IOV /* prep for VF support */ if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) &&
test_bit(I40E_FLAG_MSIX_ENA, pf->flags) &&
!test_bit(__I40E_BAD_EEPROM, pf->state)) { if (pci_num_vf(pdev))
set_bit(I40E_FLAG_VEB_MODE_ENA, pf->flags);
} #endif
err = i40e_setup_pf_switch(pf, false, false); if (err) {
dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err); goto err_vsis;
}
vsi = i40e_pf_get_main_vsi(pf);
INIT_LIST_HEAD(&vsi->ch_list);
/* if FDIR VSI was set up, start it now */
vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR); if (vsi)
i40e_vsi_open(vsi);
/* The driver only wants link up/down and module qualification * reports from firmware. Note the negative logic.
*/
err = i40e_aq_set_phy_int_mask(&pf->hw,
~(I40E_AQ_EVENT_LINK_UPDOWN |
I40E_AQ_EVENT_MEDIA_NA |
I40E_AQ_EVENT_MODULE_QUAL_FAIL), NULL); if (err)
dev_info(&pf->pdev->dev, "set phy mask fail, err %pe aq_err %s\n",
ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
/* VF MDD event logs are rate limited to one second intervals */
ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);
/* Reconfigure hardware for allowing smaller MSS in the case * of TSO, so that we avoid the MDD being fired and causing * a reset in the case of small MSS+TSO.
*/
val = rd32(hw, I40E_REG_MSS); if ((val & I40E_REG_MSS_MIN_MASK) > I40E_64BYTE_MSS) {
val &= ~I40E_REG_MSS_MIN_MASK;
val |= I40E_64BYTE_MSS;
wr32(hw, I40E_REG_MSS, val);
}
if (test_bit(I40E_HW_CAP_RESTART_AUTONEG, pf->hw.caps)) {
msleep(75);
err = i40e_aq_set_link_restart_an(&pf->hw, true, NULL); if (err)
dev_info(&pf->pdev->dev, "link restart failed, err %pe aq_err %s\n",
ERR_PTR(err),
libie_aq_str(pf->hw.aq.asq_last_status));
} /* The main driver is (mostly) up and happy. We need to set this state * before setting up the misc vector or we get a race and the vector * ends up disabled forever.
*/
clear_bit(__I40E_DOWN, pf->state);
/* In case of MSIX we are going to setup the misc vector right here * to handle admin queue events etc. In case of legacy and MSI * the misc functionality and queue processing is combined in * the same vector and that gets setup at open.
*/ if (test_bit(I40E_FLAG_MSIX_ENA, pf->flags)) {
err = i40e_setup_misc_vector(pf); if (err) {
dev_info(&pdev->dev, "setup of misc vector failed: %d\n", err);
i40e_cloud_filter_exit(pf);
i40e_fdir_teardown(pf); goto err_vsis;
}
}
#ifdef CONFIG_PCI_IOV /* prep for VF support */ if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags) &&
test_bit(I40E_FLAG_MSIX_ENA, pf->flags) &&
!test_bit(__I40E_BAD_EEPROM, pf->state)) { /* disable link interrupts for VFs */
val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM);
val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK;
wr32(hw, I40E_PFGEN_PORTMDIO_NUM, val);
i40e_flush(hw);
if (pci_num_vf(pdev)) {
dev_info(&pdev->dev, "Active VFs found, allocating resources.\n");
err = i40e_alloc_vfs(pf, pci_num_vf(pdev)); if (err)
dev_info(&pdev->dev, "Error %d allocating resources for existing VFs\n",
err);
}
} #endif/* CONFIG_PCI_IOV */
if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
pf->iwarp_base_vector = i40e_get_lump(pf, pf->irq_pile,
pf->num_iwarp_msix,
I40E_IWARP_IRQ_PILE_ID); if (pf->iwarp_base_vector < 0) {
dev_info(&pdev->dev, "failed to get tracking for %d vectors for IWARP err=%d\n",
pf->num_iwarp_msix, pf->iwarp_base_vector);
clear_bit(I40E_FLAG_IWARP_ENA, pf->flags);
}
}
i40e_dbg_pf_init(pf);
/* tell the firmware that we're starting */
i40e_send_version(pf);
/* since everything's happy, start the service_task timer */
mod_timer(&pf->service_timer,
round_jiffies(jiffies + pf->service_timer_period));
/* add this PF to client device list and launch a client service task */ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
err = i40e_lan_add_device(pf); if (err)
dev_info(&pdev->dev, "Failed to add PF to client API service list: %d\n",
err);
}
#define PCI_SPEED_SIZE 8 #define PCI_WIDTH_SIZE 8 /* Devices on the IOSF bus do not have this information * and will report PCI Gen 1 x 1 by default so don't bother * checking them.
*/ if (!test_bit(I40E_HW_CAP_NO_PCI_LINK_CHECK, pf->hw.caps)) { char speed[PCI_SPEED_SIZE] = "Unknown"; char width[PCI_WIDTH_SIZE] = "Unknown";
/* Get the negotiated link width and speed from PCI config * space
*/
pcie_capability_read_word(pf->pdev, PCI_EXP_LNKSTA,
&link_status);
i40e_set_pci_config_data(hw, link_status);
switch (hw->bus.speed) { case i40e_bus_speed_8000:
strscpy(speed, "8.0", PCI_SPEED_SIZE); break; case i40e_bus_speed_5000:
strscpy(speed, "5.0", PCI_SPEED_SIZE); break; case i40e_bus_speed_2500:
strscpy(speed, "2.5", PCI_SPEED_SIZE); break; default: break;
} switch (hw->bus.width) { case i40e_bus_width_pcie_x8:
strscpy(width, "8", PCI_WIDTH_SIZE); break; case i40e_bus_width_pcie_x4:
strscpy(width, "4", PCI_WIDTH_SIZE); break; case i40e_bus_width_pcie_x2:
strscpy(width, "2", PCI_WIDTH_SIZE); break; case i40e_bus_width_pcie_x1:
strscpy(width, "1", PCI_WIDTH_SIZE); break; default: break;
}
/* NOTE(review): the speed and width strings filled in above are not used by any visible statement - the line that prints them may be missing from this copy */
if (hw->bus.width < i40e_bus_width_pcie_x8 ||
hw->bus.speed < i40e_bus_speed_8000) {
dev_warn(&pdev->dev, "PCI-Express bandwidth available for this device may be insufficient for optimal performance.\n");
dev_warn(&pdev->dev, "Please move the device to a different PCI-e link with more lanes and/or higher transfer rate.\n");
}
}
/* get the requested speeds from the fw */
err = i40e_aq_get_phy_capabilities(hw, false, false, &abilities, NULL); if (err)
dev_dbg(&pf->pdev->dev, "get requested speeds ret = %pe last_status = %s\n",
ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
/* NOTE(review): abilities is declared without an initialiser and is read below even when the call above failed - confirm the callee fills it on error */
pf->hw.phy.link_info.requested_speeds = abilities.link_speed;
/* set the FEC config due to the board capabilities */
i40e_set_fec_in_flags(abilities.fec_cfg_curr_mod_ext_info, pf->flags);
/* get the supported phy types from the fw */
err = i40e_aq_get_phy_capabilities(hw, false, true, &abilities, NULL); if (err)
dev_dbg(&pf->pdev->dev, "get supported phy types ret = %pe last_status = %s\n",
ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
#define MAX_FRAME_SIZE_DEFAULT 0x2600
err = i40e_aq_set_mac_config(hw, MAX_FRAME_SIZE_DEFAULT, NULL); if (err)
dev_warn(&pdev->dev, "set mac config ret = %pe last_status = %s\n",
ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status));
/* Make sure the MFS is set to the expected value */
val = rd32(hw, I40E_PRTGL_SAH);
FIELD_MODIFY(I40E_PRTGL_SAH_MFS_MASK, &val, MAX_FRAME_SIZE_DEFAULT);
wr32(hw, I40E_PRTGL_SAH, val);
/* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out * PAUSE or PFC frames and potentially controlling traffic for other * PF/VF VSIs. * The FW can still send Flow control frames if enabled.
*/
i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
pf->main_vsi_seid);
if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
(pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
set_bit(I40E_HW_CAP_PHY_CONTROLS_LEDS, pf->hw.caps); if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722)
set_bit(I40E_HW_CAP_CRT_RETIMER, pf->hw.caps); /* print a string summarizing features */
i40e_print_features(pf);
i40e_devlink_register(pf);
return 0;
/* Unwind what we've done if something failed in the setup */
err_vsis:
set_bit(__I40E_DOWN, pf->state);
i40e_clear_interrupt_scheme(pf);
kfree(pf->vsi);
err_switch_setup:
i40e_reset_interrupt_capability(pf);
timer_shutdown_sync(&pf->service_timer);
/* NOTE(review): no visible goto targets err_configure_lan_hmc, err_init_lan_hmc or err_sw_init in this copy of the function */
err_mac_addr:
err_configure_lan_hmc:
(void)i40e_shutdown_lan_hmc(hw);
err_init_lan_hmc:
kfree(pf->qp_pile);
err_sw_init:
err_adminq_setup:
err_pf_reset:
iounmap(hw->hw_addr);
err_ioremap:
i40e_free_pf(pf);
err_pf_alloc:
pci_release_mem_regions(pdev);
err_pci_reg:
err_dma:
pci_disable_device(pdev); return err;
}
/**
 * i40e_remove - Device removal routine
 * @pdev: PCI device information struct
 *
 * i40e_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device. This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 *
 * The visible sequence: claim the reset-recovery bit, free VFs, stop the
 * service timer and task, then either take the short recovery-mode path
 * (unregister and free the single netdev, jump to unmap) or tear down
 * Flow Director, VEBs, VSIs, cloud filters, the client device and the HMC.
 * From the unmap label on, both paths shut down the admin queue and clear
 * the interrupt scheme.
 *
 * NOTE(review): this copy of the function ends inside the final
 * i40e_pf_for_each_vsi() loop; the rest of the teardown and the closing
 * braces are not visible - confirm against the original.
 **/ staticvoid i40e_remove(struct pci_dev *pdev)
{ struct i40e_pf *pf = pci_get_drvdata(pdev); struct i40e_hw *hw = &pf->hw; struct i40e_vsi *vsi; struct i40e_veb *veb; int ret_code; int i;
/* Grab __I40E_RESET_RECOVERY_PENDING and set __I40E_IN_REMOVE * flags, once they are set, i40e_rebuild should not be called as * i40e_prep_for_reset always returns early.
*/ while (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, pf->state))
usleep_range(1000, 2000);
set_bit(__I40E_IN_REMOVE, pf->state);
if (test_bit(I40E_FLAG_SRIOV_ENA, pf->flags)) {
set_bit(__I40E_VF_RESETS_DISABLED, pf->state);
i40e_free_vfs(pf);
clear_bit(I40E_FLAG_SRIOV_ENA, pf->flags);
} /* no more scheduling of any task */
set_bit(__I40E_SUSPENDED, pf->state);
set_bit(__I40E_DOWN, pf->state); if (pf->service_timer.function)
timer_shutdown_sync(&pf->service_timer); if (pf->service_task.func)
cancel_work_sync(&pf->service_task);
/* NOTE(review): the vsi declared inside this branch shadows the function-level vsi */
if (test_bit(__I40E_RECOVERY_MODE, pf->state)) { struct i40e_vsi *vsi = pf->vsi[0];
/* We know that we have allocated only one vsi for this PF, * it was just for registering netdevice, so the interface * could be visible in the 'ifconfig' output
*/
unregister_netdev(vsi->netdev);
free_netdev(vsi->netdev);
goto unmap;
}
/* Client close must be called explicitly here because the timer * has been stopped.
*/
i40e_notify_client_of_netdev_close(pf, false);
i40e_fdir_teardown(pf);
/* If there is a switch structure or any orphans, remove them. * This will leave only the PF's VSI remaining.
*/
i40e_pf_for_each_veb(pf, i, veb) if (veb->uplink_seid == pf->mac_seid ||
veb->uplink_seid == 0)
i40e_switch_branch_release(veb);
/* Now we can shutdown the PF's VSIs, just before we kill * adminq and hmc.
*/
i40e_pf_for_each_vsi(pf, i, vsi) {
i40e_vsi_close(vsi);
i40e_vsi_release(vsi);
pf->vsi[i] = NULL;
}
i40e_cloud_filter_exit(pf);
/* remove attached clients */ if (test_bit(I40E_FLAG_IWARP_ENA, pf->flags)) {
ret_code = i40e_lan_del_device(pf); if (ret_code)
dev_warn(&pdev->dev, "Failed to delete client device: %d\n",
ret_code);
}
/* shutdown and destroy the HMC */ if (hw->hmc.hmc_obj) {
ret_code = i40e_shutdown_lan_hmc(hw); if (ret_code)
dev_warn(&pdev->dev, "Failed to destroy the HMC resources: %d\n",
ret_code);
}
unmap: /* Free MSI/legacy interrupt 0 when in recovery mode. */ if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
!test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
free_irq(pf->pdev->irq, pf);
/* shutdown the adminq */
i40e_shutdown_adminq(hw);
/* destroy the locks only once, here */
mutex_destroy(&hw->aq.arq_mutex);
mutex_destroy(&hw->aq.asq_mutex);
/* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
rtnl_lock();
i40e_clear_interrupt_scheme(pf);
/* rings are only cleared outside recovery mode */
i40e_pf_for_each_vsi(pf, i, vsi) { if (!test_bit(__I40E_RECOVERY_MODE, pf->state))
i40e_vsi_clear_rings(vsi);
/**
 * i40e_enable_mc_magic_wake - enable multicast magic packet wake up
 * using the mac_address_write admin q function
 * @pf: pointer to i40e_pf struct
 *
 * Issues two MAC address write admin queue commands with the same address:
 * the first with an LAA write type, the second with the multicast magic
 * enable flags. If the first command fails the second is not attempted.
 * Failures are only logged; nothing is returned to the caller.
 **/
static void i40e_enable_mc_magic_wake(struct i40e_pf *pf)
{
	struct i40e_vsi *main_vsi = i40e_pf_get_main_vsi(pf);
	struct i40e_hw *hw = &pf->hw;
	u8 mac_addr[6];
	u16 flags;
	int ret;

	/* Get current MAC address in case it's an LAA */
	if (main_vsi && main_vsi->netdev) {
		ether_addr_copy(mac_addr, main_vsi->netdev->dev_addr);
	} else {
		dev_err(&pf->pdev->dev, "Failed to retrieve MAC address; using default\n");
		ether_addr_copy(mac_addr, hw->mac.addr);
	}

	/* The FW expects the mac address write cmd to first be called with
	 * one of these flags before calling it again with the multicast
	 * enable flags.
	 */
	flags = I40E_AQC_WRITE_TYPE_LAA_WOL;

	/* with flex10 enabled, every partition other than 1 uses LAA_ONLY */
	if (hw->func_caps.flex10_enable && hw->partition_id != 1)
		flags = I40E_AQC_WRITE_TYPE_LAA_ONLY;

	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
	if (ret) {
		/* message is newline-terminated like every other log line here */
		dev_err(&pf->pdev->dev, "Failed to update MAC address registers; cannot enable Multicast Magic packet wake up\n");
		return;
	}

	flags = I40E_AQC_MC_MAG_EN
			| I40E_AQC_WOL_PRESERVE_ON_PFR
			| I40E_AQC_WRITE_TYPE_UPDATE_MC_MAG;
	ret = i40e_aq_mac_address_write(hw, flags, mac_addr, NULL);
	if (ret)
		dev_err(&pf->pdev->dev, "Failed to enable Multicast Magic Packet wake up\n");
}
/* NOTE(review): the statements from here down to the closing brace have no
 * enclosing function header in this copy of the file. They use pf, err and
 * dev without a visible declaration, and rtnl_lock() is taken twice with no
 * visible rtnl_unlock(). They look like the remains of more than one
 * function - confirm against the original before editing.
 */
/* Ensure service task will not be running */
timer_delete_sync(&pf->service_timer);
cancel_work_sync(&pf->service_task);
/* Client close must be called explicitly here because the timer * has been stopped.
*/
i40e_notify_client_of_netdev_close(pf, false);
/* multicast magic wake is only armed when the capability bit is set and WoL is enabled */
if (test_bit(I40E_HW_CAP_WOL_MC_MAGIC_PKT_WAKE, pf->hw.caps) &&
pf->wol_en)
i40e_enable_mc_magic_wake(pf);
/* Since we're going to destroy queues during the * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this * whole section
*/
rtnl_lock();
/* Clear the interrupt scheme and release our IRQs so that the system * can safely hibernate even when there are a large number of CPUs. * Otherwise hibernation might fail when mapping all the vectors back * to CPU0.
*/
i40e_clear_interrupt_scheme(pf);
/* We need to hold the RTNL lock prior to restoring interrupt schemes, * since we're going to be restoring queues
*/
rtnl_lock();
/* We cleared the interrupt scheme when we suspended, so we need to * restore it now to resume device functionality.
*/
err = i40e_restore_interrupt_scheme(pf); if (err) {
dev_err(dev, "Cannot restore interrupt scheme: %d\n",
err);
}
/* Clear suspended state last after everything is recovered */
clear_bit(__I40E_SUSPENDED, pf->state);
/* Restart the service task */
mod_timer(&pf->service_timer,
round_jiffies(jiffies + pf->service_timer_period));
return 0;
}
/**
 * i40e_pci_error_detected - warning that something funky happened in PCI land
 * @pdev: PCI device information struct
 * @error: the type of PCI error
 *
 * Called to warn that something happened and the error handling steps
 * are in progress. Gives the driver the chance to quiesce the device
 * ahead of remediation.
 *
 * Without driver data (the error hit during probe) nothing can be recovered
 * and PCI_ERS_RESULT_DISCONNECT is returned. Otherwise I/O is suspended,
 * unless the PF is already flagged as suspended, and a slot reset is
 * requested with PCI_ERS_RESULT_NEED_RESET.
 **/
static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
						pci_channel_state_t error)
{
	pci_ers_result_t verdict = PCI_ERS_RESULT_NEED_RESET;
	struct i40e_pf *pf = pci_get_drvdata(pdev);

	if (pf) {
		/* shutdown all operations, unless that already happened */
		if (!test_bit(__I40E_SUSPENDED, pf->state))
			i40e_io_suspend(pf);
	} else {
		dev_info(&pdev->dev, "Cannot recover - error happened during device probe\n");
		verdict = PCI_ERS_RESULT_DISCONNECT;
	}

	return verdict;
}
/**
 * i40e_pci_error_slot_reset - a PCI slot reset just happened
 * @pdev: PCI device information struct
 *
 * Called to find if the driver can work with the device now that
 * the pci slot has been reset. If a basic connection seems good
 * (registers are readable and have sane content) then return a
 * happy little PCI_ERS_RESULT_xxx.
 *
 * The device is re-enabled and its PCI state restored; the result is
 * PCI_ERS_RESULT_RECOVERED only when I40E_GLGEN_RTRIG then reads back as
 * zero, and PCI_ERS_RESULT_DISCONNECT in every other case.
 *
 * NOTE(review): the return statement and the closing brace are not visible
 * in this copy of the file - confirm against the original.
 **/ static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev)
{ struct i40e_pf *pf = pci_get_drvdata(pdev);
pci_ers_result_t result;
u32 reg;
dev_dbg(&pdev->dev, "%s\n", __func__); /* enable I/O and memory of the device */ if (pci_enable_device(pdev)) {
dev_info(&pdev->dev, "Cannot re-enable PCI device after reset.\n");
result = PCI_ERS_RESULT_DISCONNECT;
} else {
pci_set_master(pdev);
pci_restore_state(pdev);
pci_save_state(pdev);
pci_wake_from_d3(pdev, false);
/* a zero reset-trigger register is taken as the sign of a usable device */
reg = rd32(&pf->hw, I40E_GLGEN_RTRIG); if (reg == 0)
result = PCI_ERS_RESULT_RECOVERED; else
result = PCI_ERS_RESULT_DISCONNECT;
}
/**
 * i40e_pci_error_resume - restart operations after PCI error recovery
 * @pdev: PCI device information struct
 *
 * Called to allow the driver to bring things back up after PCI error
 * and/or reset recovery has finished.
 *
 * Returns immediately when the PF is flagged __I40E_SUSPENDED.
 *
 * NOTE(review): the statements after the early return free IRQ 0 and clear
 * the interrupt scheme, which does not match "restart operations", and no
 * closing brace is visible. Lines from another function may have been
 * merged in here - confirm against the original.
 **/ staticvoid i40e_pci_error_resume(struct pci_dev *pdev)
{ struct i40e_pf *pf = pci_get_drvdata(pdev);
dev_dbg(&pdev->dev, "%s\n", __func__); if (test_bit(__I40E_SUSPENDED, pf->state)) return;
/* Free MSI/legacy interrupt 0 when in recovery mode. */ if (test_bit(__I40E_RECOVERY_MODE, pf->state) &&
!test_bit(I40E_FLAG_MSIX_ENA, pf->flags))
free_irq(pf->pdev->irq, pf);
/* Since we're going to destroy queues during the * i40e_clear_interrupt_scheme() we should hold the RTNL lock for this * whole section
*/
rtnl_lock();
i40e_clear_interrupt_scheme(pf);
rtnl_unlock();
/**
 * i40e_suspend - PM callback for moving to D3
 * @dev: generic device information structure
 *
 * Marks the PF as suspended and, if it was not already marked, hands over
 * to i40e_io_suspend(). The flag is tested and set in a single atomic
 * operation, so only the first of several callers performs the suspend.
 *
 * Returns 0 when the PF was already suspended, otherwise the result of
 * i40e_io_suspend().
 **/
static int i40e_suspend(struct device *dev)
{
	struct i40e_pf *pf = dev_get_drvdata(dev);

	/* If we're already suspended, then there is nothing to do */
	if (test_and_set_bit(__I40E_SUSPENDED, pf->state))
		return 0;

	return i40e_io_suspend(pf);
}
/**
 * i40e_resume - PM callback for waking up from D3
 * @dev: generic device information structure
 *
 * Hands over to i40e_io_resume() when the PF is flagged as suspended. The
 * flag itself is only tested here, not cleared.
 *
 * Returns 0 when the PF was not suspended, otherwise the result of
 * i40e_io_resume().
 **/
static int i40e_resume(struct device *dev)
{
	struct i40e_pf *pf = dev_get_drvdata(dev);

	/* If we're not suspended, then there is nothing to do */
	if (!test_bit(__I40E_SUSPENDED, pf->state))
		return 0;

	return i40e_io_resume(pf);
}
/**
 * i40e_init_module - Driver registration routine
 *
 * i40e_init_module is the first routine called when the driver is
 * loaded. The visible part creates the driver workqueue and returns
 * -ENOMEM when that fails.
 *
 * NOTE(review): the statements after the workqueue allocation (including
 * any PCI driver registration and the return) and the closing brace are not
 * visible in this copy of the file.
 **/ staticint __init i40e_init_module(void)
{ int err;
/* There is no need to throttle the number of active tasks because * each device limits its own task using a state bit for scheduling * the service task, and the device tasks do not interfere with each * other, so we don't set a max task limit. We must set WQ_MEM_RECLAIM * since we need to be able to guarantee forward progress even under * memory pressure. * * NOTE(review): the call below passes 0 as the flags argument, not * WQ_MEM_RECLAIM - confirm which of the two is intended.
*/
i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name); if (!i40e_wq) {
pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM;
}
/**
 * i40e_exit_module - Driver exit cleanup routine
 *
 * i40e_exit_module is called just before the driver is removed
 * from memory. It unregisters the PCI driver first, then destroys the
 * driver workqueue, the client IDA and finally calls i40e_dbg_exit().
 **/
static void __exit i40e_exit_module(void)
{
	pci_unregister_driver(&i40e_driver);
	destroy_workqueue(i40e_wq);
	ida_destroy(&i40e_client_ida);
	i40e_dbg_exit();
}
module_exit(i40e_exit_module);
Messung V0.5 in Prozent
¤ Dauer der Verarbeitung: 0.695 Sekunden
(vorverarbeitet am 2026-04-28)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.