/* In case lazy-allocation is allowed, postpone allocation until the * end of the NAPI run. We'd still need to make sure the Rx ring has * sufficient buffers to guarantee an additional Rx interrupt.
*/ if (allow_lazy && likely(rxq->filled_buffers > 12)) {
rxq->filled_buffers--; return 0;
}
data = alloc_pages(GFP_ATOMIC, 0); if (unlikely(!data)) return -ENOMEM;
/* Map the entire page as it would be used * for multiple RX buffer segment size mapping.
*/
mapping = dma_map_page(rxq->dev, data, 0,
PAGE_SIZE, rxq->data_direction); if (unlikely(dma_mapping_error(rxq->dev, mapping))) {
__free_page(data); return -ENOMEM;
}
/* Unmap the data of the skb frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++, bds_consumed++) {
tx_data_bd = (struct eth_tx_bd *)
qed_chain_consume(&txq->tx_pbl);
dma_unmap_page(&edev->pdev->dev, BD_UNMAP_ADDR(tx_data_bd),
BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
}
while (bds_consumed++ < nbds)
qed_chain_consume(&txq->tx_pbl);
/* Unmap the data of the skb frags */ for (i = 0; i < nbd; i++) {
tx_data_bd = (struct eth_tx_bd *)
qed_chain_produce(&txq->tx_pbl); if (tx_data_bd->nbytes)
dma_unmap_page(txq->dev,
BD_UNMAP_ADDR(tx_data_bd),
BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
}
/* Return again prod to its position before this skb was handled */
qed_chain_set_prod(&txq->tx_pbl,
le16_to_cpu(txq->tx_db.data.bd_prod), first_bd);
/* Ring the Tx doorbell for @txq, publishing the current producer value
 * (txq->tx_db.raw) to the device.  Callers must have fully written the
 * BDs before invoking this.
 */
static inline void qede_update_tx_producer(struct qede_tx_queue *txq)
{
	/* wmb makes sure that the BDs data is updated before updating the
	 * producer, otherwise FW may read old data from the BDs.
	 */
	wmb();
	/* Compiler barrier: keep the doorbell write from being reordered
	 * (by the compiler) with the preceding BD stores.
	 */
	barrier();
	writel(txq->tx_db.raw, txq->doorbell_addr);

	/* Fence required to flush the write combined buffer, since another
	 * CPU may write to the same doorbell address and data may be lost
	 * due to relaxed order nature of write combined bar.
	 */
	wmb();
}
/* Need to make the tx_bd_cons update visible to start_xmit() * before checking for netif_tx_queue_stopped(). Without the * memory barrier, there is a small possibility that * start_xmit() will miss it and cause the queue to be stopped * forever. * On the other hand we need an rmb() here to ensure the proper * ordering of bit testing in the following * netif_tx_queue_stopped(txq) call.
*/
smp_mb();
if (unlikely(netif_tx_queue_stopped(netdev_txq))) { /* Taking tx_lock is needed to prevent reenabling the queue * while it's empty. This could have happen if rx_action() gets * suspended in qede_tx_int() after the condition before * netif_tx_wake_queue(), while tx_action (qede_start_xmit()): * * stops the queue->sees fresh tx_bd_cons->releases the queue-> * sends some packets consuming the whole queue again-> * stops the queue
*/
/* This function reuses the buffer(from an offset) from * consumer index to producer index in the bd ring
*/ staticinlinevoid qede_reuse_page(struct qede_rx_queue *rxq, struct sw_rx_data *curr_cons)
{ struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring); struct sw_rx_data *curr_prod;
dma_addr_t new_mapping;
/* In case of allocation failures reuse buffers * from consumer index to produce buffers for firmware
*/ void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count)
{ struct sw_rx_data *curr_cons;
/* Advance an Rx buffer to the next segment within its page.  Once the
 * last segment of the page has been consumed, allocate a replacement
 * page and DMA-unmap the exhausted one; otherwise bump the page refcount
 * and recycle the same page back into the Rx BD ring.
 *
 * Returns 0 on success, or -ENOMEM if a replacement page could not be
 * allocated — in that case the offset is rolled back so the current
 * buffer stays usable.
 */
static inline int qede_realloc_rx_buffer(struct qede_rx_queue *rxq,
					 struct sw_rx_data *curr_cons)
{
	/* Move to the next segment in the page */
	curr_cons->page_offset += rxq->rx_buf_seg_size;

	if (curr_cons->page_offset == PAGE_SIZE) {
		if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
			/* Since we failed to allocate new buffer
			 * current buffer can be used again.
			 */
			curr_cons->page_offset -= rxq->rx_buf_seg_size;
			return -ENOMEM;
		}

		dma_unmap_page(rxq->dev, curr_cons->mapping,
			       PAGE_SIZE, rxq->data_direction);
	} else {
		/* Increment refcount of the page as we don't want
		 * network stack to take the ownership of the page
		 * which can be recycled multiple times by the driver.
		 */
		page_ref_inc(curr_cons->data);
		qede_reuse_page(rxq, curr_cons);
	}

	return 0;
}
/* Make sure that the BD and SGE data is updated before updating the * producers since FW might read the BD/SGE right after the producer * is updated.
*/
wmb();
if (unlikely(tpa_info->state != QEDE_AGG_STATE_START)) goto out;
/* Add one frag and update the appropriate fields in the skb */
skb_fill_page_desc(skb, tpa_info->frag_id++,
current_bd->data,
current_bd->page_offset + rxq->rx_headroom,
len_on_bd);
if (unlikely(qede_realloc_rx_buffer(rxq, current_bd))) { /* Incr page ref count to reuse on allocation failure * so that it doesn't get freed while freeing SKB.
*/
page_ref_inc(current_bd->data); goto out;
}
/* For smaller frames still need to allocate skb, memcpy * data and benefit in reusing the page segment instead of * un-mapping it.
*/ if ((len + pad <= edev->rx_copybreak)) { unsignedint offset = bd->page_offset + pad;
skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE); if (unlikely(!skb)) return NULL;
if (unlikely(qede_realloc_rx_buffer(rxq, bd))) { /* Incr page ref count to reuse on allocation failure so * that it doesn't get freed while freeing SKB [as its * already mapped there].
*/
page_ref_inc(bd->data);
dev_kfree_skb_any(skb); return NULL;
}
out: /* We've consumed the first BD and prepared an SKB */
qede_rx_bd_ring_consume(rxq);
if (unlikely(!tpa_info->skb)) {
DP_NOTICE(edev, "Failed to allocate SKB for gro\n");
/* Consume from ring but do not produce since * this might be used by FW still, it will be re-used * at TPA end.
*/
tpa_info->tpa_start_fail = true;
qede_rx_bd_ring_consume(rxq);
tpa_info->state = QEDE_AGG_STATE_ERROR; goto cons_buf;
}
/* This is needed in order to enable forwarding support */
qede_set_gro_params(edev, tpa_info->skb, cqe);
cons_buf: /* We still need to handle bd_len_list to consume buffers */ if (likely(cqe->bw_ext_bd_len_list[0]))
qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
le16_to_cpu(cqe->bw_ext_bd_len_list[0]));
if (unlikely(cqe->bw_ext_bd_len_list[1])) {
DP_ERR(edev, "Unlikely - got a TPA aggregation with more than one bw_ext_bd_len_list entry in the TPA start\n");
tpa_info->state = QEDE_AGG_STATE_ERROR;
}
}
staticvoid qede_gro_receive(struct qede_dev *edev, struct qede_fastpath *fp, struct sk_buff *skb,
u16 vlan_tag)
{ /* FW can send a single MTU sized packet from gro flow * due to aggregation timeout/last segment etc. which * is not expected to be a gro packet. If a skb has zero * frags then simply push it in the stack as non gso skb.
*/ if (unlikely(!skb->data_len)) {
skb_shinfo(skb)->gso_type = 0;
skb_shinfo(skb)->gso_size = 0; goto send_skb;
}
#ifdef CONFIG_INET if (skb_shinfo(skb)->gso_size) {
skb_reset_network_header(skb);
switch (skb->protocol) { case htons(ETH_P_IP):
qede_gro_ip_csum(skb); break; case htons(ETH_P_IPV6):
qede_gro_ipv6_csum(skb); break; default:
DP_ERR(edev, "Error: FW GRO supports only IPv4/IPv6, not 0x%04x\n",
ntohs(skb->protocol));
}
} #endif
if (tpa_info->buffer.page_offset == PAGE_SIZE)
dma_unmap_page(rxq->dev, tpa_info->buffer.mapping,
PAGE_SIZE, rxq->data_direction);
for (i = 0; cqe->len_list[i]; i++)
qede_fill_frag_skb(edev, rxq, cqe->tpa_agg_index,
le16_to_cpu(cqe->len_list[i])); if (unlikely(i > 1))
DP_ERR(edev, "Strange - TPA emd with more than a single len_list entry\n");
if (unlikely(tpa_info->state != QEDE_AGG_STATE_START)) goto err;
/* Sanity */ if (unlikely(cqe->num_of_bds != tpa_info->frag_id + 1))
DP_ERR(edev, "Strange - TPA had %02x BDs, but SKB has only %d frags\n",
cqe->num_of_bds, tpa_info->frag_id); if (unlikely(skb->len != le16_to_cpu(cqe->total_packet_len)))
DP_ERR(edev, "Strange - total packet len [cqe] is %4x but SKB has len %04x\n",
le16_to_cpu(cqe->total_packet_len), skb->len);
/* Recalculate, as XDP might have changed the headers */
*data_offset = xdp.data - xdp.data_hard_start;
*len = xdp.data_end - xdp.data;
if (act == XDP_PASS) returntrue;
/* Count number of packets not to be passed to stack */
rxq->xdp_no_pass++;
switch (act) { case XDP_TX: /* We need the replacement buffer before transmit. */ if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
qede_recycle_rx_bd_ring(rxq, 1);
/* Now if there's a transmission problem, we'd still have to * throw current buffer, as replacement was already allocated.
*/ if (unlikely(qede_xdp_xmit(fp->xdp_tx, bd->mapping,
*data_offset, *len, bd->data,
NULL))) {
dma_unmap_page(rxq->dev, bd->mapping, PAGE_SIZE,
rxq->data_direction);
__free_page(bd->data);
/* Regardless, we've consumed an Rx BD */
qede_rx_bd_ring_consume(rxq); break; case XDP_REDIRECT: /* We need the replacement buffer before transmit. */ if (unlikely(qede_alloc_rx_buffer(rxq, true))) {
qede_recycle_rx_bd_ring(rxq, 1);
/* We've already used one BD for the SKB. Now take care of the rest */ for (num_frags = cqe->bd_num - 1; num_frags > 0; num_frags--) {
u16 cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
pkt_len;
if (unlikely(!cur_size)) {
DP_ERR(edev, "Still got %d BDs for mapping jumbo, but length became 0\n",
num_frags); goto out;
}
/* We need a replacement buffer for each BD */ if (unlikely(qede_alloc_rx_buffer(rxq, true))) goto out;
/* Now that we've allocated the replacement buffer, * we can safely consume the next BD and map it to the SKB.
*/
bd_cons_idx = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
bd = &rxq->sw_rx_ring[bd_cons_idx];
qede_rx_bd_ring_consume(rxq);
/* Get the data from the SW ring; Consume it only after it's evident * we wouldn't recycle it.
*/
bd_cons_idx = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
bd = &rxq->sw_rx_ring[bd_cons_idx];
fp_cqe = &cqe->fast_path_regular;
len = le16_to_cpu(fp_cqe->len_on_first_bd);
pad = fp_cqe->placement_offset + rxq->rx_headroom;
/* Run eBPF program if one is attached */ if (xdp_prog) if (!qede_rx_xdp(edev, fp, rxq, xdp_prog, bd, fp_cqe,
&pad, &len)) return 0;
/* If this is an error packet then drop it */
flags = cqe->fast_path_regular.pars_flags.flags;
parse_flag = le16_to_cpu(flags);
csum_flag = qede_check_csum(parse_flag); if (unlikely(csum_flag == QEDE_CSUM_ERROR)) { if (qede_pkt_is_ip_fragmented(fp_cqe, parse_flag))
rxq->rx_ip_frags++; else
rxq->rx_hw_errors++;
}
/* Basic validation passed; Need to prepare an SKB. This would also * guarantee to finally consume the first BD upon success.
*/
skb = qede_rx_build_skb(edev, rxq, bd, len, pad); if (!skb) {
rxq->rx_alloc_errors++;
qede_recycle_rx_bd_ring(rxq, fp_cqe->bd_num); return 0;
}
/* In case of Jumbo packet, several PAGE_SIZEd buffers will be pointed * by a single cqe.
*/ if (fp_cqe->bd_num > 1) {
u16 unmapped_frags = qede_rx_build_jumbo(edev, rxq, skb,
fp_cqe, len);
/* Memory barrier to prevent the CPU from doing speculative reads of CQE * / BD in the while-loop before reading hw_comp_cons. If the CQE is * read before it is written by FW, then FW writes CQE and SB, and then * the CPU reads the hw_comp_cons, it will use an old CQE.
*/
rmb();
/* Loop to complete all indicated BDs */ while ((sw_comp_cons != hw_comp_cons) && (work_done < budget)) {
rcv_pkts += qede_rx_process_cqe(edev, fp, rxq);
qed_chain_recycle_consumed(&rxq->rx_comp_ring);
sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
work_done++;
}
rxq->rcv_pkts += rcv_pkts;
/* Allocate replacement buffers */ while (rxq->num_rx_buffers - rxq->filled_buffers) if (qede_alloc_rx_buffer(rxq, false)) break;
/* *_has_*_work() reads the status block, thus we need to ensure that * status block indices have been actually read (qed_sb_update_sb_idx) * prior to this check (*_has_*_work) so that we won't write the * "newer" value of the status block to HW (if there was a DMA right * after qede_has_rx_work and if there is no rmb, the memory reading * (qed_sb_update_sb_idx) may be postponed to right before *_ack_sb). * In this case there will never be another interrupt until there is * another update of the status block, while there is still unhandled * work.
*/
rmb();
if (likely(fp->type & QEDE_FASTPATH_RX)) if (qede_has_rx_work(fp->rxq)) returntrue;
if (fp->type & QEDE_FASTPATH_XDP) if (qede_txq_has_work(fp->xdp_tx)) returntrue;
if (likely(fp->type & QEDE_FASTPATH_TX)) { int cos;
for_each_cos_in_txq(fp->edev, cos) { if (qede_txq_has_work(&fp->txq[cos])) returntrue;
}
}
returnfalse;
}
/********************* * NDO & API related *
*********************/ int qede_poll(struct napi_struct *napi, int budget)
{ struct qede_fastpath *fp = container_of(napi, struct qede_fastpath,
napi); struct qede_dev *edev = fp->edev; int rx_work_done = 0;
u16 xdp_prod;
fp->xdp_xmit = 0;
if (likely(fp->type & QEDE_FASTPATH_TX)) { int cos;
for_each_cos_in_txq(fp->edev, cos) { if (qede_txq_has_work(&fp->txq[cos]))
qede_tx_int(edev, &fp->txq[cos]);
}
}
if ((fp->type & QEDE_FASTPATH_XDP) && qede_txq_has_work(fp->xdp_tx))
qede_xdp_tx_int(edev, fp->xdp_tx);
if (fp->xdp_xmit & QEDE_XDP_REDIRECT)
xdp_do_flush();
/* Handle case where we are called by netpoll with a budget of 0 */ if (rx_work_done < budget || !budget) { if (!qede_poll_is_more_work(fp)) {
napi_complete_done(napi, rx_work_done);
/* Fill the entry in the SW ring and the BDs in the FW ring */
idx = txq->sw_tx_prod;
txq->sw_tx_ring.skbs[idx].skb = skb;
first_bd = (struct eth_tx_1st_bd *)
qed_chain_produce(&txq->tx_pbl);
memset(first_bd, 0, sizeof(*first_bd));
first_bd->data.bd_flags.bitfields =
1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
qede_ptp_tx_ts(edev, skb);
/* Map skb linear data for DMA and set in the first BD */
mapping = dma_map_single(txq->dev, skb->data,
skb_headlen(skb), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(txq->dev, mapping))) {
DP_NOTICE(edev, "SKB mapping failed\n");
qede_free_failed_tx_pkt(txq, first_bd, 0, false);
qede_update_tx_producer(txq); return NETDEV_TX_OK;
}
nbd++;
BD_SET_UNMAP_ADDR_LEN(first_bd, mapping, skb_headlen(skb));
/* In case there is IPv6 with extension headers or LSO we need 2nd and * 3rd BDs.
*/ if (unlikely((xmit_type & XMIT_LSO) | ipv6_ext)) {
second_bd = (struct eth_tx_2nd_bd *)
qed_chain_produce(&txq->tx_pbl);
memset(second_bd, 0, sizeof(*second_bd));
/* Fill the parsing flags & params according to the requested offload */ if (xmit_type & XMIT_L4_CSUM) { /* We don't re-calculate IP checksum as it is already done by * the upper stack
*/
first_bd->data.bd_flags.bitfields |=
1 << ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT;
if (xmit_type & XMIT_ENC) {
first_bd->data.bd_flags.bitfields |=
1 << ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT;
val |= (1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT);
}
/* Legacy FW had flipped behavior in regard to this bit - * I.e., needed to set to prevent FW from touching encapsulated * packets when it didn't need to.
*/ if (unlikely(txq->is_legacy))
val ^= (1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT);
/* If the packet is IPv6 with extension header, indicate that * to FW and pass few params, since the device cracker doesn't * support parsing IPv6 with extension header/s.
*/ if (unlikely(ipv6_ext))
qede_set_params_for_ipv6_ext(skb, second_bd, third_bd);
}
/* @@@TBD - if will not be removed need to check */
third_bd->data.bitfields |=
cpu_to_le16(1 << ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT);
/* Make life easier for FW guys who can't deal with header and * data on same BD. If we need to split, use the second bd...
*/ if (unlikely(skb_headlen(skb) > hlen)) {
DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED, "TSO split header size is %d (%x:%x)\n",
first_bd->nbytes, first_bd->addr.hi,
first_bd->addr.lo);
/* update the first BD with the actual num BDs */
first_bd->data.nbds = nbd;
netdev_tx_sent_queue(netdev_txq, skb->len);
skb_tx_timestamp(skb);
/* Advance packet producer only before sending the packet since mapping * of pages may fail.
*/
txq->sw_tx_prod = (txq->sw_tx_prod + 1) % txq->num_tx_buffers;
/* 'next page' entries are counted in the producer value */
txq->tx_db.data.bd_prod =
cpu_to_le16(qed_chain_get_prod_idx(&txq->tx_pbl));
if (!netdev_xmit_more() || netif_xmit_stopped(netdev_txq))
qede_update_tx_producer(txq);
if (unlikely(qed_chain_get_elem_left(&txq->tx_pbl)
< (MAX_SKB_FRAGS + 1))) { if (netdev_xmit_more())
qede_update_tx_producer(txq);
netif_tx_stop_queue(netdev_txq);
txq->stopped_cnt++;
DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED, "Stop queue was called\n"); /* paired memory barrier is in qede_tx_int(), we have to keep * ordering of set_bit() in netif_tx_stop_queue() and read of * fp->bd_tx_cons
*/
smp_mb();
if ((qed_chain_get_elem_left(&txq->tx_pbl) >=
(MAX_SKB_FRAGS + 1)) &&
(edev->state == QEDE_STATE_OPEN)) {
netif_tx_wake_queue(netdev_txq);
DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED, "Wake queue was called\n");
}
}
switch (vlan_get_protocol(skb)) { case htons(ETH_P_IP):
l4_proto = ip_hdr(skb)->protocol; break; case htons(ETH_P_IPV6):
l4_proto = ipv6_hdr(skb)->nexthdr; break; default: return features;
}
/* Disable offloads for geneve tunnels, as HW can't parse * the geneve header which has option length greater than 32b * and disable offloads for the ports which are not offloaded.
*/ if (l4_proto == IPPROTO_UDP) { struct qede_dev *edev = netdev_priv(dev);
u16 hdrlen, vxln_port, gnv_port;
if ((skb_inner_mac_header(skb) -
skb_transport_header(skb)) > hdrlen ||
(ntohs(udp_hdr(skb)->dest) != vxln_port &&
ntohs(udp_hdr(skb)->dest) != gnv_port)) return features & ~(NETIF_F_CSUM_MASK |
NETIF_F_GSO_MASK);
} elseif (l4_proto == IPPROTO_IPIP) { /* IPIP tunnels are unknown to the device or at least unsupported natively, * offloads for them can't be done trivially, so disable them for such skb.
*/ return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
}
}
return features;
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.7 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.