/* CREATE_TRACE_POINTS only needs to be defined once. Other dpaa files * using trace events only need to #include <trace/events/sched.h>
*/ #define CREATE_TRACE_POINTS #include"dpaa_eth_trace.h"
/* Ingress congestion threshold on FMan ports
 * The size in bytes of the ingress tail-drop threshold on FMan ports.
 * Traffic piling up above this value will be rejected by QMan and discarded
 * by FMan.
 */
#define DPAA_INGRESS_CS_THRESHOLD 0x10000000

/* Size in bytes of the FQ taildrop threshold */
#define DPAA_FQ_TD 0x200000

/* Egress congestion threshold on 1G ports, range 0x1000 .. 0x10000000
 * The size in bytes of the egress Congestion State notification threshold on
 * 1G ports. The 1G dTSECs can quite easily be flooded by cores doing Tx in a
 * tight loop (e.g. by sending UDP datagrams at "while(1) speed"),
 * and the larger the frame size, the more acute the problem.
 * So we have to find a balance between these factors:
 * - avoiding the device staying congested for a prolonged time (risking
 *   the netdev watchdog to fire - see also the tx_timeout module param);
 * - affecting performance of protocols such as TCP, which otherwise
 *   behave well under the congestion notification mechanism;
 * - preventing the Tx cores from tightly-looping (as if the congestion
 *   threshold was too low to be effective);
 * - running out of memory if the CS threshold is set too high.
 */
#define DPAA_CS_THRESHOLD_1G 0x06000000

/* The size in bytes of the egress Congestion State notification threshold on
 * 10G ports, range 0x1000 .. 0x10000000
 */
#define DPAA_CS_THRESHOLD_10G 0x10000000

/* Largest value that the FQD's OAL field can hold */
#define FSL_QMAN_MAX_OAL 127

/* Default alignment for start of data in an Rx FD */
#ifdef CONFIG_DPAA_ERRATUM_A050385
/* aligning data start to 64 avoids DMA transaction splits, unless the buffer
 * is crossing a 4k page boundary
 */
#define DPAA_FD_DATA_ALIGNMENT  (fman_has_errata_a050385() ? 64 : 16)
/* aligning to 256 avoids DMA transaction splits caused by 4k page boundary
 * crossings; also, all SG fragments except the last must have a size multiple
 * of 256 to avoid DMA transaction splits
 */
#define DPAA_A050385_ALIGN 256
#define DPAA_FD_RX_DATA_ALIGNMENT (fman_has_errata_a050385() ? \
				   DPAA_A050385_ALIGN : 16)
#else
#define DPAA_FD_DATA_ALIGNMENT  16
#define DPAA_FD_RX_DATA_ALIGNMENT DPAA_FD_DATA_ALIGNMENT
#endif

/* The DPAA requires 256 bytes reserved and mapped for the SGT */
#define DPAA_SGT_SIZE 256

/* Values for the L3R field of the FM Parse Results */
/* L3 Type field: First IP Present IPv4 */
#define FM_L3_PARSE_RESULT_IPV4	0x8000
/* L3 Type field: First IP Present IPv6 */
#define FM_L3_PARSE_RESULT_IPV6	0x4000
/* Values for the L4R field of the FM Parse Results */
/* L4 Type field: UDP */
#define FM_L4_PARSE_RESULT_UDP	0x40
/* L4 Type field: TCP */
#define FM_L4_PARSE_RESULT_TCP	0x20

/* FD status field indicating whether the FM Parser has attempted to validate
 * the L4 csum of the frame.
 * Note that having this bit set doesn't necessarily imply that the checksum
 * is valid. One would have to check the parse results to find that out.
 */
#define FM_FD_STAT_L4CV         0x00000004

#define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */
#define DPAA_BUFF_RELEASE_MAX 8 /* maximum number of buffers released at once */
/* Although we access another CPU's private data here * we do it at initialization so it is safe
*/
for_each_possible_cpu(i) {
percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
percpu_priv->net_dev = net_dev;
}
if (is_valid_ether_addr(mac_addr)) {
memcpy(net_dev->perm_addr, mac_addr, net_dev->addr_len);
eth_hw_addr_set(net_dev, mac_addr);
} else {
eth_hw_addr_random(net_dev);
err = mac_dev->change_addr(mac_dev->fman_mac,
(const enet_addr_t *)net_dev->dev_addr); if (err) {
dev_err(dev, "Failed to set random MAC address\n"); return -EINVAL;
}
dev_info(dev, "Using random MAC address: %pM\n",
net_dev->dev_addr);
}
/* The rest of the config is filled in by the mac device already */
mac_dev->phylink_config.dev = &net_dev->dev;
mac_dev->phylink_config.type = PHYLINK_NETDEV;
mac_dev->update_speed = dpaa_eth_cgr_set_speed;
mac_dev->phylink = phylink_create(&mac_dev->phylink_config,
dev_fwnode(mac_dev->dev),
mac_dev->phy_if,
mac_dev->phylink_ops); if (IS_ERR(mac_dev->phylink)) {
err = PTR_ERR(mac_dev->phylink);
dev_err_probe(dev, err, "Could not create phylink\n"); return err;
}
/* start without the RUNNING flag, phylib controls it later */
netif_carrier_off(net_dev);
/* Calculates the statistics for the given device by adding the statistics * collected by each CPU.
*/ staticvoid dpaa_get_stats64(struct net_device *net_dev, struct rtnl_link_stats64 *s)
{ int numstats = sizeof(struct rtnl_link_stats64) / sizeof(u64); struct dpaa_priv *priv = netdev_priv(net_dev); struct dpaa_percpu_priv *percpu_priv;
u64 *netstats = (u64 *)s;
u64 *cpustats; int i, j;
/* checks if this bpool is already allocated */ staticbool dpaa_bpid2pool_use(int bpid)
{ if (dpaa_bpid2pool(bpid)) {
refcount_inc(&dpaa_bp_array[bpid]->refs); returntrue;
}
returnfalse;
}
/* Register the pool under its bpid with an initial reference count of one.
 * Called only once per bpid, by dpaa_bp_alloc_pool().
 */
static void dpaa_bpid2pool_map(int bpid, struct dpaa_bp *dpaa_bp)
{
	refcount_set(&dpaa_bp->refs, 1);
	dpaa_bp_array[bpid] = dpaa_bp;
}
staticint dpaa_bp_alloc_pool(struct dpaa_bp *dpaa_bp)
{ int err;
if (dpaa_bp->size == 0 || dpaa_bp->config_count == 0) {
pr_err("%s: Buffer pool is not properly initialized! Missing size or initial number of buffers\n",
__func__); return -EINVAL;
}
/* If the pool is already specified, we only create one per bpid */ if (dpaa_bp->bpid != FSL_DPAA_BPID_INV &&
dpaa_bpid2pool_use(dpaa_bp->bpid)) return 0;
if (dpaa_bp->bpid == FSL_DPAA_BPID_INV) {
dpaa_bp->pool = bman_new_pool(); if (!dpaa_bp->pool) {
pr_err("%s: bman_new_pool() failed\n",
__func__); return -ENODEV;
}
if (dpaa_bp->seed_cb) {
err = dpaa_bp->seed_cb(dpaa_bp); if (err) goto pool_seed_failed;
}
dpaa_bpid2pool_map(dpaa_bp->bpid, dpaa_bp);
return 0;
pool_seed_failed:
pr_err("%s: pool seeding failed\n", __func__);
bman_free_pool(dpaa_bp->pool);
return err;
}
/* Remove and free all the buffers from the given buffer pool. */
static void dpaa_bp_drain(struct dpaa_bp *bp)
{
	u8 batch = 8;
	int ret;

	do {
		struct bm_buffer bmb[8];
		int i;

		ret = bman_acquire(bp->pool, bmb, batch);
		if (ret < 0) {
			/* Already down to single-buffer acquires and even
			 * that failed: the pool is fully drained.
			 */
			if (batch == 1)
				break;

			/* Fewer than 8 buffers left; drain the remainder
			 * one by one.
			 */
			batch = 1;
			ret = 1;
			continue;
		}

		if (bp->free_buf_cb)
			for (i = 0; i < batch; i++)
				bp->free_buf_cb(bp, &bmb[i]);
	} while (ret > 0);
}
/* the mapping between bpid and dpaa_bp is done very late in the * allocation procedure; if something failed before the mapping, the bp * was not configured, therefore we don't need the below instructions
*/ if (!bp) return;
/* Use multiple WQs for FQ assignment: * - Tx Confirmation queues go to WQ1. * - Rx Error and Tx Error queues go to WQ5 (giving them a better chance * to be scheduled, in case there are many more FQs in WQ6). * - Rx Default goes to WQ6. * - Tx queues go to different WQs depending on their priority. Equal * chunks of NR_CPUS queues go to WQ6 (lowest priority), WQ2, WQ1 and * WQ0 (highest priority). * This ensures that Tx-confirmed buffers are timely released. In particular, * it avoids congestion on the Tx Confirm FQs, which can pile up PFDRs if they * are greatly outnumbered by other FQs in the system, while * dequeue scheduling is round-robin.
*/ staticinlinevoid dpaa_assign_wq(struct dpaa_fq *fq, int idx)
{ switch (fq->fq_type) { case FQ_TYPE_TX_CONFIRM: case FQ_TYPE_TX_CONF_MQ:
fq->wq = 1; break; case FQ_TYPE_RX_ERROR: case FQ_TYPE_TX_ERROR:
fq->wq = 5; break; case FQ_TYPE_RX_DEFAULT: case FQ_TYPE_RX_PCD:
fq->wq = 6; break; case FQ_TYPE_TX: switch (idx / dpaa_num_txqs_per_tc()) { case 0: /* Low priority (best effort) */
fq->wq = 6; break; case 1: /* Medium priority */
fq->wq = 2; break; case 2: /* High priority */
fq->wq = 1; break; case 3: /* Very high priority */
fq->wq = 0; break; default:
WARN(1, "Too many TX FQs: more than %zu!\n",
dpaa_max_num_txqs());
} break; default:
WARN(1, "Invalid FQ type %d for FQID %d!\n",
fq->fq_type, fq->fqid);
}
}
/* the PCD FQIDs range needs to be aligned for correct operation */ if (qman_alloc_fqid_range(&fq_base, 2 * DPAA_ETH_PCD_RXQ_NUM)) goto fq_alloc_failed;
/* Congestion group state change notification callback. * Stops the device's egress queues while they are congested and * wakes them upon exiting congested state. * Also updates some CGR-related stats.
*/ staticvoid dpaa_eth_cgscn(struct qman_portal *qm, struct qman_cgr *cgr, int congested)
{ struct dpaa_priv *priv = (struct dpaa_priv *)container_of(cgr, struct dpaa_priv, cgr_data.cgr);
/* Set different thresholds based on the configured MAC speed. * This may turn suboptimal if the MAC is reconfigured at another * speed, so MACs must call dpaa_eth_cgr_set_speed in their link_up * callback.
*/ if (priv->mac_dev->phylink_config.mac_capabilities & MAC_10000FD)
cs_th = DPAA_CS_THRESHOLD_10G; else
cs_th = DPAA_CS_THRESHOLD_1G;
qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1);
if (dpaa_fq->init) {
memset(&initfq, 0, sizeof(initfq));
initfq.we_mask = cpu_to_be16(QM_INITFQ_WE_FQCTRL); /* Note: we may get to keep an empty FQ in cache */
initfq.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_PREFERINCACHE);
/* Try to reduce the number of portal interrupts for * Tx Confirmation FQs.
*/ if (dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM)
initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_AVOIDBLOCK);
/* Put all egress queues in a congestion group of their own. * Sensu stricto, the Tx confirmation queues are Rx FQs, * rather than Tx - but they nonetheless account for the * memory footprint on behalf of egress traffic. We therefore * place them in the netdev's CGR, along with the Tx FQs.
*/ if (dpaa_fq->fq_type == FQ_TYPE_TX ||
dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM ||
dpaa_fq->fq_type == FQ_TYPE_TX_CONF_MQ) {
initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CGID);
initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_CGE);
initfq.fqd.cgid = (u8)priv->cgr_data.cgr.cgrid; /* Set a fixed overhead accounting, in an attempt to * reduce the impact of fixed-size skb shells and the * driver's needed headroom on system memory. This is * especially the case when the egress traffic is * composed of small datagrams. * Unfortunately, QMan's OAL value is capped to an * insufficient value, but even that is better than * no overhead accounting at all.
*/
initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_OAC);
qm_fqd_set_oac(&initfq.fqd, QM_OAC_CG);
qm_fqd_set_oal(&initfq.fqd,
min(sizeof(struct sk_buff) +
priv->tx_headroom,
(size_t)FSL_QMAN_MAX_OAL));
}
staticint dpaa_bman_release(conststruct dpaa_bp *dpaa_bp, struct bm_buffer *bmb, int cnt)
{ int err;
err = bman_release(dpaa_bp->pool, bmb, cnt); /* Should never occur, address anyway to avoid leaking the buffers */ if (WARN_ON(err) && dpaa_bp->free_buf_cb) while (cnt-- > 0)
dpaa_bp->free_buf_cb(dpaa_bp, &bmb[cnt]);
return cnt;
}
staticvoid dpaa_release_sgt_members(struct qm_sg_entry *sgt)
{ struct bm_buffer bmb[DPAA_BUFF_RELEASE_MAX]; struct dpaa_bp *dpaa_bp; int i = 0, j;
memset(bmb, 0, sizeof(bmb));
do {
dpaa_bp = dpaa_bpid2pool(sgt[i].bpid); if (!dpaa_bp) return;
staticvoid count_ern(struct dpaa_percpu_priv *percpu_priv, constunion qm_mr_entry *msg)
{ switch (msg->ern.rc & QM_MR_RC_MASK) { case QM_MR_RC_CGR_TAILDROP:
percpu_priv->ern_cnt.cg_tdrop++; break; case QM_MR_RC_WRED:
percpu_priv->ern_cnt.wred++; break; case QM_MR_RC_ERROR:
percpu_priv->ern_cnt.err_cond++; break; case QM_MR_RC_ORPWINDOW_EARLY:
percpu_priv->ern_cnt.early_window++; break; case QM_MR_RC_ORPWINDOW_LATE:
percpu_priv->ern_cnt.late_window++; break; case QM_MR_RC_FQ_TAILDROP:
percpu_priv->ern_cnt.fq_tdrop++; break; case QM_MR_RC_ORPWINDOW_RETIRED:
percpu_priv->ern_cnt.fq_retired++; break; case QM_MR_RC_ORP_ZERO:
percpu_priv->ern_cnt.orp_zero++; break;
}
}
/* Turn on HW checksum computation for this outgoing frame. * If the current protocol is not something we support in this regard * (or if the stack has already computed the SW checksum), we do nothing. * * Returns 0 if all goes well (or HW csum doesn't apply), and a negative value * otherwise. * * Note that this function may modify the fd->cmd field and the skb data buffer * (the Parse Results area).
*/ staticint dpaa_enable_tx_csum(struct dpaa_priv *priv, struct sk_buff *skb, struct qm_fd *fd, void *parse_results)
{ struct fman_prs_result *parse_result;
u16 ethertype = ntohs(skb->protocol); struct ipv6hdr *ipv6h = NULL; struct iphdr *iph; int retval = 0;
u8 l4_proto;
if (skb->ip_summed != CHECKSUM_PARTIAL) return 0;
/* Note: L3 csum seems to be already computed in sw, but we can't choose * L4 alone from the FM configuration anyway.
*/
/* Fill in some fields of the Parse Results array, so the FMan * can find them as if they came from the FMan Parser.
*/
parse_result = (struct fman_prs_result *)parse_results;
/* If we're dealing with VLAN, get the real Ethernet type */ if (ethertype == ETH_P_8021Q)
ethertype = ntohs(skb_vlan_eth_hdr(skb)->h_vlan_encapsulated_proto);
/* Fill in the relevant L3 parse result fields * and read the L4 protocol type
*/ switch (ethertype) { case ETH_P_IP:
parse_result->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV4);
iph = ip_hdr(skb);
WARN_ON(!iph);
l4_proto = iph->protocol; break; case ETH_P_IPV6:
parse_result->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV6);
ipv6h = ipv6_hdr(skb);
WARN_ON(!ipv6h);
l4_proto = ipv6h->nexthdr; break; default: /* We shouldn't even be here */ if (net_ratelimit())
netif_alert(priv, tx_err, priv->net_dev, "Can't compute HW csum for L3 proto 0x%x\n",
ntohs(skb->protocol));
retval = -EIO; goto return_error;
}
/* Fill in the relevant L4 parse result fields */ switch (l4_proto) { case IPPROTO_UDP:
parse_result->l4r = FM_L4_PARSE_RESULT_UDP; break; case IPPROTO_TCP:
parse_result->l4r = FM_L4_PARSE_RESULT_TCP; break; default: if (net_ratelimit())
netif_alert(priv, tx_err, priv->net_dev, "Can't compute HW csum for L4 proto 0x%x\n",
l4_proto);
retval = -EIO; goto return_error;
}
/* At index 0 is IPOffset_1 as defined in the Parse Results */
parse_result->ip_off[0] = (u8)skb_network_offset(skb);
parse_result->l4_off = (u8)skb_transport_offset(skb);
/* Enable L3 (and L4, if TCP or UDP) HW checksum. */
fd->cmd |= cpu_to_be32(FM_FD_CMD_RPD | FM_FD_CMD_DTC);
/* On P1023 and similar platforms fd->cmd interpretation could * be disabled by setting CONTEXT_A bit ICMD; currently this bit * is not set so we do not need to check; in the future, if/when * using context_a we need to check this bit
*/
for (i = 0; i < 8; i++) {
p = dev_alloc_pages(0); if (unlikely(!p)) {
netdev_err(net_dev, "dev_alloc_pages() failed\n"); goto release_previous_buffs;
}
release_previous_buffs:
WARN_ONCE(1, "dpaa_eth: failed to add buffers on Rx\n");
bm_buffer_set64(&bmb[i], 0); /* Avoid releasing a completely null buffer; bman_release() requires * at least one buffer.
*/ if (likely(i)) goto release_bufs;
return 0;
}
/* Seed the buffer pool: give each possible CPU an allotment of
 * "config_count" buffers, in batches of 8, and credit the per-CPU count.
 */
static int dpaa_bp_seed(struct dpaa_bp *dpaa_bp)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int *count_ptr = per_cpu_ptr(dpaa_bp->percpu_count, cpu);
		int seeded;

		/* Although we access another CPU's counters here
		 * we do it at boot time so it is safe
		 */
		for (seeded = 0; seeded < dpaa_bp->config_count; seeded += 8)
			*count_ptr += dpaa_bp_add_8_bufs(dpaa_bp);
	}

	return 0;
}
/* Add buffers/(pages) for Rx processing whenever bpool count falls below
 * REFILL_THRESHOLD.
 */
static int dpaa_eth_refill_bpool(struct dpaa_bp *dpaa_bp, int *countptr)
{
	int cnt = *countptr;

	/* Still above the refill threshold: nothing to do */
	if (likely(cnt >= FSL_DPAA_ETH_REFILL_THRESHOLD))
		return 0;

	while (cnt < FSL_DPAA_ETH_MAX_BUF_COUNT) {
		int added = dpaa_bp_add_8_bufs(dpaa_bp);

		/* Avoid looping forever if we've temporarily run out of
		 * memory; we'll try again at the next NAPI cycle.
		 */
		if (unlikely(!added))
			break;

		cnt += added;
	}

	*countptr = cnt;
	if (unlikely(cnt < FSL_DPAA_ETH_MAX_BUF_COUNT))
		return -ENOMEM;

	return 0;
}
staticint dpaa_eth_refill_bpools(struct dpaa_priv *priv)
{ struct dpaa_bp *dpaa_bp; int *countptr;
dpaa_bp = priv->dpaa_bp; if (!dpaa_bp) return -EINVAL;
countptr = this_cpu_ptr(dpaa_bp->percpu_count);
/* Cleanup function for outgoing frame descriptors that were built on Tx path, * either contiguous frames or scatter/gather ones. * Skb freeing is not handled here. * * This function may be called on error paths in the Tx function, so guard * against cases when not all fd relevant fields were filled in. To avoid * reading the invalid transmission timestamp for the error paths set ts to * false. * * Return the skb backpointer, since for S/G frames the buffer containing it * gets freed here. * * No skb backpointer is set when transmitting XDP frames. Cleanup the buffer * and return NULL in this case.
*/ staticstruct sk_buff *dpaa_cleanup_tx_fd(conststruct dpaa_priv *priv, conststruct qm_fd *fd, bool ts)
{ constenum dma_data_direction dma_dir = DMA_TO_DEVICE; struct device *dev = priv->net_dev->dev.parent; struct skb_shared_hwtstamps shhwtstamps;
dma_addr_t addr = qm_fd_addr(fd); void *vaddr = phys_to_virt(addr); conststruct qm_sg_entry *sgt; struct dpaa_eth_swbp *swbp; struct sk_buff *skb;
u64 ns; int i;
/* The sgt buffer has been allocated with netdev_alloc_frag(), * it's from lowmem.
*/
sgt = vaddr + qm_fd_get_offset(fd);
/* sgt[0] is from lowmem, was dma_map_single()-ed */
dma_unmap_single(priv->tx_dma_dev, qm_sg_addr(&sgt[0]),
qm_sg_entry_get_len(&sgt[0]), dma_dir);
/* remaining pages were mapped with skb_frag_dma_map() */ for (i = 1; (i < DPAA_SGT_MAX_ENTRIES) &&
!qm_sg_entry_is_final(&sgt[i - 1]); i++) {
WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
/* No skb backpointer is set when running XDP. An xdp_frame * backpointer is saved instead.
*/ if (!skb) {
xdp_return_frame(swbp->xdpf); return NULL;
}
/* DMA unmapping is required before accessing the HW provided info */ if (ts && priv->tx_tstamp &&
skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
memset(&shhwtstamps, 0, sizeof(shhwtstamps));
if (qm_fd_get_format(fd) == qm_fd_sg) /* Free the page that we allocated on Tx for the SGT */
free_pages((unsignedlong)vaddr, 0);
return skb;
}
static u8 rx_csum_offload(conststruct dpaa_priv *priv, conststruct qm_fd *fd)
{ /* The parser has run and performed L4 checksum validation. * We know there were no parser errors (and implicitly no * L4 csum error), otherwise we wouldn't be here.
*/ if ((priv->net_dev->features & NETIF_F_RXCSUM) &&
(be32_to_cpu(fd->status) & FM_FD_STAT_L4CV)) return CHECKSUM_UNNECESSARY;
/* We're here because either the parser didn't run or the L4 checksum * was not verified. This may include the case of a UDP frame with * checksum zero or an L4 proto other than TCP/UDP
*/ return CHECKSUM_NONE;
}
/* True if pointer x is aligned to a bytes (fixes fused "unsignedlong" token,
 * which did not compile).
 */
#define PTR_IS_ALIGNED(x, a) (IS_ALIGNED((unsigned long)(x), (a)))
/* Build a linear skb around the received buffer. * We are guaranteed there is enough room at the end of the data buffer to * accommodate the shared info area of the skb.
*/ staticstruct sk_buff *contig_fd_to_skb(conststruct dpaa_priv *priv, conststruct qm_fd *fd)
{
ssize_t fd_off = qm_fd_get_offset(fd);
dma_addr_t addr = qm_fd_addr(fd); struct dpaa_bp *dpaa_bp; struct sk_buff *skb; void *vaddr;
/* Build an skb with the data of the first S/G entry in the linear portion and * the rest of the frame as skb fragments. * * The page fragment holding the S/G Table is recycled here.
*/ staticstruct sk_buff *sg_fd_to_skb(conststruct dpaa_priv *priv, conststruct qm_fd *fd)
{
ssize_t fd_off = qm_fd_get_offset(fd);
dma_addr_t addr = qm_fd_addr(fd); conststruct qm_sg_entry *sgt; struct page *page, *head_page; struct dpaa_bp *dpaa_bp; void *vaddr, *sg_vaddr; struct sk_buff *skb;
dma_addr_t sg_addr; int page_offset; unsignedint sz; int *count_ptr; int i, j;
/* Iterate through the SGT entries and add data buffers to the skb */
sgt = vaddr + fd_off;
skb = NULL; for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) { /* Extension bit is not supported */
WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
/* We may use multiple Rx pools */
dpaa_bp = dpaa_bpid2pool(sgt[i].bpid); if (!dpaa_bp) goto free_buffers;
if (!skb) {
sz = dpaa_bp->size +
SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
skb = build_skb(sg_vaddr, sz); if (WARN_ON(!skb)) goto free_buffers;
skb->ip_summed = rx_csum_offload(priv, fd);
/* Make sure forwarded skbs will have enough space * on Tx, if extra headers are added.
*/
WARN_ON(fd_off != priv->rx_headroom); /* The offset to data start within the buffer holding * the SGT should always be equal to the offset to data * start within the first buffer holding the frame.
*/
WARN_ON_ONCE(fd_off != qm_sg_entry_get_off(&sgt[i]));
skb_reserve(skb, fd_off);
skb_put(skb, qm_sg_entry_get_len(&sgt[i]));
} else { /* Not the first S/G entry; all data from buffer will * be added in an skb fragment; fragment index is offset * by one since first S/G entry was incorporated in the * linear part of the skb. * * Caution: 'page' may be a tail page.
*/
page = virt_to_page(sg_vaddr);
head_page = virt_to_head_page(sg_vaddr);
/* Non-initial SGT entries should not have a buffer * offset.
*/
WARN_ON_ONCE(qm_sg_entry_get_off(&sgt[i]));
/* skb_add_rx_frag() does no checking on the page; if * we pass it a tail page, we'll end up with * bad page accounting and eventually with segfaults.
*/
skb_add_rx_frag(skb, i - 1, head_page, page_offset,
qm_sg_entry_get_len(&sgt[i]),
dpaa_bp->size);
}
/* Update the pool count for the current {cpu x bpool} */
count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
(*count_ptr)--;
if (qm_sg_entry_is_final(&sgt[i])) break;
}
WARN_ONCE(i == DPAA_SGT_MAX_ENTRIES, "No final bit on SGT\n");
/* free the SG table buffer */
free_pages((unsignedlong)vaddr, 0);
return skb;
free_buffers: /* free all the SG entries */ for (j = 0; j < DPAA_SGT_MAX_ENTRIES ; j++) {
sg_addr = qm_sg_addr(&sgt[j]);
sg_vaddr = phys_to_virt(sg_addr); /* all pages 0..i were unmaped */ if (j > i)
dma_unmap_page(priv->rx_dma_dev, qm_sg_addr(&sgt[j]),
DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
free_pages((unsignedlong)sg_vaddr, 0); /* counters 0..i-1 were decremented */ if (j >= i) {
dpaa_bp = dpaa_bpid2pool(sgt[j].bpid); if (dpaa_bp) {
count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
(*count_ptr)--;
}
}
if (qm_sg_entry_is_final(&sgt[j])) break;
} /* free the SGT fragment */
free_pages((unsignedlong)vaddr, 0);
/* We are guaranteed to have at least tx_headroom bytes * available, so just use that for offset.
*/
fd->bpid = FSL_DPAA_BPID_INV;
buff_start = skb->data - priv->tx_headroom;
dma_dir = DMA_TO_DEVICE;
/* Enable L3/L4 hardware checksum computation. * * We must do this before dma_map_single(DMA_TO_DEVICE), because we may * need to write into the skb.
*/
err = dpaa_enable_tx_csum(priv, skb, fd,
buff_start + DPAA_TX_PRIV_DATA_SIZE); if (unlikely(err < 0)) { if (net_ratelimit())
netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
err); return err;
}
/* Fill in the rest of the FD fields */
qm_fd_set_contig(fd, priv->tx_headroom, skb->len);
fd->cmd |= cpu_to_be32(FM_FD_CMD_FCO);
/* Map the entire buffer size that may be seen by FMan, but no more */
addr = dma_map_single(priv->tx_dma_dev, buff_start,
priv->tx_headroom + skb->len, dma_dir); if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) { if (net_ratelimit())
netif_err(priv, tx_err, net_dev, "dma_map_single() failed\n"); return -EINVAL;
}
qm_fd_addr_set64(fd, addr);
/* get a page to store the SGTable */
p = dev_alloc_pages(0); if (unlikely(!p)) {
netdev_err(net_dev, "dev_alloc_pages() failed\n"); return -ENOMEM;
}
buff_start = page_address(p);
/* Enable L3/L4 hardware checksum computation. * * We must do this before dma_map_single(DMA_TO_DEVICE), because we may * need to write into the skb.
*/
err = dpaa_enable_tx_csum(priv, skb, fd,
buff_start + DPAA_TX_PRIV_DATA_SIZE); if (unlikely(err < 0)) { if (net_ratelimit())
netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
err); goto csum_failed;
}
/* SGT[0] is used by the linear part */
sgt = (struct qm_sg_entry *)(buff_start + priv->tx_headroom);
frag_len = skb_headlen(skb);
qm_sg_entry_set_len(&sgt[0], frag_len);
sgt[0].bpid = FSL_DPAA_BPID_INV;
sgt[0].offset = 0;
addr = dma_map_single(priv->tx_dma_dev, skb->data,
skb_headlen(skb), dma_dir); if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
netdev_err(priv->net_dev, "DMA mapping failed\n");
err = -EINVAL; goto sg0_map_failed;
}
qm_sg_entry_set64(&sgt[0], addr);
/* populate the rest of SGT entries */ for (i = 0; i < nr_frags; i++) {
frag = &skb_shinfo(skb)->frags[i];
frag_len = skb_frag_size(frag);
WARN_ON(!skb_frag_page(frag));
addr = skb_frag_dma_map(priv->tx_dma_dev, frag, 0,
frag_len, dma_dir); if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
netdev_err(priv->net_dev, "DMA mapping failed\n");
err = -EINVAL; goto sg_map_failed;
}
/* check linear buffer alignment */ if (!PTR_IS_ALIGNED(skb->data, DPAA_A050385_ALIGN)) goto workaround;
/* linear buffers just need to have an aligned start */ if (!skb_is_nonlinear(skb)) return 0;
/* linear data size for nonlinear skbs needs to be aligned */ if (!IS_ALIGNED(skb_headlen(skb), DPAA_A050385_ALIGN)) goto workaround;
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
/* all fragments need to have aligned start addresses */ if (!IS_ALIGNED(skb_frag_off(frag), DPAA_A050385_ALIGN)) goto workaround;
/* all but last fragment need to have aligned sizes */ if (!IS_ALIGNED(skb_frag_size(frag), DPAA_A050385_ALIGN) &&
(i < skb_shinfo(skb)->nr_frags - 1)) goto workaround;
}
return 0;
workaround: /* copy all the skb content into a new linear buffer */
new_skb = netdev_alloc_skb(net_dev, skb->len + DPAA_A050385_ALIGN - 1 +
priv->tx_headroom); if (!new_skb) return -ENOMEM;
/* NET_SKB_PAD bytes already reserved, adding up to tx_headroom */
skb_reserve(new_skb, priv->tx_headroom - NET_SKB_PAD);
/* Workaround for DPAA_A050385 requires data start to be aligned */
start = PTR_ALIGN(new_skb->data, DPAA_A050385_ALIGN); if (start - new_skb->data)
skb_reserve(new_skb, start - new_skb->data);
/* Copy relevant timestamp info from the old skb to the new */ if (priv->tx_tstamp) {
skb_shinfo(new_skb)->tx_flags = skb_shinfo(skb)->tx_flags;
skb_shinfo(new_skb)->hwtstamps = skb_shinfo(skb)->hwtstamps;
skb_shinfo(new_skb)->tskey = skb_shinfo(skb)->tskey; if (skb->sk)
skb_set_owner_w(new_skb, skb->sk);
}
/* We move the headroom when we align it so we have to reset the * network and transport header offsets relative to the new data * pointer. The checksum offload relies on these offsets.
*/
skb_set_network_header(new_skb, skb_network_offset(skb));
skb_set_transport_header(new_skb, skb_transport_offset(skb));
/* Check the data alignment and make sure the headroom is large * enough to store the xdpf backpointer. Use an aligned headroom * value. * * Due to alignment constraints, we give XDP access to the full 256 * byte frame headroom. If the XDP program uses all of it, copy the * data to a new buffer and make room for storing the backpointer.
*/ if (PTR_IS_ALIGNED(xdpf->data, DPAA_FD_DATA_ALIGNMENT) &&
xdpf->headroom >= priv->tx_headroom) {
xdpf->headroom = priv->tx_headroom; return 0;
}
/* Try to move the data inside the buffer just enough to align it and * store the xdpf backpointer. If the available headroom isn't large * enough, resort to allocating a new buffer and copying the data.
*/
aligned_data = PTR_ALIGN_DOWN(xdpf->data, DPAA_FD_DATA_ALIGNMENT);
data_shift = xdpf->data - aligned_data;
/* The XDP frame's headroom needs to be large enough to accommodate * shifting the data as well as storing the xdpf backpointer.
*/ if (xdpf->headroom >= data_shift + priv->tx_headroom) {
memmove(aligned_data, xdpf->data, xdpf->len);
xdpf->data = aligned_data;
xdpf->headroom = priv->tx_headroom; return 0;
}
/* The new xdp_frame is stored in the new buffer. Reserve enough space * in the headroom for storing it along with the driver's private * info. The headroom needs to be aligned to DPAA_FD_DATA_ALIGNMENT to * guarantee the data's alignment in the buffer.
*/
headroom = ALIGN(sizeof(*new_xdpf) + priv->tx_headroom,
DPAA_FD_DATA_ALIGNMENT);
/* Assure the extended headroom and data don't overflow the buffer, * while maintaining the mandatory tailroom.
*/ if (headroom + xdpf->len > DPAA_BP_RAW_SIZE -
SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) return -ENOMEM;
p = dev_alloc_pages(0); if (unlikely(!p)) return -ENOMEM;
/* Copy the data to the new buffer at a properly aligned offset */
new_buff = page_address(p);
memcpy(new_buff + headroom, xdpf->data, xdpf->len);
/* Create an XDP frame around the new buffer in a similar fashion * to xdp_convert_buff_to_frame.
*/
new_xdpf = new_buff;
new_xdpf->data = new_buff + headroom;
new_xdpf->len = xdpf->len;
new_xdpf->headroom = priv->tx_headroom;
new_xdpf->frame_sz = DPAA_BP_RAW_SIZE;
new_xdpf->mem_type = MEM_TYPE_PAGE_ORDER0;
/* Release the initial buffer */
xdp_return_frame_rx_napi(xdpf);
/* Packet data is always read as 32-bit words, so zero out any part of * the skb which might be sent if we have to pad the packet
*/ if (__skb_put_padto(skb, ETH_ZLEN, false)) goto enomem;
nonlinear = skb_is_nonlinear(skb); if (!nonlinear) { /* We're going to store the skb backpointer at the beginning * of the data buffer, so we need a privately owned skb * * We've made sure skb is not shared in dev->priv_flags, * we need to verify the skb head is not cloned
*/ if (skb_cow_head(skb, priv->tx_headroom)) goto enomem;
WARN_ON(skb_is_nonlinear(skb));
}
/* MAX_SKB_FRAGS is equal or larger than our dpaa_SGT_MAX_ENTRIES; * make sure we don't feed FMan with more fragments than it supports.
*/ if (unlikely(nonlinear &&
(skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) { /* If the egress skb contains more fragments than we support * we have no choice but to linearize it ourselves.
*/ if (__skb_linearize(skb)) goto enomem;
nonlinear = skb_is_nonlinear(skb);
}
#ifdef CONFIG_DPAA_ERRATUM_A050385 if (unlikely(fman_has_errata_a050385())) { if (dpaa_a050385_wa_skb(net_dev, &skb)) goto enomem;
nonlinear = skb_is_nonlinear(skb);
} #endif
if (nonlinear) { /* Just create a S/G fd based on the skb */
err = skb_to_sg_fd(priv, skb, &fd);
percpu_priv->tx_frag_skbuffs++;
} else { /* Create a contig FD from this skb */
err = skb_to_contig_fd(priv, skb, &fd, &offset);
} if (unlikely(err < 0)) goto skb_to_fd_failed;
/* NOTE(review): stray non-code text (a German website disclaimer) was pasted
 * into this C source file; preserved below as a comment so the file remains
 * syntactically valid. It should probably be removed entirely — confirm with
 * the file's owner.
 *
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig
 * zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereitgestellten Informationen zugesichert.
 * Bemerkung: Die farbliche Syntaxdarstellung und die Messung sind noch
 * experimentell.
 */