memset(f, 0, sizeof(struct tn40_fifo)); /* 1K extra space is allocated at the end of the fifo to simplify * processing of descriptors that wraps around fifo's end.
*/
f->va = dma_alloc_coherent(&priv->pdev->dev,
memsz + TN40_FIFO_EXTRA_SPACE, &f->da,
GFP_KERNEL); if (!f->va) return -ENOMEM;
db = vzalloc(size); if (db) {
db->stack = (int *)(db + 1);
db->elems = (void *)(db->stack + nelem);
db->nelem = nelem;
db->top = nelem; /* make the first alloc close to db struct */ for (i = 0; i < nelem; i++)
db->stack[i] = nelem - i - 1;
} return db;
}
/** * tn40_create_rx_ring - Initialize RX all related HW and SW resources * @priv: NIC private structure * * create_rx_ring creates rxf and rxd fifos, updates the relevant HW registers, * preallocates skbs for rx. It assumes that Rx is disabled in HW funcs are * grouped for better cache usage * * RxD fifo is smaller then RxF fifo by design. Upon high load, RxD will be * filled and packets will be dropped by the NIC without getting into the host * or generating interrupts. In this situation the host has no chance of * processing all the packets. Dropping packets by the NIC is cheaper, since it * takes 0 CPU cycles. * * Return: 0 on success and negative value on error.
*/ staticint tn40_create_rx_ring(struct tn40_priv *priv)
{ struct page_pool_params pp = {
.dev = &priv->pdev->dev,
.napi = &priv->napi,
.dma_dir = DMA_FROM_DEVICE,
.netdev = priv->ndev,
.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
.max_len = PAGE_SIZE,
}; int ret, pkt_size, nr;
priv->page_pool = page_pool_create(&pp); if (IS_ERR(priv->page_pool)) return PTR_ERR(priv->page_pool);
ret = tn40_fifo_alloc(priv, &priv->rxd_fifo0.m, priv->rxd_size,
TN40_REG_RXD_CFG0_0, TN40_REG_RXD_CFG1_0,
TN40_REG_RXD_RPTR_0, TN40_REG_RXD_WPTR_0); if (ret) goto err_destroy_page_pool;
ret = tn40_fifo_alloc(priv, &priv->rxf_fifo0.m, priv->rxf_size,
TN40_REG_RXF_CFG0_0, TN40_REG_RXF_CFG1_0,
TN40_REG_RXF_RPTR_0, TN40_REG_RXF_WPTR_0); if (ret) goto err_free_rxd;
/** * tn40_rx_alloc_buffers - Fill rxf fifo with buffers. * * @priv: NIC's private structure * * rx_alloc_buffers allocates buffers via the page pool API, builds rxf descs * and pushes them (rxf descr) into the rxf fifo. The pages are stored in rxdb. * To calculate the free space, we uses the cached values of RPTR and WPTR * when needed. This function also updates RPTR and WPTR.
*/ staticvoid tn40_rx_alloc_buffers(struct tn40_priv *priv)
{ struct tn40_rxf_fifo *f = &priv->rxf_fifo0; struct tn40_rxdb *db = priv->rxdb0; struct tn40_rx_map *dm; struct page *page; int dno, i, idx;
dno = tn40_rxdb_available(db) - 1; for (i = dno; i > 0; i--) {
page = page_pool_dev_alloc_pages(priv->page_pool); if (!page) break;
while (size > 0) {
rxdd = (struct tn40_rxd_desc *)(f->m.va + f->m.rptr);
db = priv->rxdb0;
/* We have a chicken and egg problem here. If the * descriptor is wrapped we first need to copy the tail * of the descriptor to the end of the buffer before * extracting values from the descriptor. However in * order to know if the descriptor is wrapped we need to * obtain the length of the descriptor from (the * wrapped) descriptor. Luckily the length is the first * word of the descriptor. Descriptor lengths are * multiples of 8 bytes so in case of a wrapped * descriptor the first 8 bytes guaranteed to appear * before the end of the buffer. We first obtain the * length, we then copy the rest of the descriptor if * needed and then extract the rest of the values from * the descriptor. * * Do not change the order of operations as it will * break the code!!!
*/
rxd_val1 = le32_to_cpu(rxdd->rxd_val1);
tmp_len = TN40_GET_RXD_BC(rxd_val1) << 3;
pkt_id = TN40_GET_RXD_PKT_ID(rxd_val1);
size -= tmp_len; /* CHECK FOR A PARTIALLY ARRIVED DESCRIPTOR */ if (size < 0) {
netdev_dbg(priv->ndev, "%s partially arrived desc tmp_len %d\n",
__func__, tmp_len); break;
} /* make sure that the descriptor fully is arrived * before reading the rest of the descriptor.
*/
rmb();
/* A special treatment is given to non-contiguous * descriptors that start near the end, wraps around * and continue at the beginning. The second part is * copied right after the first, and then descriptor * is interpreted as normal. The fifo has an extra * space to allow such operations.
*/
/* HAVE WE REACHED THE END OF THE QUEUE? */
f->m.rptr += tmp_len;
tmp_len = f->m.rptr - f->m.memsz; if (unlikely(tmp_len >= 0)) {
f->m.rptr = tmp_len; if (tmp_len > 0) { /* COPY PARTIAL DESCRIPTOR * TO THE END OF THE QUEUE
*/
netdev_dbg(priv->ndev, "wrapped desc rptr=%d tmp_len=%d\n",
f->m.rptr, tmp_len);
memcpy(f->m.va + f->m.memsz, f->m.va, tmp_len);
}
}
idx = le32_to_cpu(rxdd->va_lo);
dm = tn40_rxdb_addr_elem(db, idx);
prefetch(dm);
len = le16_to_cpu(rxdd->len);
rxd_vlan = le16_to_cpu(rxdd->rxd_vlan); /* CHECK FOR ERRORS */
rxd_err = TN40_GET_RXD_ERR(rxd_val1); if (unlikely(rxd_err)) {
u64_stats_update_begin(&priv->syncp);
priv->stats.rx_errors++;
u64_stats_update_end(&priv->syncp);
tn40_recycle_rx_buffer(priv, rxdd); continue;
}
if (unlikely(++done >= budget)) break;
}
u64_stats_update_begin(&priv->syncp);
priv->stats.rx_packets += done;
u64_stats_update_end(&priv->syncp); /* FIXME: Do something to minimize pci accesses */
tn40_write_reg(priv, f->m.reg_rptr, f->m.rptr & TN40_TXF_WPTR_WR_PTR);
tn40_rx_alloc_buffers(priv); return done;
}
/* TX HW/SW interaction overview * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * There are 2 types of TX communication channels between driver and NIC. * 1) TX Free Fifo - TXF - Holds ack descriptors for sent packets. * 2) TX Data Fifo - TXD - Holds descriptors of full buffers. * * Currently the NIC supports TSO, checksumming and gather DMA * UFO and IP fragmentation is on the way. * * RX SW Data Structures * ~~~~~~~~~~~~~~~~~~~~~ * TXDB is used to keep track of all skbs owned by SW and their DMA addresses. * For TX case, ownership lasts from getting the packet via hard_xmit and * until the HW acknowledges sending the packet by TXF descriptors. * TXDB is implemented as a cyclic buffer. * * FIFO objects keep info about the fifo's size and location, relevant HW * registers, usage and skb db. Each RXD and RXF fifo has their own fifo * structure. Implemented as simple struct. * * TX SW Execution Flow * ~~~~~~~~~~~~~~~~~~~~ * OS calls the driver's hard_xmit method with a packet to send. The driver * creates DMA mappings, builds TXD descriptors and kicks the HW by updating * TXD WPTR. * * When a packet is sent, The HW write a TXF descriptor and the SW * frees the original skb. To prevent TXD fifo overflow without * reading HW registers every time, the SW deploys "tx level" * technique. Upon startup, the tx level is initialized to TXD fifo * length. For every sent packet, the SW gets its TXD descriptor size * (from a pre-calculated array) and subtracts it from tx level. The * size is also stored in txdb. When a TXF ack arrives, the SW fetched * the size of the original TXD descriptor from the txdb and adds it * to the tx level. When the Tx level drops below some predefined * threshold, the driver stops the TX queue. When the TX level rises * above that level, the tx queue is enabled again. * * This technique avoids excessive reading of RPTR and WPTR registers. * As our benchmarks shows, it adds 1.5 Gbit/sec to NIC's throughput.
*/ staticvoid tn40_do_tx_db_ptr_next(struct tn40_txdb *db, struct tn40_tx_map **pptr)
{
++*pptr; if (unlikely(*pptr == db->end))
*pptr = db->start;
}
/* Allocate the cyclic tx db and initialize it to the empty state.
 * @sz_type selects the backing size: TN40_FIFO_SIZE << (sz_type + 1) bytes.
 *
 * Return: 0 on success, -ENOMEM if the allocation fails.
 */
static int tn40_tx_db_init(struct tn40_txdb *d, int sz_type)
{
	int memsz = TN40_FIFO_SIZE * (1 << (sz_type + 1));

	d->start = vzalloc(memsz);
	if (!d->start)
		return -ENOMEM;
	/* Keep at least one element permanently unused so that
	 * rptr == wptr always means "empty" and can never mean "full".
	 */
	d->size = memsz / sizeof(struct tn40_tx_map) - 1;
	d->end = d->start + d->size + 1; /* just past the last element */
	/* A freshly created db is empty. */
	d->wptr = d->start;
	d->rptr = d->start;
	return 0;
}
/** * tn40_tx_map_skb - create and store DMA mappings for skb's data blocks * @priv: NIC private structure * @skb: socket buffer to map * @txdd: pointer to tx descriptor to be updated * @pkt_len: pointer to unsigned long value * * This function creates DMA mappings for skb's data blocks and writes them to * PBL of a new tx descriptor. It also stores them in the tx db, so they could * be unmapped after the data has been sent. It is the responsibility of the * caller to make sure that there is enough space in the txdb. The last * element holds a pointer to skb itself and is marked with a zero length. * * Return: 0 on success and negative value on error.
*/ staticint tn40_tx_map_skb(struct tn40_priv *priv, struct sk_buff *skb, struct tn40_txd_desc *txdd, unsignedint *pkt_len)
{ struct tn40_mapping_info info[TN40_MAX_PBL]; int nr_frags = skb_shinfo(skb)->nr_frags; struct tn40_pbl *pbl = &txdd->pbl[0]; struct tn40_txdb *db = &priv->txdb; unsignedint size; int i, len, ret;
dma_addr_t dma;
netdev_dbg(priv->ndev, "TX skb %p skbLen %d dataLen %d frags %d\n", skb,
skb->len, skb->data_len, nr_frags); if (nr_frags > TN40_MAX_PBL - 1) {
ret = skb_linearize(skb); if (ret) return ret;
nr_frags = skb_shinfo(skb)->nr_frags;
} /* initial skb */
len = skb->len - skb->data_len;
dma = dma_map_single(&priv->pdev->dev, skb->data, len,
DMA_TO_DEVICE);
ret = dma_mapping_error(&priv->pdev->dev, dma); if (ret) return ret;
staticint tn40_create_tx_ring(struct tn40_priv *priv)
{ int ret;
ret = tn40_fifo_alloc(priv, &priv->txd_fifo0.m, priv->txd_size,
TN40_REG_TXD_CFG0_0, TN40_REG_TXD_CFG1_0,
TN40_REG_TXD_RPTR_0, TN40_REG_TXD_WPTR_0); if (ret) return ret;
ret = tn40_fifo_alloc(priv, &priv->txf_fifo0.m, priv->txf_size,
TN40_REG_TXF_CFG0_0, TN40_REG_TXF_CFG1_0,
TN40_REG_TXF_RPTR_0, TN40_REG_TXF_WPTR_0); if (ret) goto err_free_txd;
/* The TX db has to keep mappings for all packets sent (on * TxD) and not yet reclaimed (on TxF).
*/
ret = tn40_tx_db_init(&priv->txdb, max(priv->txd_size, priv->txf_size)); if (ret) goto err_free_txf;
/** * tn40_tx_space - Calculate the available space in the TX fifo. * @priv: NIC private structure * * Return: available space in TX fifo in bytes
*/ staticint tn40_tx_space(struct tn40_priv *priv)
{ struct tn40_txd_fifo *f = &priv->txd_fifo0; int fsize;
/* Increment TXD write pointer. In case of fifo wrapping copy * reminder of the descriptor to the beginning.
*/
f->m.wptr += tn40_txd_sizes[nr_frags].bytes;
len = f->m.wptr - f->m.memsz; if (unlikely(len >= 0)) {
f->m.wptr = len; if (len > 0)
memcpy(f->m.va, f->m.va + f->m.memsz, len);
} /* Force memory writes to complete before letting the HW know * there are new descriptors to fetch.
*/
wmb();
netif_tx_lock(priv->ndev); while (f->m.wptr != f->m.rptr) {
f->m.rptr += TN40_TXF_DESC_SZ;
f->m.rptr &= f->m.size_mask; /* Unmap all fragments */ /* First has to come tx_maps containing DMA */ do {
dma_addr_t addr = db->rptr->addr.dma;
size_t size = db->rptr->len;
netif_tx_unlock(priv->ndev);
dma_unmap_page(&priv->pdev->dev, addr,
size, DMA_TO_DEVICE);
netif_tx_lock(priv->ndev);
tn40_tx_db_inc_rptr(db);
} while (db->rptr->len > 0);
tx_level -= db->rptr->len; /* '-' Because the len is negative */
/* Now should come skb pointer - free it */
dev_kfree_skb_any(db->rptr->addr.skb);
netdev_dbg(priv->ndev, "dev_kfree_skb_any %p %d\n",
db->rptr->addr.skb, -db->rptr->len);
tn40_tx_db_inc_rptr(db);
}
/* Let the HW know which TXF descriptors were cleaned */
tn40_write_reg(priv, f->m.reg_rptr, f->m.rptr & TN40_TXF_WPTR_WR_PTR);
/* We reclaimed resources, so in case the Q is stopped by xmit * callback, we resume the transmission and use tx_lock to * synchronize with xmit.
*/
priv->tx_level += tx_level; if (priv->tx_noupd) {
priv->tx_noupd = 0;
tn40_write_reg(priv, priv->txd_fifo0.m.reg_wptr,
priv->txd_fifo0.m.wptr & TN40_TXF_WPTR_WR_PTR);
} if (unlikely(netif_queue_stopped(priv->ndev) &&
netif_carrier_ok(priv->ndev) &&
(priv->tx_level >= TN40_MAX_TX_LEVEL / 2))) {
netdev_dbg(priv->ndev, "TX Q WAKE level %d\n", priv->tx_level);
netif_wake_queue(priv->ndev);
}
netif_tx_unlock(priv->ndev);
}
/** * tn40_tx_push_desc - Push a descriptor to TxD fifo. * * @priv: NIC private structure * @data: desc's data * @size: desc's size * * This function pushes desc to TxD fifo and overlaps it if needed. * * This function does not check for available space, nor does it check * that the data size is smaller than the fifo size. Checking for * space is the responsibility of the caller.
*/ staticvoid tn40_tx_push_desc(struct tn40_priv *priv, void *data, int size)
{ struct tn40_txd_fifo *f = &priv->txd_fifo0; int i = f->m.memsz - f->m.wptr;
if (size == 0) return;
if (i > size) {
memcpy(f->m.va + f->m.wptr, data, size);
f->m.wptr += size;
} else {
memcpy(f->m.va + f->m.wptr, data, i);
f->m.wptr = size - i;
memcpy(f->m.va, data + i, f->m.wptr);
}
tn40_write_reg(priv, f->m.reg_wptr, f->m.wptr & TN40_TXF_WPTR_WR_PTR);
}
/** * tn40_tx_push_desc_safe - push descriptor to TxD fifo in a safe way. * * @priv: NIC private structure * @data: descriptor data * @size: descriptor size * * This function does check for available space and, if necessary, * waits for the NIC to read existing data before writing new data.
*/ staticvoid tn40_tx_push_desc_safe(struct tn40_priv *priv, void *data, int size)
{ int timer = 0;
while (size > 0) { /* We subtract 8 because when the fifo is full rptr == * wptr, which also means that fifo is empty, we can * understand the difference, but could the HW do the * same ???
*/ int avail = tn40_tx_space(priv) - 8;
if (avail <= 0) { if (timer++ > 300) /* Prevent endless loop */ break; /* Give the HW a chance to clean the fifo */
usleep_range(50, 60); continue;
}
avail = min(avail, size);
netdev_dbg(priv->ndev, "about to push %d bytes starting %p size %d\n",
avail, data, size);
tn40_tx_push_desc(priv, data, avail);
size -= avail;
data += avail;
}
}
int tn40_set_link_speed(struct tn40_priv *priv, u32 speed)
{
u32 val; int i;
netdev_dbg(priv->ndev, "speed %d\n", speed); switch (speed) { case SPEED_10000: case SPEED_5000: case SPEED_2500:
netdev_dbg(priv->ndev, "link_speed %d\n", speed);
if (unlikely(!isr)) {
tn40_enable_interrupts(priv); return IRQ_NONE; /* Not our interrupt */
}
if (isr & TN40_IR_EXTRA)
tn40_isr_extra(priv, isr);
if (isr & (TN40_IR_RX_DESC_0 | TN40_IR_TX_FREE_0 | TN40_IR_TMR1)) { if (likely(napi_schedule_prep(&priv->napi))) {
__napi_schedule(&priv->napi); return IRQ_HANDLED;
} /* We get here if an interrupt has slept into the * small time window between these lines in * tn40_poll: tn40_enable_interrupts(priv); return 0; * * Currently interrupts are disabled (since we read * the ISR register) and we have failed to register * the next poll. So we read the regs to trigger the * chip and allow further interrupts.
*/
tn40_read_reg(priv, TN40_REG_TXF_WPTR_0);
tn40_read_reg(priv, TN40_REG_RXD_WPTR_0);
}
/* Check that the PLLs are locked and reset ended */
val = read_poll_timeout(tn40_read_reg, val,
(val & TN40_CLKPLL_LKD) == TN40_CLKPLL_LKD,
10000, 700000, false, priv, TN40_REG_CLKPLL); if (val) return -EIO;
usleep_range(50, 60); /* Do any PCI-E read transaction */
tn40_read_reg(priv, TN40_REG_RXD_CFG0_0); return 0;
}
staticvoid tn40_sw_reset(struct tn40_priv *priv)
{ int i, ret;
u32 val;
/* 1. load MAC (obsolete) */ /* 2. disable Rx (and Tx) */
tn40_write_reg(priv, TN40_REG_GMAC_RXF_A, 0);
msleep(100); /* 3. Disable port */
tn40_write_reg(priv, TN40_REG_DIS_PORT, 1); /* 4. Disable queue */
tn40_write_reg(priv, TN40_REG_DIS_QU, 1); /* 5. Wait until hw is disabled */
ret = read_poll_timeout(tn40_read_reg, val, val & 1, 10000, 500000, false, priv, TN40_REG_RST_PORT); if (ret)
netdev_err(priv->ndev, "SW reset timeout. continuing anyway\n");
netdev_dbg(priv->ndev, "vid =%d value =%d\n", (int)vid, enable);
reg = TN40_REG_VLAN_0 + (vid / 32) * 4;
bit = 1 << vid % 32;
val = tn40_read_reg(priv, reg);
netdev_dbg(priv->ndev, "reg =%x, val =%x, bit =%d\n", reg, val, bit); if (enable)
val |= bit; else
val &= ~bit;
netdev_dbg(priv->ndev, "new val %x\n", val);
tn40_write_reg(priv, reg, val);
}
/* FIXME: RXE(OFF) */ if (ndev->flags & IFF_PROMISC) {
rxf_val |= TN40_GMAC_RX_FILTER_PRM;
} elseif (ndev->flags & IFF_ALLMULTI) { /* set IMF to accept all multicast frames */ for (i = 0; i < TN40_MAC_MCST_HASH_NUM; i++)
tn40_write_reg(priv,
TN40_REG_RX_MCST_HASH0 + i * 4, ~0);
} elseif (netdev_mc_count(ndev)) { struct netdev_hw_addr *mclist;
u32 reg, val;
u8 hash;
/* Set IMF to deny all multicast frames */ for (i = 0; i < TN40_MAC_MCST_HASH_NUM; i++)
tn40_write_reg(priv,
TN40_REG_RX_MCST_HASH0 + i * 4, 0);
/* Set PMF to deny all multicast frames */ for (i = 0; i < TN40_MAC_MCST_NUM; i++) {
tn40_write_reg(priv,
TN40_REG_RX_MAC_MCST0 + i * 8, 0);
tn40_write_reg(priv,
TN40_REG_RX_MAC_MCST1 + i * 8, 0);
} /* Use PMF to accept first MAC_MCST_NUM (15) addresses */
/* TBD: Sort the addresses and write them in ascending * order into RX_MAC_MCST regs. we skip this phase now * and accept ALL multicast frames through IMF. Accept * the rest of addresses throw IMF.
*/
netdev_for_each_mc_addr(mclist, ndev) {
hash = 0; for (i = 0; i < ETH_ALEN; i++)
hash ^= mclist->addr[i];
/* One-time device bring-up: park the link, program the GPIOs and MDIO
 * controller, then load the firmware.
 *
 * Return: 0 on success, a negative errno otherwise.
 */
static int tn40_priv_init(struct tn40_priv *priv)
{
	int ret;

	tn40_set_link_speed(priv, 0);

	/* Drive GPIO[9:0] as outputs set to 0. */
	tn40_write_reg(priv, 0x51E0, 0x30010006); /* GPIO_OE_ WR CMD */
	tn40_write_reg(priv, 0x51F0, 0x0); /* GPIO_OE_ DATA */
	tn40_write_reg(priv, TN40_REG_MDIO_CMD_STAT, 0x3ec8);

	/* The firmware is pushed into the NIC through the TX descriptor
	 * ring, so a temporary ring is created here and torn down again
	 * once the load finishes.
	 */
	ret = tn40_create_tx_ring(priv);
	if (ret)
		return ret;
	ret = tn40_fw_load(priv);
	tn40_destroy_tx_ring(priv);
	return ret;
}
ret = pci_enable_device(pdev); if (ret) return ret;
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); if (ret) {
dev_err(&pdev->dev, "failed to set DMA mask.\n"); goto err_disable_device;
}
ret = pci_request_regions(pdev, TN40_DRV_NAME); if (ret) {
dev_err(&pdev->dev, "failed to request PCI regions.\n"); goto err_disable_device;
}
pci_set_master(pdev);
regs = pci_iomap(pdev, 0, TN40_REGS_SIZE); if (!regs) {
ret = -EIO;
dev_err(&pdev->dev, "failed to map PCI bar.\n"); goto err_free_regions;
}
ndev = tn40_netdev_alloc(pdev); if (!ndev) {
ret = -ENOMEM;
dev_err(&pdev->dev, "failed to allocate netdev.\n"); goto err_iounmap;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.