/* * This file is provided under a dual BSD/GPLv2 license. When using or * redistributing this file, you may do so under either license. * * GPL LICENSE SUMMARY * * Copyright(c) 2012 Intel Corporation. All rights reserved. * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as * published by the Free Software Foundation. * * BSD LICENSE * * Copyright(c) 2012 Intel Corporation. All rights reserved. * Copyright (C) 2015 EMC Corporation. All Rights Reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copy * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * PCIe NTB Transport Linux driver * * Contact Information: * Jon Mason <jon.mason@intel.com>
 */

#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/export.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/types.h>
#include <linux/uaccess.h>
#include "linux/ntb.h"
#include "linux/ntb_transport.h"
staticunsignedlong max_mw_size;
module_param(max_mw_size, ulong, 0644);
MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
staticunsignedint transport_mtu = 0x10000;
module_param(transport_mtu, uint, 0644);
MODULE_PARM_DESC(transport_mtu, "Maximum size of NTB transport packets");
staticunsignedchar max_num_clients;
module_param(max_num_clients, byte, 0644);
MODULE_PARM_DESC(max_num_clients, "Maximum number of NTB transport clients");
staticunsignedint copy_bytes = 1024;
module_param(copy_bytes, uint, 0644);
MODULE_PARM_DESC(copy_bytes, "Threshold under which NTB will use the CPU to copy instead of DMA");
staticbool use_dma;
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
/* Only two-ports NTB devices are supported */
#define PIDX		NTB_DEF_PEER_IDX
struct ntb_queue_entry { /* ntb_queue list reference */ struct list_head entry; /* pointers to data to be transferred */ void *cb_data; void *buf; unsignedint len; unsignedint flags; int retries; int errors; unsignedint tx_index; unsignedint rx_index;
/** * ntb_transport_register_client_dev - Register NTB client device * @device_name: Name of NTB client device * * Register an NTB client device with the NTB transport layer * * Returns: %0 on success or -errno code on error
*/ int ntb_transport_register_client_dev(char *device_name)
{ struct ntb_transport_client_dev *client_dev; struct ntb_transport_ctx *nt; int node; int rc, i = 0;
if (list_empty(&ntb_transport_list)) return -ENODEV;
/** * ntb_transport_register_client - Register NTB client driver * @drv: NTB client driver to be registered * * Register an NTB client driver with the NTB transport layer * * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
*/ int ntb_transport_register_client(struct ntb_transport_client *drv)
{
drv->driver.bus = &ntb_transport_bus;
if (list_empty(&ntb_transport_list)) return -ENODEV;
/**
 * ntb_transport_unregister_client - Unregister NTB client driver
 * @drv: NTB client driver to be unregistered
 *
 * Unregister an NTB client driver with the NTB transport layer.
 * Any devices the driver is bound to are released via the driver core.
 */
void ntb_transport_unregister_client(struct ntb_transport_client *drv)
{
	driver_unregister(&drv->driver);
}
EXPORT_SYMBOL_GPL(ntb_transport_unregister_client);
/* Due to housekeeping, there must be atleast 2 buffs */
qp->rx_max_frame = min(transport_mtu, rx_size / 2);
qp->rx_max_entry = rx_size / qp->rx_max_frame;
qp->rx_index = 0;
/* * Checking to see if we have more entries than the default. * We should add additional entries if that is the case so we * can be in sync with the transport frames.
*/
node = dev_to_node(&ndev->dev); for (i = qp->rx_alloc_entry; i < qp->rx_max_entry; i++) {
entry = kzalloc_node(sizeof(*entry), GFP_KERNEL, node); if (!entry) return -ENOMEM;
/* * The buffer here is allocated against the NTB device. The reason to * use dma_alloc_*() call is to allocate a large IOVA contiguous buffer * backing the NTB BAR for the remote host to write to. During receive * processing, the data is being copied out of the receive buffer to * the kernel skbuff. When a DMA device is being used, dma_map_page() * is called on the kvaddr of the receive buffer (from dma_alloc_*()) * and remapped against the DMA device. It appears to be a double * DMA mapping of buffers, but first is mapped to the NTB device and * second is to the DMA device. DMA_ATTR_FORCE_CONTIGUOUS is necessary * in order for the later dma_map_page() to not fail.
*/
alloc_addr = dma_alloc_attrs(ntb_dev, mw->alloc_size,
&dma_addr, GFP_KERNEL,
DMA_ATTR_FORCE_CONTIGUOUS); if (!alloc_addr) {
dev_err(ntb_dev, "Unable to alloc MW buff of size %zu\n",
mw->alloc_size); return -ENOMEM;
}
virt_addr = alloc_addr;
/* * we must ensure that the memory address allocated is BAR size * aligned in order for the XLAT register to take the value. This * is a requirement of the hardware. It is recommended to setup CMA * for BAR sizes equal or greater than 4MB.
*/ if (!IS_ALIGNED(dma_addr, align)) { if (mw->alloc_size > mw->buff_size) {
virt_addr = PTR_ALIGN(alloc_addr, align);
dma_addr = ALIGN(dma_addr, align);
} else {
rc = -ENOMEM; goto err;
}
}
/* Pass along the info to any clients */ for (i = 0; i < nt->qp_count; i++) if (qp_bitmap_alloc & BIT_ULL(i)) {
qp = &nt->qp_vec[i];
ntb_qp_link_cleanup(qp);
cancel_work_sync(&qp->link_cleanup);
cancel_delayed_work_sync(&qp->link_work);
}
if (!nt->link_is_up)
cancel_delayed_work_sync(&nt->link_work);
for (i = 0; i < nt->mw_count; i++)
ntb_free_mw(nt, i);
/* The scratchpad registers keep the values if the remote side * goes down, blast them now to give them a sane value the next * time they are accessed
*/
count = ntb_spad_count(nt->ndev); for (i = 0; i < count; i++)
ntb_spad_write(nt->ndev, i, 0);
}
/* Query the remote side for its info */
val = ntb_spad_read(ndev, VERSION);
dev_dbg(&pdev->dev, "Remote version = %d\n", val); if (val != NTB_TRANSPORT_VERSION) goto out;
val = ntb_spad_read(ndev, NUM_QPS);
dev_dbg(&pdev->dev, "Remote max number of qps = %d\n", val); if (val != nt->qp_count) goto out;
val = ntb_spad_read(ndev, NUM_MWS);
dev_dbg(&pdev->dev, "Remote number of mws = %d\n", val); if (val != nt->mw_count) goto out;
for (i = 0; i < nt->mw_count; i++) {
u64 val64;
val = ntb_spad_read(ndev, MW0_SZ_HIGH + (i * 2));
val64 = (u64)val << 32;
val = ntb_spad_read(ndev, MW0_SZ_LOW + (i * 2));
val64 |= val;
dev_dbg(&pdev->dev, "Remote MW%d size = %#llx\n", i, val64);
rc = ntb_set_mw(nt, i, val64); if (rc) goto out1;
}
nt->link_is_up = true;
for (i = 0; i < nt->qp_count; i++) { struct ntb_transport_qp *qp = &nt->qp_vec[i];
ntb_peer_spad_write(nt->ndev, PIDX, QP_LINKS, val | BIT(qp->qp_num));
/* query remote spad for qp ready bits */
dev_dbg_ratelimited(&pdev->dev, "Remote QP link status = %x\n", val);
/* See if the remote side is up */ if (val & BIT(qp->qp_num)) {
dev_info(&pdev->dev, "qp %d: Link Up\n", qp->qp_num);
qp->link_is_up = true;
qp->active = true;
if (qp->event_handler)
qp->event_handler(qp->cb_data, qp->link_is_up);
if (qp->active)
tasklet_schedule(&qp->rxc_db_work);
} elseif (nt->link_is_up)
schedule_delayed_work(&qp->link_work,
msecs_to_jiffies(NTB_LINK_DOWN_TIMEOUT));
}
/* Due to housekeeping, there must be atleast 2 buffs */
qp->tx_max_frame = min(transport_mtu, tx_size / 2);
qp->tx_max_entry = tx_size / qp->tx_max_frame;
if (!ndev->ops->mw_set_trans) {
dev_err(&ndev->dev, "Inbound MW based NTB API is required\n"); return -EINVAL;
}
if (ntb_db_is_unsafe(ndev))
dev_dbg(&ndev->dev, "doorbell is unsafe, proceed anyway...\n"); if (ntb_spad_is_unsafe(ndev))
dev_dbg(&ndev->dev, "scratchpad is unsafe, proceed anyway...\n");
if (ntb_peer_port_count(ndev) != NTB_DEF_PEER_CNT)
dev_warn(&ndev->dev, "Multi-port NTB devices unsupported\n");
node = dev_to_node(&ndev->dev);
nt = kzalloc_node(sizeof(*nt), GFP_KERNEL, node); if (!nt) return -ENOMEM;
nt->ndev = ndev;
/* * If we are using MSI, and have at least one extra memory window, * we will reserve the last MW for the MSI window.
*/ if (use_msi && mw_count > 1) {
rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed); if (!rc) {
mw_count -= 1;
nt->use_msi = true;
}
}
spad_count = ntb_spad_count(ndev);
/* Limit the MW's based on the availability of scratchpads */
/* verify that all the qp's are freed */ for (i = 0; i < nt->qp_count; i++) {
qp = &nt->qp_vec[i]; if (qp_bitmap_alloc & BIT_ULL(i))
ntb_transport_free_queue(qp);
debugfs_remove_recursive(qp->debugfs_dir);
}
ntb_link_disable(ndev);
ntb_clear_ctx(ndev);
ntb_bus_remove(nt);
for (i = nt->mw_count; i--; ) {
ntb_free_mw(nt, i);
iounmap(nt->mw_vec[i].vbase);
}
/* Limit the number of packets processed in a single interrupt to * provide fairness to others
*/ for (i = 0; i < qp->rx_max_entry; i++) {
rc = ntb_process_rxc(qp); if (rc) break;
}
if (i && qp->rx_dma_chan)
dma_async_issue_pending(qp->rx_dma_chan);
if (i == qp->rx_max_entry) { /* there is more work to do */ if (qp->active)
tasklet_schedule(&qp->rxc_db_work);
} elseif (ntb_db_read(qp->ndev) & BIT_ULL(qp->qp_num)) { /* the doorbell bit is set: clear it */
ntb_db_clear(qp->ndev, BIT_ULL(qp->qp_num)); /* ntb_db_read ensures ntb_db_clear write is committed */
ntb_db_read(qp->ndev);
/* an interrupt may have arrived between finishing * ntb_process_rxc and clearing the doorbell bit: * there might be some more work to do.
*/ if (qp->active)
tasklet_schedule(&qp->rxc_db_work);
}
}
if (qp->use_msi)
ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc); else
ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
/* The entry length can only be zero if the packet is intended to be a * "link down" or similar. Since no payload is being sent in these * cases, there is nothing to add to the completion queue.
*/ if (entry->len > 0) {
qp->tx_bytes += entry->len;
if (qp->tx_handler)
qp->tx_handler(qp, qp->cb_data, entry->cb_data,
entry->len);
}
staticvoid ntb_memcpy_tx(struct ntb_queue_entry *entry, void __iomem *offset)
{ #ifdef ARCH_HAS_NOCACHE_UACCESS /* * Using non-temporal mov to improve performance on non-cached * writes, even though we aren't actually copying from user space.
*/
__copy_from_user_inatomic_nocache(offset, entry->buf, entry->len); #else
memcpy_toio(offset, entry->buf, entry->len); #endif
/* Ensure that the data is fully copied out before setting the flags */
wmb();
/** * ntb_transport_create_queue - Create a new NTB transport layer queue * @data: pointer for callback data * @client_dev: &struct device pointer * @handlers: pointer to various ntb queue (callback) handlers * * Create a new NTB transport layer queue and provide the queue with a callback * routine for both transmit and receive. The receive callback routine will be * used to pass up data when the transport has received it on the queue. The * transmit callback routine will be called when the transport has completed the * transmission of the data on the queue and the data is ready to be freed. * * RETURNS: pointer to newly created ntb_queue, NULL on error.
*/ struct ntb_transport_qp *
ntb_transport_create_queue(void *data, struct device *client_dev, conststruct ntb_queue_handlers *handlers)
{ struct ntb_dev *ndev; struct pci_dev *pdev; struct ntb_transport_ctx *nt; struct ntb_queue_entry *entry; struct ntb_transport_qp *qp;
u64 qp_bit; unsignedint free_queue;
dma_cap_mask_t dma_mask; int node; int i;
ndev = dev_ntb(client_dev->parent);
pdev = ndev->pdev;
nt = ndev->ctx;
node = dev_to_node(&ndev->dev);
free_queue = ffs(nt->qp_bitmap_free); if (!free_queue) goto err;
/* decrement free_queue to make it zero based */
free_queue--;
dev_info(&pdev->dev, "NTB Transport QP %d created\n", qp->qp_num);
return qp;
err2: while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
kfree(entry);
err1:
qp->rx_alloc_entry = 0; while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
kfree(entry); if (qp->tx_mw_dma_addr)
dma_unmap_resource(qp->tx_dma_chan->device->dev,
qp->tx_mw_dma_addr, qp->tx_mw_size,
DMA_FROM_DEVICE, 0); if (qp->tx_dma_chan)
dma_release_channel(qp->tx_dma_chan); if (qp->rx_dma_chan)
dma_release_channel(qp->rx_dma_chan);
nt->qp_bitmap_free |= qp_bit;
err: return NULL;
}
EXPORT_SYMBOL_GPL(ntb_transport_create_queue);
/** * ntb_transport_free_queue - Frees NTB transport queue * @qp: NTB queue to be freed * * Frees NTB transport queue
*/ void ntb_transport_free_queue(struct ntb_transport_qp *qp)
{ struct pci_dev *pdev; struct ntb_queue_entry *entry;
u64 qp_bit;
if (!qp) return;
pdev = qp->ndev->pdev;
qp->active = false;
if (qp->tx_dma_chan) { struct dma_chan *chan = qp->tx_dma_chan; /* Putting the dma_chan to NULL will force any new traffic to be * processed by the CPU instead of the DAM engine
*/
qp->tx_dma_chan = NULL;
/* Try to be nice and wait for any queued DMA engine * transactions to process before smashing it with a rock
*/
dma_sync_wait(chan, qp->last_cookie);
dmaengine_terminate_all(chan);
if (qp->rx_dma_chan) { struct dma_chan *chan = qp->rx_dma_chan; /* Putting the dma_chan to NULL will force any new traffic to be * processed by the CPU instead of the DAM engine
*/
qp->rx_dma_chan = NULL;
/* Try to be nice and wait for any queued DMA engine * transactions to process before smashing it with a rock
*/
dma_sync_wait(chan, qp->last_cookie);
dmaengine_terminate_all(chan);
dma_release_channel(chan);
}
while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q)))
kfree(entry);
while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q))) {
dev_warn(&pdev->dev, "Freeing item from non-empty rx_pend_q\n");
kfree(entry);
}
while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q))) {
dev_warn(&pdev->dev, "Freeing item from non-empty rx_post_q\n");
kfree(entry);
}
while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
kfree(entry);
qp->transport->qp_bitmap_free |= qp_bit;
dev_info(&pdev->dev, "NTB Transport QP %d freed\n", qp->qp_num);
}
EXPORT_SYMBOL_GPL(ntb_transport_free_queue);
/** * ntb_transport_rx_remove - Dequeues enqueued rx packet * @qp: NTB queue to be freed * @len: pointer to variable to write enqueued buffers length * * Dequeues unused buffers from receive queue. Should only be used during * shutdown of qp. * * RETURNS: NULL error value on error, or void* for success.
*/ void *ntb_transport_rx_remove(struct ntb_transport_qp *qp, unsignedint *len)
{ struct ntb_queue_entry *entry; void *buf;
if (!qp || qp->client_ready) return NULL;
entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q); if (!entry) return NULL;
/** * ntb_transport_rx_enqueue - Enqueue a new NTB queue entry * @qp: NTB transport layer queue the entry is to be enqueued on * @cb: per buffer pointer for callback function to use * @data: pointer to data buffer that incoming packets will be copied into * @len: length of the data buffer * * Enqueue a new receive buffer onto the transport queue into which a NTB * payload can be received into. * * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
*/ int ntb_transport_rx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, unsignedint len)
{ struct ntb_queue_entry *entry;
if (!qp) return -EINVAL;
entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_free_q); if (!entry) return -ENOMEM;
/** * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry * @qp: NTB transport layer queue the entry is to be enqueued on * @cb: per buffer pointer for callback function to use * @data: pointer to data buffer that will be sent * @len: length of the data buffer * * Enqueue a new transmit buffer onto the transport queue from which a NTB * payload will be transmitted. This assumes that a lock is being held to * serialize access to the qp. * * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
*/ int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data, unsignedint len)
{ struct ntb_queue_entry *entry; int rc;
if (!qp || !len) return -EINVAL;
/* If the qp link is down already, just ignore. */ if (!qp->link_is_up) return 0;
/** * ntb_transport_link_up - Notify NTB transport of client readiness to use queue * @qp: NTB transport layer queue to be enabled * * Notify NTB transport layer of client readiness to use queue
*/ void ntb_transport_link_up(struct ntb_transport_qp *qp)
{ if (!qp) return;
qp->client_ready = true;
if (qp->transport->link_is_up)
schedule_delayed_work(&qp->link_work, 0);
}
EXPORT_SYMBOL_GPL(ntb_transport_link_up);
/** * ntb_transport_link_down - Notify NTB transport to no longer enqueue data * @qp: NTB transport layer queue to be disabled * * Notify NTB transport layer of client's desire to no longer receive data on * transport queue specified. It is the client's responsibility to ensure all * entries on queue are purged or otherwise handled appropriately.
*/ void ntb_transport_link_down(struct ntb_transport_qp *qp)
{ int val;
if (!qp) return;
qp->client_ready = false;
val = ntb_spad_read(qp->ndev, QP_LINKS);
ntb_peer_spad_write(qp->ndev, PIDX, QP_LINKS, val & ~BIT(qp->qp_num));
if (qp->link_is_up)
ntb_send_link_down(qp); else
cancel_delayed_work_sync(&qp->link_work);
}
EXPORT_SYMBOL_GPL(ntb_transport_link_down);
/** * ntb_transport_link_query - Query transport link state * @qp: NTB transport layer queue to be queried * * Query connectivity to the remote system of the NTB transport queue * * RETURNS: true for link up or false for link down
*/ bool ntb_transport_link_query(struct ntb_transport_qp *qp)
{ if (!qp) returnfalse;
/** * ntb_transport_qp_num - Query the qp number * @qp: NTB transport layer queue to be queried * * Query qp number of the NTB transport queue * * RETURNS: a zero based number specifying the qp number
*/ unsignedchar ntb_transport_qp_num(struct ntb_transport_qp *qp)
{ if (!qp) return 0;
/** * ntb_transport_max_size - Query the max payload size of a qp * @qp: NTB transport layer queue to be queried * * Query the maximum payload size permissible on the given qp * * RETURNS: the max payload size of a qp
*/ unsignedint ntb_transport_max_size(struct ntb_transport_qp *qp)
{ unsignedint max_size; unsignedint copy_align; struct dma_chan *rx_chan, *tx_chan;
/* If DMA engine usage is possible, try to find the max size for that */
max_size = qp->tx_max_frame - sizeof(struct ntb_payload_header);
max_size = round_down(max_size, 1 << copy_align);
/*
 * NOTE(review): the following text is an extraction artifact (a German
 * website disclaimer), not part of the driver source. Translated:
 * "The information on this web page has been carefully compiled to the
 *  best of our knowledge. However, no guarantee is given as to the
 *  completeness, correctness, or quality of the information provided.
 *  Note: the colored syntax display and the measurement are still
 *  experimental."
 */