/* * Linux driver for VMware's para-virtualized SCSI HBA. * * Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the * Free Software Foundation; version 2 of the License and no later version. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or * NON INFRINGEMENT. See the GNU General Public License for more * details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. *
*/
/*
 * Per-command driver context.  One entry per request-ring slot; the entry's
 * index in cmd_map doubles as the device-visible context ID (see
 * pvscsi_map_context), giving a 1-to-1 mapping from completions back to
 * requests.
 */
struct pvscsi_ctx {
	struct scsi_cmnd	*cmd;		/* in-flight command, NULL when free */
	struct pvscsi_sg_list	*sgl;		/* statically allocated SG list page(s) */
	struct list_head	list;		/* free/busy list linkage */
	dma_addr_t		dataPA;		/* DMA addr of single-segment data buffer */
	dma_addr_t		sensePA;	/* presumably DMA addr of the sense buffer — confirm against issue path */
	dma_addr_t		sglPA;		/* DMA addr of sgl when an SG list is used */
	struct completion	*abort_cmp;	/* set while an abort of this cmd is pending */
};
/* Module parameter: pages per request/completion ring (writable at load time). */
module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default="
__stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING) "[up to 16 targets],"
__stringify(PVSCSI_SETUP_RINGS_MAX_NUM_PAGES) "[for 16+ targets])");
/* Module parameter: pages for the asynchronous message ring. */
module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default="
__stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")");
/* * Map a pvscsi_ctx struct to a context ID field value; we map to a simple * non-zero integer. ctx always points to an entry in cmd_map array, hence * the return value is always >=1.
*/ static u64 pvscsi_map_context(conststruct pvscsi_adapter *adapter, conststruct pvscsi_ctx *ctx)
{ return ctx - adapter->cmd_map + 1;
}
len /= sizeof(*ptr);
pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd); for (i = 0; i < len; i++)
pvscsi_reg_write(adapter,
PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]);
}
staticint scsi_is_rw(unsignedchar op)
{ return op == READ_6 || op == WRITE_6 ||
op == READ_10 || op == WRITE_10 ||
op == READ_12 || op == WRITE_12 ||
op == READ_16 || op == WRITE_16;
}
sge = &ctx->sgl->sge[0]; for (i = 0; i < count; i++, sg = sg_next(sg)) {
sge[i].addr = sg_dma_address(sg);
sge[i].length = sg_dma_len(sg);
sge[i].flags = 0;
}
}
/* * Map all data buffers for a command into PCI space and * setup the scatter/gather list if needed.
*/ staticint pvscsi_map_buffers(struct pvscsi_adapter *adapter, struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd, struct PVSCSIRingReqDesc *e)
{ unsigned count; unsigned bufflen = scsi_bufflen(cmd); struct scatterlist *sg;
sg = scsi_sglist(cmd);
count = scsi_sg_count(cmd); if (count != 0) { int segs = scsi_dma_map(cmd);
if (segs == -ENOMEM) {
scmd_printk(KERN_DEBUG, cmd, "vmw_pvscsi: Failed to map cmd sglist for DMA.\n"); return -ENOMEM;
} elseif (segs > 1) {
pvscsi_create_sg(ctx, sg, segs);
e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
ctx->sglPA = dma_map_single(&adapter->dev->dev,
ctx->sgl, SGL_SIZE, DMA_TO_DEVICE); if (dma_mapping_error(&adapter->dev->dev, ctx->sglPA)) {
scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: Failed to map ctx sglist for DMA.\n");
scsi_dma_unmap(cmd);
ctx->sglPA = 0; return -ENOMEM;
}
e->dataAddr = ctx->sglPA;
} else
e->dataAddr = sg_dma_address(sg);
} else { /* * In case there is no S/G list, scsi_sglist points * directly to the buffer.
*/
ctx->dataPA = dma_map_single(&adapter->dev->dev, sg, bufflen,
cmd->sc_data_direction); if (dma_mapping_error(&adapter->dev->dev, ctx->dataPA)) {
scmd_printk(KERN_DEBUG, cmd, "vmw_pvscsi: Failed to map direct data buffer for DMA.\n"); return -ENOMEM;
}
e->dataAddr = ctx->dataPA;
}
return 0;
}
/*
 * The device incorrectly doesn't clear the first byte of the sense
 * buffer in some cases. We have to do it ourselves.
 * Otherwise we run into trouble when SWIOTLB is forced.
 */
static void pvscsi_patch_sense(struct scsi_cmnd *cmd)
{
	if (cmd->sense_buffer)
		cmd->sense_buffer[0] = 0;
}
if (adapter->use_msg) { struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };
cmd_msg.numPages = adapter->msg_pages;
base = adapter->msgRingPA; for (i = 0; i < adapter->msg_pages; i++) {
cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
base += PAGE_SIZE;
}
memset(adapter->msg_ring, 0, adapter->msg_pages * PAGE_SIZE);
/*
 * Adjust the queue depth for @sdev.  Devices without tagged-queuing
 * support are clamped to a depth of 1; the actual change is delegated
 * to the SCSI mid layer.  Returns the new queue depth.
 */
static int pvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
{
	if (!sdev->tagged_supported)
		qdepth = 1;
	return scsi_change_queue_depth(sdev, qdepth);
}
/* * Pull a completion descriptor off and pass the completion back * to the SCSI mid layer.
*/ staticvoid pvscsi_complete_request(struct pvscsi_adapter *adapter, conststruct PVSCSIRingCmpDesc *e)
{ struct pvscsi_ctx *ctx; struct scsi_cmnd *cmd; struct completion *abort_cmp;
u32 btstat = e->hostStatus;
u32 sdstat = e->scsiStatus;
ctx = pvscsi_get_context(adapter, e->context);
cmd = ctx->cmd;
abort_cmp = ctx->abort_cmp;
pvscsi_unmap_buffers(adapter, ctx); if (sdstat != SAM_STAT_CHECK_CONDITION)
pvscsi_patch_sense(cmd);
pvscsi_release_context(adapter, ctx); if (abort_cmp) { /* * The command was requested to be aborted. Just signal that * the request completed and swallow the actual cmd completion * here. The abort handler will post a completion for this * command indicating that it got successfully aborted.
*/
complete(abort_cmp); return;
}
cmd->result = 0; if (sdstat != SAM_STAT_GOOD &&
(btstat == BTSTAT_SUCCESS ||
btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) { if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
cmd->result = (DID_RESET << 16);
} else {
cmd->result = (DID_OK << 16) | sdstat;
}
} else switch (btstat) { case BTSTAT_SUCCESS: case BTSTAT_LINKED_COMMAND_COMPLETED: case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG: /* * Commands like INQUIRY may transfer less data than * requested by the initiator via bufflen. Set residual * count to make upper layer aware of the actual amount * of data returned. There are cases when controller * returns zero dataLen with non zero data - do not set * residual count in that case.
*/ if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
cmd->result = (DID_OK << 16); break;
case BTSTAT_DATARUN: case BTSTAT_DATA_UNDERRUN: /* Report residual data in underruns */
scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
cmd->result = (DID_ERROR << 16); break;
case BTSTAT_SELTIMEO: /* Our emulation returns this for non-connected devs */
cmd->result = (DID_BAD_TARGET << 16); break;
case BTSTAT_LUNMISMATCH: case BTSTAT_TAGREJECT: case BTSTAT_BADMSG: case BTSTAT_HAHARDWARE: case BTSTAT_INVPHASE: case BTSTAT_HATIMEOUT: case BTSTAT_NORESPONSE: case BTSTAT_DISCONNECT: case BTSTAT_HASOFTWARE: case BTSTAT_BUSFREE: case BTSTAT_SENSFAILED:
cmd->result |= (DID_ERROR << 16); break;
case BTSTAT_SENTRST: case BTSTAT_RECVRST: case BTSTAT_BUSRESET:
cmd->result = (DID_RESET << 16); break;
case BTSTAT_ABORTQUEUE:
cmd->result = (DID_BUS_BUSY << 16); break;
case BTSTAT_SCSIPARITY:
cmd->result = (DID_PARITY << 16); break;
/*
 * barrier usage : Since the PVSCSI device is emulated, there could be cases
 * where we may want to serialize some accesses between the driver and the
 * emulation layer. We use compiler barriers instead of the more expensive
 * memory barriers because PVSCSI is only supported on X86 which has strong
 * memory access ordering.
 */
static void pvscsi_process_completion_ring(struct pvscsi_adapter *adapter)
{
	struct PVSCSIRingsState *s = adapter->rings_state;
	struct PVSCSIRingCmpDesc *ring = adapter->cmp_ring;
	u32 cmp_entries = s->cmpNumEntriesLog2;

	/* Drain every completion posted so far (consumer chases producer). */
	while (s->cmpConsIdx != s->cmpProdIdx) {
		struct PVSCSIRingCmpDesc *e = ring + (s->cmpConsIdx &
						      MASK(cmp_entries));
		/*
		 * This barrier() ensures that *e is not dereferenced while
		 * the device emulation still writes data into the slot.
		 * Since the device emulation advances s->cmpProdIdx only after
		 * updating the slot we want to check it first.
		 */
		barrier();
		pvscsi_complete_request(adapter, e);
		/*
		 * This barrier() ensures that compiler doesn't reorder write
		 * to s->cmpConsIdx before the read of (*e) inside
		 * pvscsi_complete_request. Otherwise, device emulation may
		 * overwrite *e before we had a chance to read it.
		 */
		barrier();
		s->cmpConsIdx++;
	}
}
/* * Translate a Linux SCSI request into a request ring entry.
*/ staticint pvscsi_queue_ring(struct pvscsi_adapter *adapter, struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd)
{ struct PVSCSIRingsState *s; struct PVSCSIRingReqDesc *e; struct scsi_device *sdev;
u32 req_entries;
s = adapter->rings_state;
sdev = cmd->device;
req_entries = s->reqNumEntriesLog2;
/* * If this condition holds, we might have room on the request ring, but * we might not have room on the completion ring for the response. * However, we have already ruled out this possibility - we would not * have successfully allocated a context if it were true, since we only * have one context per request entry. Check for it anyway, since it * would be a serious bug.
*/ if (s->reqProdIdx - s->cmpConsIdx >= 1 << req_entries) {
scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: " "ring full: reqProdIdx=%d cmpConsIdx=%d\n",
s->reqProdIdx, s->cmpConsIdx); return -1;
}
e = adapter->req_ring + (s->reqProdIdx & MASK(req_entries));
staticint pvscsi_abort(struct scsi_cmnd *cmd)
{ struct pvscsi_adapter *adapter = shost_priv(cmd->device->host); struct pvscsi_ctx *ctx; unsignedlong flags; int result = SUCCESS;
DECLARE_COMPLETION_ONSTACK(abort_cmp); int done;
scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n",
adapter->host->host_no, cmd);
spin_lock_irqsave(&adapter->hw_lock, flags);
/* * Poll the completion ring first - we might be trying to abort * a command that is waiting to be dispatched in the completion ring.
*/
pvscsi_process_completion_ring(adapter);
/* * If there is no context for the command, it either already succeeded * or else was never properly issued. Not our problem.
*/
ctx = pvscsi_find_context(adapter, cmd); if (!ctx) {
scmd_printk(KERN_DEBUG, cmd, "Failed to abort cmd %p\n", cmd); goto out;
}
/* * Mark that the command has been requested to be aborted and issue * the abort.
*/
ctx->abort_cmp = &abort_cmp;
pvscsi_abort_cmd(adapter, ctx);
spin_unlock_irqrestore(&adapter->hw_lock, flags); /* Wait for 2 secs for the completion. */
done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
spin_lock_irqsave(&adapter->hw_lock, flags);
if (!done) { /* * Failed to abort the command, unmark the fact that it * was requested to be aborted.
*/
ctx->abort_cmp = NULL;
result = FAILED;
scmd_printk(KERN_DEBUG, cmd, "Failed to get completion for aborted cmd %p\n",
cmd); goto out;
}
/*
 * Abort all outstanding requests. This is only safe to use if the completion
 * ring will never be walked again or the device has been reset, because it
 * destroys the 1-1 mapping between context field passed to emulation and our
 * request structure.
 */
static void pvscsi_reset_all(struct pvscsi_adapter *adapter)
{
	unsigned i;

	for (i = 0; i < adapter->req_depth; i++) {
		struct pvscsi_ctx *ctx = &adapter->cmd_map[i];
		struct scsi_cmnd *cmd = ctx->cmd;

		if (cmd) {
			scmd_printk(KERN_ERR, cmd,
				    "Forced reset on cmd %p\n", cmd);
			/* Undo DMA mappings, clear stale sense byte, then
			 * complete the command back to the mid layer as reset. */
			pvscsi_unmap_buffers(adapter, ctx);
			pvscsi_patch_sense(cmd);
			pvscsi_release_context(adapter, ctx);
			cmd->result = (DID_RESET << 16);
			scsi_done(cmd);
		}
	}
}
if (use_msg) {
adapter->use_msg = false;
spin_unlock_irqrestore(&adapter->hw_lock, flags);
/* * Now that we know that the ISR won't add more work on the * workqueue we can safely flush any outstanding work.
*/
flush_workqueue(adapter->workqueue);
spin_lock_irqsave(&adapter->hw_lock, flags);
}
/* * We're going to tear down the entire ring structure and set it back * up, so stalling new requests until all completions are flushed and * the rings are back in place.
*/
pvscsi_process_request_ring(adapter);
ll_adapter_reset(adapter);
/* * Now process any completions. Note we do this AFTER adapter reset, * which is strange, but stops races where completions get posted * between processing the ring and issuing the reset. The backend will * not touch the ring memory after reset, so the immediately pre-reset * completion ring state is still valid.
*/
pvscsi_process_completion_ring(adapter);
/* * We don't want to queue new requests for this bus after * flushing all pending requests to emulation, since new * requests could then sneak in during this bus reset phase, * so take the lock now.
*/
spin_lock_irqsave(&adapter->hw_lock, flags);
scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n",
host->host_no, cmd->device->id);
/* * We don't want to queue new requests for this device after flushing * all pending requests to emulation, since new requests could then * sneak in during this device reset phase, so take the lock now.
*/
spin_lock_irqsave(&adapter->hw_lock, flags);
/*
 * Tear down everything the adapter owns: workqueue, MMIO mapping, PCI
 * regions, the per-command context map (and its SG lists), and all
 * DMA-coherent ring allocations.  Safe on a partially initialized
 * adapter — each step is guarded by a NULL check.
 */
static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
{
	if (adapter->workqueue)
		destroy_workqueue(adapter->workqueue);

	if (adapter->mmioBase)
		pci_iounmap(adapter->dev, adapter->mmioBase);

	pci_release_regions(adapter->dev);

	if (adapter->cmd_map) {
		pvscsi_free_sgls(adapter);
		kfree(adapter->cmd_map);
	}

	if (adapter->rings_state)
		dma_free_coherent(&adapter->dev->dev, PAGE_SIZE,
				  adapter->rings_state, adapter->ringStatePA);

	if (adapter->req_ring)
		dma_free_coherent(&adapter->dev->dev,
				  adapter->req_pages * PAGE_SIZE,
				  adapter->req_ring, adapter->reqRingPA);

	if (adapter->cmp_ring)
		dma_free_coherent(&adapter->dev->dev,
				  adapter->cmp_pages * PAGE_SIZE,
				  adapter->cmp_ring, adapter->cmpRingPA);

	if (adapter->msg_ring)
		dma_free_coherent(&adapter->dev->dev,
				  adapter->msg_pages * PAGE_SIZE,
				  adapter->msg_ring, adapter->msgRingPA);
}
/*
 * Allocate scatter gather lists.
 *
 * These are statically allocated.  Trying to be clever was not worth it.
 *
 * Dynamic allocation can fail, and we can't go deep into the memory
 * allocator, since we're a SCSI driver, and trying too hard to allocate
 * memory might generate disk I/O.  We also don't want to fail disk I/O
 * in that case because we can't get an allocation - the I/O could be
 * trying to swap out data to free memory.  Since that is pathological,
 * just use a statically allocated scatter list.
 *
 * Returns 0 on success, -ENOMEM if any per-context allocation fails
 * (previously allocated SG lists are freed before returning).
 */
static int pvscsi_allocate_sg(struct pvscsi_adapter *adapter)
{
	struct pvscsi_ctx *ctx;
	int i;

	/*
	 * BUG FIX: ctx was used without being initialized; it must start at
	 * the first entry of the context map before walking req_depth slots.
	 */
	ctx = adapter->cmd_map;
	BUG_ON(!ctx);

	for (i = 0; i < adapter->req_depth; ++i, ++ctx) {
		ctx->sgl = (void *)__get_free_pages(GFP_KERNEL,
						    get_order(SGL_SIZE));
		ctx->sglPA = 0;
		/* NULL is page-aligned, so this holds even on failure. */
		BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE));
		if (!ctx->sgl) {
			/* Unwind: free everything allocated so far. */
			for (; i >= 0; --i, --ctx) {
				free_pages((unsigned long)ctx->sgl,
					   get_order(SGL_SIZE));
				ctx->sgl = NULL;
			}
			return -ENOMEM;
		}
	}

	return 0;
}
/* * Query the device, fetch the config info and return the * maximum number of targets on the adapter. In case of * failure due to any reason return default i.e. 16.
*/ static u32 pvscsi_get_max_targets(struct pvscsi_adapter *adapter)
{ struct PVSCSICmdDescConfigCmd cmd; struct PVSCSIConfigPageHeader *header; struct device *dev;
dma_addr_t configPagePA; void *config_page;
u32 numPhys = 16;
dev = pvscsi_dev(adapter);
config_page = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
&configPagePA, GFP_KERNEL); if (!config_page) {
dev_warn(dev, "vmw_pvscsi: failed to allocate memory for config page\n"); gotoexit;
}
BUG_ON(configPagePA & ~PAGE_MASK);
/* Fetch config info from the device. */
cmd.configPageAddress = ((u64)PVSCSI_CONFIG_CONTROLLER_ADDRESS) << 32;
cmd.configPageNum = PVSCSI_CONFIG_PAGE_CONTROLLER;
cmd.cmpAddr = configPagePA;
cmd._pad = 0;
/* * Mark the completion page header with error values. If the device * completes the command successfully, it sets the status values to * indicate success.
*/
header = config_page;
header->hostStatus = BTSTAT_INVPARAM;
header->scsiStatus = SDSTAT_CHECK;
if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n");
} elseif (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n");
} else {
printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n"); goto out_disable_device;
}
/* * Let's use a temp pvscsi_adapter struct until we find the number of * targets on the adapter, after that we will switch to the real * allocated struct.
*/
adapter = &adapter_temp;
memset(adapter, 0, sizeof(*adapter));
adapter->dev = pdev;
adapter->rev = pdev->revision;
for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO)) continue;
if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE) continue;
break;
}
if (i == DEVICE_COUNT_RESOURCE) {
printk(KERN_ERR "vmw_pvscsi: adapter has no suitable MMIO region\n"); goto out_release_resources_and_disable;
}
adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE);
if (!adapter->mmioBase) {
printk(KERN_ERR "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n",
i, PVSCSI_MEM_SPACE_SIZE); goto out_release_resources_and_disable;
}
pci_set_master(pdev);
/* * Ask the device for max number of targets before deciding the * default pvscsi_ring_pages value.
*/
max_id = pvscsi_get_max_targets(adapter);
printk(KERN_INFO "vmw_pvscsi: max_id: %u\n", max_id);
if (pvscsi_ring_pages == 0) /* * Set the right default value. Up to 16 it is 8, above it is * max.
*/
pvscsi_ring_pages = (max_id > 16) ?
PVSCSI_SETUP_RINGS_MAX_NUM_PAGES :
PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
printk(KERN_INFO "vmw_pvscsi: setting ring_pages to %d\n",
pvscsi_ring_pages);
/* * Let's use the real pvscsi_adapter struct here onwards.
*/
adapter = shost_priv(host);
memset(adapter, 0, sizeof(*adapter));
adapter->dev = pdev;
adapter->host = host; /* * Copy back what we already have to the allocated adapter struct.
*/
adapter->rev = adapter_temp.rev;
adapter->mmioBase = adapter_temp.mmioBase;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.