unsignedint admin_timeout = 60;
module_param(admin_timeout, uint, 0644);
MODULE_PARM_DESC(admin_timeout, "timeout in seconds for admin commands");
EXPORT_SYMBOL_GPL(admin_timeout);
unsignedint nvme_io_timeout = 30;
module_param_named(io_timeout, nvme_io_timeout, uint, 0644);
MODULE_PARM_DESC(io_timeout, "timeout in seconds for I/O");
EXPORT_SYMBOL_GPL(nvme_io_timeout);
staticunsignedchar shutdown_timeout = 5;
module_param(shutdown_timeout, byte, 0644);
MODULE_PARM_DESC(shutdown_timeout, "timeout in seconds for controller shutdown");
static u8 nvme_max_retries = 5;
module_param_named(max_retries, nvme_max_retries, byte, 0644);
MODULE_PARM_DESC(max_retries, "max number of retries a command may have");
staticunsignedlong default_ps_max_latency_us = 100000;
module_param(default_ps_max_latency_us, ulong, 0644);
MODULE_PARM_DESC(default_ps_max_latency_us, "max power saving latency for new devices; use PM QOS to change per device");
staticbool force_apst;
module_param(force_apst, bool, 0644);
MODULE_PARM_DESC(force_apst, "allow APST for newly enumerated devices even if quirked off");
/* * Older kernels didn't enable protection information if it was at an offset. * Newer kernels do, so it breaks reads on the upgrade if such formats were * used in prior kernels since the metadata written did not contain a valid * checksum.
*/ staticbool disable_pi_offsets = false;
module_param(disable_pi_offsets, bool, 0444);
MODULE_PARM_DESC(disable_pi_offsets, "disable protection information if it has an offset");
/*
 * nvme_wq - hosts nvme related works that are not reset or delete
 * nvme_reset_wq - hosts nvme reset works
 * nvme_delete_wq - hosts nvme delete works
 *
 * nvme_wq will host works such as scan, aen handling, fw activation,
 * keep-alive, periodic reconnects etc. nvme_reset_wq
 * runs reset works which also flush works hosted on nvme_wq for
 * serialization purposes. nvme_delete_wq host controller deletion
 * works which flush reset works for serialization.
 *
 * NOTE(review): only nvme_wq is visible in this capture; the reset and
 * delete workqueue definitions appear to have been lost — confirm against
 * the full file.
 */
struct workqueue_struct *nvme_wq;
EXPORT_SYMBOL_GPL(nvme_wq);
/*
 * Schedule a namespace rescan on nvme_wq for the given controller.
 */
void nvme_queue_scan(struct nvme_ctrl *ctrl)
{
	/*
	 * Only new queue scan work when admin and IO queues are both alive
	 */
	if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset)
		queue_work(nvme_wq, &ctrl->scan_work);
}
/*
 * Use this function to proceed with scheduling reset_work for a controller
 * that had previously been set to the resetting state. This is intended for
 * code paths that can't be interrupted by other reset attempts. A hot removal
 * may prevent this from succeeding.
 *
 * Returns 0 on success, -EBUSY if the controller is not in the resetting
 * state or the reset work is already queued.
 */
int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
{
	if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING)
		return -EBUSY;
	if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
		return -EBUSY;
	return 0;
}
EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
/*
 * Transition the controller to the deleting state and queue the deletion
 * work. Returns 0 on success, -EBUSY if the state change is not possible
 * or the delete work is already queued.
 */
int nvme_delete_ctrl(struct nvme_ctrl *ctrl)
{
	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
		return -EBUSY;
	if (!queue_work(nvme_delete_wq, &ctrl->delete_work))
		return -EBUSY;
	return 0;
}
EXPORT_SYMBOL_GPL(nvme_delete_ctrl);
/*
 * Synchronous variant of controller deletion: performs the delete inline
 * rather than queueing it.
 */
void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
{
	/*
	 * Keep a reference until nvme_do_delete_ctrl() complete,
	 * since ->delete_ctrl can free the controller.
	 */
	nvme_get_ctrl(ctrl);
	if (nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING))
		nvme_do_delete_ctrl(ctrl);
	nvme_put_ctrl(ctrl);
}
/*
 * Translate an NVMe completion status code (SCT/SC) into the generic
 * block-layer status the rest of the stack understands. Unrecognized
 * codes map to BLK_STS_IOERR.
 */
static blk_status_t nvme_error_status(u16 status)
{
	switch (status & NVME_SCT_SC_MASK) {
	case NVME_SC_SUCCESS:
		return BLK_STS_OK;
	case NVME_SC_CAP_EXCEEDED:
		return BLK_STS_NOSPC;
	case NVME_SC_LBA_RANGE:
	case NVME_SC_CMD_INTERRUPTED:
	case NVME_SC_NS_NOT_READY:
		return BLK_STS_TARGET;
	case NVME_SC_BAD_ATTRIBUTES:
	case NVME_SC_INVALID_OPCODE:
	case NVME_SC_INVALID_FIELD:
	case NVME_SC_INVALID_NS:
		return BLK_STS_NOTSUPP;
	case NVME_SC_WRITE_FAULT:
	case NVME_SC_READ_ERROR:
	case NVME_SC_UNWRITTEN_BLOCK:
	case NVME_SC_ACCESS_DENIED:
	case NVME_SC_READ_ONLY:
	case NVME_SC_COMPARE_FAILED:
		return BLK_STS_MEDIUM;
	case NVME_SC_GUARD_CHECK:
	case NVME_SC_APPTAG_CHECK:
	case NVME_SC_REFTAG_CHECK:
	case NVME_SC_INVALID_PI:
		return BLK_STS_PROTECTION;
	case NVME_SC_RESERVATION_CONFLICT:
		return BLK_STS_RESV_CONFLICT;
	case NVME_SC_HOST_PATH_ERROR:
		return BLK_STS_TRANSPORT;
	case NVME_SC_ZONE_TOO_MANY_ACTIVE:
		return BLK_STS_ZONE_ACTIVE_RESOURCE;
	case NVME_SC_ZONE_TOO_MANY_OPEN:
		return BLK_STS_ZONE_OPEN_RESOURCE;
	default:
		return BLK_STS_IOERR;
	}
}
/*
 * NOTE(review): truncated fragment — this appears to be the tail of
 * nvme_complete_rq() (the function's opening lines and declarations of
 * crd/delay/ctrl/req are missing from this capture, and inter-token
 * whitespace was lost). Left byte-identical; restore the text from the
 * complete file rather than editing this fragment.
 */
/* The mask and shift result must be <= 3 */
crd = (nvme_req(req)->status & NVME_STATUS_CRD) >> 11; if (crd)
delay = nvme_req(req)->ctrl->crdt[crd - 1] * 100;
/* * Completions of long-running commands should not be able to * defer sending of periodic keep alives, since the controller * may have completed processing such commands a long time ago * (arbitrarily close to command submission time). * req->deadline - req->timeout is the command submission time * in jiffies.
*/ if (ctrl->kas &&
req->deadline - req->timeout >= ctrl->ka_last_check_time)
ctrl->comp_seen = true;
/* Dispatch on the retry/failover decision for the completed request. */
switch (nvme_decide_disposition(req)) { case COMPLETE:
nvme_end_req(req); return; case RETRY:
nvme_retry_req(req); return; case FAILOVER:
nvme_failover_req(req); return; case AUTHENTICATE: #ifdef CONFIG_NVME_HOST_AUTH
queue_work(nvme_wq, &ctrl->dhchap_auth_work);
nvme_retry_req(req); #else
nvme_end_req(req); #endif return;
}
}
EXPORT_SYMBOL_GPL(nvme_complete_rq);
/*
 * Called to unwind from ->queue_rq on a failed command submission so that the
 * multipathing code gets called to potentially failover to another path.
 * The caller needs to unwind all transport specific resource allocations and
 * must return propagate the return value.
 */
blk_status_t nvme_host_path_error(struct request *req)
{
	nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR;
	blk_mq_set_request_complete(req);
	nvme_complete_rq(req);
	return BLK_STS_OK;
}
EXPORT_SYMBOL_GPL(nvme_host_path_error);
/*
 * Waits for the controller state to be resetting, or returns false if it is
 * not possible to ever transition to that state.
 */
bool nvme_wait_reset(struct nvme_ctrl *ctrl)
{
	wait_event(ctrl->state_wq,
		   nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
		   nvme_state_terminal(ctrl));
	return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING;
}
EXPORT_SYMBOL_GPL(nvme_wait_reset);
/*
 * NOTE(review): corrupted span — this capture merges the body of
 * nvme_fail_nonready_command() with what looks like the interior of a
 * separate readiness-check helper (note the `req` identifier used where
 * only `rq` is declared, the fused `returnfalse`/`returntrue` tokens, and
 * the missing closing braces/return). Left byte-identical; recover both
 * functions from the complete file.
 */
/* * For something we're not in a state to send to the device the default action * is to busy it and retry it after the controller state is recovered. However, * if the controller is deleting or if anything is marked for failfast or * nvme multipath it is immediately failed. * * Note: commands used to initialize the controller will be marked for failfast. * Note: nvme cli/ioctl commands are marked for failfast.
*/
blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl, struct request *rq)
{ enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
if (state != NVME_CTRL_DELETING_NOIO &&
state != NVME_CTRL_DELETING &&
state != NVME_CTRL_DEAD &&
!test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) return BLK_STS_RESOURCE;
if (!(rq->rq_flags & RQF_DONTPREP))
nvme_clear_nvme_request(rq);
/* * currently we have a problem sending passthru commands * on the admin_q if the controller is not LIVE because we can't * make sure that they are going out after the admin connect, * controller enable and/or other commands in the initialization * sequence. until the controller will be LIVE, fail with * BLK_STS_RESOURCE so that they will be rescheduled.
*/ if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD)) returnfalse;
if (ctrl->ops->flags & NVME_F_FABRICS) { /* * Only allow commands on a live queue, except for the connect * command, which is require to set the queue live in the * appropinquate states.
*/ switch (state) { case NVME_CTRL_CONNECTING: if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
(req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
req->cmd->fabrics.fctype == nvme_fabrics_type_auth_send ||
req->cmd->fabrics.fctype == nvme_fabrics_type_auth_receive)) returntrue; break; default: break; case NVME_CTRL_DEAD: returnfalse;
}
}
/*
 * NOTE(review): truncated fragments — pieces of the discard-range setup
 * and write-zeroes reference-tag setup helpers, with surrounding function
 * signatures and several interior lines lost by the capture. Left
 * byte-identical; do not edit in place.
 */
/* * Some devices do not consider the DSM 'Number of Ranges' field when * determining how much data to DMA. Always allocate memory for maximum * number of segments to prevent device reading beyond end of buffer.
*/ staticconst size_t alloc_size = sizeof(*range) * NVME_DSM_MAX_RANGES;
range = kzalloc(alloc_size, GFP_ATOMIC | __GFP_NOWARN); if (!range) { /* * If we fail allocation our range, fallback to the controller * discard page. If that's also busy, it's safe to return * busy, as we know we can make progress once that's freed.
*/ if (test_and_set_bit_lock(0, &ns->ctrl->discard_page_busy)) return BLK_STS_RESOURCE;
/* only type1 and type 2 PI formats have a reftag */ switch (ns->head->pi_type) { case NVME_NS_DPS_PI_TYPE1: case NVME_NS_DPS_PI_TYPE2: break; default: return;
}
/* both rw and write zeroes share the same reftag format */ switch (ns->head->guard_type) { case NVME_NVM_NS_16B_GUARD:
cmnd->rw.reftag = cpu_to_le32(t10_pi_ref_tag(req)); break; case NVME_NVM_NS_64B_GUARD:
ref48 = ext_pi_ref_tag(req);
lower = lower_32_bits(ref48);
upper = upper_32_bits(ref48);
if (nvme_ns_has_pi(ns->head)) {
cmnd->write_zeroes.control |= cpu_to_le16(NVME_RW_PRINFO_PRACT);
nvme_set_ref_tag(ns, cmnd, req);
}
return BLK_STS_OK;
}
/*
 * NOTE(review): truncated fragments — the head of nvme_valid_atomic_write()
 * (its tail is missing), interior protection-information handling from the
 * read/write setup path, and the request-op dispatch switch from the
 * command setup entry point. Function boundaries were lost by the capture;
 * left byte-identical.
 */
/* * NVMe does not support a dedicated command to issue an atomic write. A write * which does adhere to the device atomic limits will silently be executed * non-atomically. The request issuer should ensure that the write is within * the queue atomic writes limits, but just validate this in case it is not.
*/ staticbool nvme_valid_atomic_write(struct request *req)
{ struct request_queue *q = req->q;
u32 boundary_bytes = queue_atomic_write_boundary_bytes(q);
if (blk_rq_bytes(req) > queue_atomic_write_unit_max_bytes(q)) returnfalse;
if (ns->head->ms) { /* * If formatted with metadata, the block layer always provides a * metadata buffer if CONFIG_BLK_DEV_INTEGRITY is enabled. Else * we enable the PRACT bit for protection information or set the * namespace capacity to zero to prevent any I/O.
*/ if (!blk_integrity_rq(req)) { if (WARN_ON_ONCE(!nvme_ns_has_pi(ns->head))) return BLK_STS_NOTSUPP;
control |= NVME_RW_PRINFO_PRACT;
nvme_set_ref_tag(ns, cmnd, req);
}
if (bio_integrity_flagged(req->bio, BIP_CHECK_GUARD))
control |= NVME_RW_PRINFO_PRCHK_GUARD; if (bio_integrity_flagged(req->bio, BIP_CHECK_REFTAG)) {
control |= NVME_RW_PRINFO_PRCHK_REF; if (op == nvme_cmd_zone_append)
control |= NVME_RW_APPEND_PIREMAP;
nvme_set_ref_tag(ns, cmnd, req);
} if (bio_integrity_flagged(req->bio, BIP_CHECK_APPTAG)) {
control |= NVME_RW_PRINFO_PRCHK_APP;
nvme_set_app_tag(req, cmnd);
}
}
if (!(req->rq_flags & RQF_DONTPREP))
nvme_clear_nvme_request(req);
/* Translate the block-layer operation into the matching NVMe command. */
switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: /* these are setup prior to execution in nvme_init_request() */ break; case REQ_OP_FLUSH:
nvme_setup_flush(ns, cmd); break; case REQ_OP_ZONE_RESET_ALL: case REQ_OP_ZONE_RESET:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_RESET); break; case REQ_OP_ZONE_OPEN:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_OPEN); break; case REQ_OP_ZONE_CLOSE:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_CLOSE); break; case REQ_OP_ZONE_FINISH:
ret = nvme_setup_zone_mgmt_send(ns, req, cmd, NVME_ZONE_FINISH); break; case REQ_OP_WRITE_ZEROES:
ret = nvme_setup_write_zeroes(ns, req, cmd); break; case REQ_OP_DISCARD:
ret = nvme_setup_discard(ns, req, cmd); break; case REQ_OP_READ:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_read); break; case REQ_OP_WRITE:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_write); break; case REQ_OP_ZONE_APPEND:
ret = nvme_setup_rw(ns, req, cmd, nvme_cmd_zone_append); break; default:
WARN_ON_ONCE(1); return BLK_STS_IOERR;
}
/*
 * Return values:
 * 0: success
 * >0: nvme controller's cqe status response
 * <0: kernel error in lieu of controller response
 */
int nvme_execute_rq(struct request *rq, bool at_head)
{
	blk_status_t status;

	status = blk_execute_rq(rq, at_head);
	if (nvme_req(rq)->flags & NVME_REQ_CANCELLED)
		return -EINTR;
	if (nvme_req(rq)->status)
		return nvme_req(rq)->status;
	return blk_status_to_errno(status);
}
EXPORT_SYMBOL_NS_GPL(nvme_execute_rq, "NVME_TARGET_PASSTHRU");
/*
 * NOTE(review): corrupted span — the header and opening declarations of
 * __nvme_submit_sync_cmd() are immediately followed by the interior of a
 * command-effects helper whose own signature is missing (note the closing
 * EXPORT names nvme_passthru_start, not __nvme_submit_sync_cmd), and
 * `ns`/`ctrl`/`opcode`/`effects` are used without visible declarations.
 * Left byte-identical; recover both functions from the complete file.
 */
/* * Returns 0 on success. If the result is negative, it's a Linux error code; * if the result is positive, it's an NVM Express status code
*/ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, union nvme_result *result, void *buffer, unsigned bufflen, int qid, nvme_submit_flags_t flags)
{ struct request *req; int ret;
blk_mq_req_flags_t blk_flags = 0;
if (ns) {
effects = le32_to_cpu(ns->head->effects->iocs[opcode]); if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
dev_warn_once(ctrl->device, "IO command:%02x has unusual effects:%08x\n",
opcode, effects);
/* * NVME_CMD_EFFECTS_CSE_MASK causes a freeze all I/O queues, * which would deadlock when done on an I/O command. Note that * We already warn about an unusual effect above.
*/
effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
} else {
effects = le32_to_cpu(ctrl->effects->acs[opcode]);
/* Ignore execution restrictions if any relaxation bits are set */ if (effects & NVME_CMD_EFFECTS_CSER_MASK)
effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
}
/* * For simplicity, IO to all namespaces is quiesced even if the command * effects say only one namespace is affected.
*/ if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
mutex_lock(&ctrl->scan_lock);
mutex_lock(&ctrl->subsys->lock);
nvme_mpath_start_freeze(ctrl->subsys);
nvme_mpath_wait_freeze(ctrl->subsys);
nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl);
} return effects;
}
EXPORT_SYMBOL_NS_GPL(nvme_passthru_start, "NVME_TARGET_PASSTHRU");
/*
 * Undo the side effects of a passthrough command started with
 * nvme_passthru_start(): unfreeze I/O, rescan namespaces if the command
 * may have changed them, and react to capability/KATO changes.
 */
void nvme_passthru_end(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u32 effects,
		       struct nvme_command *cmd, int status)
{
	if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
		nvme_unfreeze(ctrl);
		nvme_mpath_unfreeze(ctrl->subsys);
		mutex_unlock(&ctrl->subsys->lock);
		mutex_unlock(&ctrl->scan_lock);
	}
	if (effects & NVME_CMD_EFFECTS_CCC) {
		if (!test_and_set_bit(NVME_CTRL_DIRTY_CAPABILITY,
				      &ctrl->flags)) {
			dev_info(ctrl->device,
"controller capabilities changed, reset may be required to take effect.\n");
		}
	}
	if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
		nvme_queue_scan(ctrl);
		flush_work(&ctrl->scan_work);
	}
	if (ns)
		return;

	switch (cmd->common.opcode) {
	case nvme_admin_set_features:
		switch (le32_to_cpu(cmd->common.cdw10) & 0xFF) {
		case NVME_FEAT_KATO:
			/*
			 * Keep alive commands interval on the host should be
			 * updated when KATO is modified by Set Features
			 * commands.
			 */
			if (!status)
				nvme_update_keep_alive(ctrl, cmd);
			break;
		default:
			break;
		}
		break;
	default:
		break;
	}
}
EXPORT_SYMBOL_NS_GPL(nvme_passthru_end, "NVME_TARGET_PASSTHRU");
/*
 * Recommended frequency for KATO commands per NVMe 1.4 section 7.12.1:
 *
 * The host should send Keep Alive commands at half of the Keep Alive Timeout
 * accounting for transport roundtrip times [..].
 */
static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
{
	unsigned long delay = ctrl->kato * HZ / 2;

	/*
	 * When using Traffic Based Keep Alive, we need to run
	 * nvme_keep_alive_work at twice the normal frequency, as one
	 * command completion can postpone sending a keep alive command
	 * by up to twice the delay between runs.
	 */
	if (ctrl->ctratt & NVME_CTRL_ATTR_TBKAS)
		delay /= 2;
	return delay;
}
staticbool nvme_id_cns_ok(struct nvme_ctrl *ctrl, u8 cns)
{ /* * The CNS field occupies a full byte starting with NVMe 1.2
*/ if (ctrl->vs >= NVME_VS(1, 2, 0)) returntrue;
/* * NVMe 1.1 expanded the CNS value to two bits, which means values * larger than that could get truncated and treated as an incorrect * value. * * Qemu implemented 1.0 behavior for controllers claiming 1.1 * compliance, so they need to be quirked here.
*/ if (ctrl->vs >= NVME_VS(1, 1, 0) &&
!(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) return cns <= 3;
/* * NVMe 1.0 used a single bit for the CNS value.
*/ return cns <= 1;
}
/*
 * NOTE(review): truncated fragment of nvme_identify_ctrl() — the capture
 * ends after the allocation; the sync-command submission and error/return
 * tail are missing, along with inter-token whitespace. Left byte-identical.
 */
staticint nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{ struct nvme_command c = { }; int error;
/* gcc-4.4.4 (at least) has issues with initializers and anon unions */
c.identify.opcode = nvme_admin_identify;
c.identify.cns = NVME_ID_CNS_CTRL;
*id = kmalloc(sizeof(struct nvme_id_ctrl), GFP_KERNEL); if (!*id) return -ENOMEM;
/*
 * NOTE(review): corrupted span — nvme_set_queue_count() is missing its
 * trailing `return 0;` and closing brace, and runs straight into the tail
 * of a separate async-event configuration helper (the `supported_aens`
 * identifier has no visible declaration). Left byte-identical; recover
 * both functions from the complete file.
 */
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count)
{
u32 q_count = (*count - 1) | ((*count - 1) << 16);
u32 result; int status, nr_io_queues;
status = nvme_set_features(ctrl, NVME_FEAT_NUM_QUEUES, q_count, NULL, 0,
&result);
/* * It's either a kernel error or the host observed a connection * lost. In either case it's not possible communicate with the * controller and thus enter the error code path.
*/ if (status < 0 || status == NVME_SC_HOST_PATH_ERROR) return status;
/* * Degraded controllers might return an error when setting the queue * count. We still want to be able to bring them online and offer * access to the admin queue, as that might be only way to fix them up.
*/ if (status > 0) {
dev_err(ctrl->device, "Could not set queue count (%d)\n", status);
*count = 0;
} else {
nr_io_queues = min(result & 0xffff, result >> 16) + 1;
*count = min(*count, nr_io_queues);
}
status = nvme_set_features(ctrl, NVME_FEAT_ASYNC_EVENT, supported_aens,
NULL, 0, &result); if (status)
dev_warn(ctrl->device, "Failed to configure AEN (cfg %x)\n",
supported_aens);
queue_work(nvme_wq, &ctrl->async_event_work);
}
/*
 * NOTE(review): corrupted span — the head of nvme_ns_open() (its body and
 * the fail/fail_put_ns labels it jumps to are missing) runs into interior
 * pieces of the namespace metadata/PI configuration helpers, which use
 * `head`/`id`/`info`/`ctrl` without visible declarations. Left
 * byte-identical; recover the individual functions from the complete file.
 */
staticint nvme_ns_open(struct nvme_ns *ns)
{
/* should never be called due to GENHD_FL_HIDDEN */ if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head))) goto fail; if (!nvme_get_ns(ns)) goto fail; if (!try_module_get(ns->ctrl->ops->module)) goto fail_put_ns;
/* * PI can always be supported as we can ask the controller to simply * insert/strip it, which is not possible for other kinds of metadata.
*/ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) ||
!(head->features & NVME_NS_METADATA_SUPPORTED)) return nvme_ns_has_pi(head);
if (head->pi_size && head->ms >= head->pi_size)
head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; if (!(id->dps & NVME_NS_DPS_PI_FIRST)) { if (disable_pi_offsets)
head->pi_type = 0; else
info->pi_offset = head->ms - head->pi_size;
}
if (ctrl->ops->flags & NVME_F_FABRICS) { /* * The NVMe over Fabrics specification only supports metadata as * part of the extended data LBA. We rely on HCA/HBA support to * remap the separate metadata buffer from the block layer.
*/ if (WARN_ON_ONCE(!(id->flbas & NVME_NS_FLBAS_META_EXT))) return;
head->features |= NVME_NS_EXT_LBAS;
/* * The current fabrics transport drivers support namespace * metadata formats only if nvme_ns_has_pi() returns true. * Suppress support for all other formats so the namespace will * have a 0 capacity and not be usable through the block stack. * * Note, this check will need to be modified if any drivers * gain the ability to use other metadata formats.
*/ if (ctrl->max_integrity_segments && nvme_ns_has_pi(head))
head->features |= NVME_NS_METADATA_SUPPORTED;
} else { /* * For PCIe controllers, we can't easily remap the separate * metadata buffer from the block layer and thus require a * separate metadata buffer for block layer metadata/PI support. * We allow extended LBAs for the passthrough interface, though.
*/ if (id->flbas & NVME_NS_FLBAS_META_EXT)
head->features |= NVME_NS_EXT_LBAS; else
head->features |= NVME_NS_METADATA_SUPPORTED;
}
}
/*
 * NOTE(review): truncated fragments — interior of the atomic-write-unit
 * calculation and the disk/queue-limits update helper; the enclosing
 * signatures and the declarations of `id`, `bs`, `atomic_bs`, `phys_bs`,
 * `io_opt`, `boundary`, `valid` and `lim` are missing. Left byte-identical.
 */
/* * We do not support an offset for the atomic boundaries.
*/ if (id->nabo) return bs;
if ((id->nsfeat & NVME_NS_FEAT_ATOMICS) && id->nawupf) { /* * Use the per-namespace atomic write unit when available.
*/
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; if (id->nabspf)
boundary = (le16_to_cpu(id->nabspf) + 1) * bs;
} else { /* * Use the controller wide atomic write unit. This sucks * because the limit is defined in terms of logical blocks while * namespaces can have different formats, and because there is * no clear language in the specification prohibiting different * values for different controllers in the subsystem.
*/
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
}
/* * The block layer can't support LBA sizes larger than the page size * or smaller than a sector size yet, so catch this early and don't * allow block I/O.
*/ if (blk_validate_block_size(bs)) {
bs = (1 << 9);
valid = false;
}
/* * Linux filesystems assume writing a single physical block is * an atomic operation. Hence limit the physical block size to the * value of the Atomic Write Unit Power Fail parameter.
*/
lim->logical_block_size = bs;
lim->physical_block_size = min(phys_bs, atomic_bs);
lim->io_min = phys_bs;
lim->io_opt = io_opt; if ((ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES) &&
(ns->ctrl->oncs & NVME_CTRL_ONCS_DSM))
lim->max_write_zeroes_sectors = UINT_MAX; else
lim->max_write_zeroes_sectors = ns->ctrl->max_zeroes_sectors; return valid;
}
/*
 * NOTE(review): truncated fragments — interior of the FDP (Flexible Data
 * Placement) configuration-log parsing and feature-query helpers; the
 * enclosing signatures and the declarations of `h`, `log`, `desc`, `end`,
 * `size`, `fdp_idx`, `fdp`, `head` and `out` label are missing. Left
 * byte-identical.
 */
n = le16_to_cpu(h->numfdpc) + 1; if (fdp_idx > n) {
dev_warn(ctrl->device, "FDP index:%d out of range:%d\n",
fdp_idx, n); /* Proceed without registering FDP streams */
ret = 0; goto out;
}
log = h + 1;
desc = log;
end = log + size - sizeof(*h); for (i = 0; i < fdp_idx; i++) {
log += le16_to_cpu(desc->dsze);
desc = log; if (log >= end) {
dev_warn(ctrl->device, "FDP invalid config descriptor list\n");
ret = 0; goto out;
}
}
if (le32_to_cpu(desc->nrg) > 1) {
dev_warn(ctrl->device, "FDP NRG > 1 not supported\n");
ret = 0; goto out;
}
/* * The FDP configuration is static for the lifetime of the namespace, * so return immediately if we've already registered this namespace's * streams.
*/ if (head->nr_plids) return 0;
ret = nvme_get_features(ctrl, NVME_FEAT_FDP, info->endgid, NULL, 0,
&fdp); if (ret) {
dev_warn(ctrl->device, "FDP get feature status:0x%x\n", ret); return ret;
}
if (!(fdp.flags & FDPCFG_FDPE)) return 0;
ret = nvme_query_fdp_granularity(ctrl, info, fdp.fdpcidx); if (!info->runs) return ret;
/*
 * NOTE(review): truncated fragment — interior of a namespace update helper
 * (queue-limit feature flags, integrity registration, DEAC bit handling);
 * the enclosing signature and declarations of `lim`, `info`, `id`,
 * `capacity` and `ret` are missing. Left byte-identical.
 */
if (info->is_rotational)
lim.features |= BLK_FEAT_ROTATIONAL;
/* * Register a metadata profile for PI, or the plain non-integrity NVMe * metadata masquerading as Type 0 if supported, otherwise reject block * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case.
*/ if (!nvme_init_integrity(ns->head, &lim, info))
capacity = 0;
/* * Only set the DEAC bit if the device guarantees that reads from * deallocated data return zeroes. While the DEAC bit does not * require that, it must be a no-op if reads from deallocated data * do not return zeroes.
*/ if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) {
ns->head->features |= NVME_NS_DEAC;
lim.max_hw_wzeroes_unmap_sectors = lim.max_write_zeroes_sectors;
}
ret = queue_limits_commit_update(ns->disk->queue, &lim);
/*
 * NOTE(review): the text below is a website disclaimer appended by the
 * capture/syntax-highlighting tool, not part of the source file. Original
 * German, translated: "The information on this website was carefully
 * compiled to the best of our knowledge. However, neither completeness,
 * correctness, nor quality of the provided information is guaranteed.
 * Note: the colored syntax display and the measurement are still
 * experimental." Remove this trailer when restoring the file.
 */