/* * Do not allow unprivileged passthrough on partitions, as that allows an * escape from the containment of the partition.
*/ if (flags & NVME_IOCTL_PARTITION) goto admin;
/* * Do not allow unprivileged processes to send vendor specific or fabrics * commands as we can't be sure about their effects.
*/ if (c->common.opcode >= nvme_cmd_vendor_start ||
c->common.opcode == nvme_fabrics_command) goto admin;
/* * Do not allow unprivileged passthrough of admin commands except * for a subset of identify commands that contain information required * to form proper I/O commands in userspace and do not expose any * potentially sensitive information.
*/ if (!ns) { if (c->common.opcode == nvme_admin_identify) { switch (c->identify.cns) { case NVME_ID_CNS_NS: case NVME_ID_CNS_CS_NS: case NVME_ID_CNS_NS_CS_INDEP: case NVME_ID_CNS_CS_CTRL: case NVME_ID_CNS_CTRL: returntrue;
}
} goto admin;
}
/* * Check if the controller provides a Commands Supported and Effects log * and marks this command as supported. If not reject unprivileged * passthrough.
*/
effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode); if (!(effects & NVME_CMD_EFFECTS_CSUPP)) goto admin;
/* * Don't allow passthrough for command that have intrusive (or unknown) * effects.
*/ if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
NVME_CMD_EFFECTS_UUID_SEL |
NVME_CMD_EFFECTS_SCOPE_MASK)) goto admin;
/* * Only allow I/O commands that transfer data to the controller or that * change the logical block contents if the file descriptor is open for * writing.
*/ if ((nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC)) &&
!open_for_write) goto admin;
/*
 * Convert integer values from ioctl structures to user pointers, silently
 * ignoring the upper bits in the compat case to match behaviour of 32-bit
 * kernels.
 */
static void __user *nvme_to_user_ptr(uintptr_t ptrval)
{
	/*
	 * In a compat (32-bit userspace) syscall, truncate to 32 bits so the
	 * resulting user pointer matches what a 32-bit kernel would produce.
	 */
	if (in_compat_syscall())
		ptrval = (compat_uptr_t)ptrval;
	return (void __user *)ptrval;
}
if (!nvme_ctrl_sgl_supported(ctrl))
dev_warn_once(ctrl->device, "using unchecked data buffer\n"); if (has_metadata) { if (!supports_metadata) return -EINVAL;
if (!nvme_ctrl_meta_sgl_supported(ctrl))
dev_warn_once(ctrl->device, "using unchecked metadata buffer\n");
}
if (iter)
ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL); else
ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
0, rq_data_dir(req));
if (ret) return ret;
bio = req->bio; if (bdev)
bio_set_dev(bio, bdev);
if (has_metadata) {
ret = blk_rq_integrity_map_user(req, meta_buffer, meta_len); if (ret) goto out_unmap;
}
return ret;
out_unmap: if (bio)
blk_rq_unmap_user(bio); return ret;
}
/* * IOPOLL could potentially complete this request directly, but * if multiple rings are polling on the same queue, then it's possible * for one ring to find completions for another ring. Punting the * completion via task_work will always direct it to the right * location, rather than potentially complete requests for ringA * under iopoll invocations from ringB.
*/
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); return RQ_END_IO_FREE;
}
if (d.data_len) {
ret = nvme_map_user_request(req, d.addr, d.data_len,
nvme_to_user_ptr(d.metadata), d.metadata_len,
map_iter, vec ? NVME_IOCTL_VEC : 0); if (ret) goto out_free_req;
}
/* to free bio on completion, as req->bio will be null at that time */
pdu->bio = req->bio;
pdu->req = req;
req->end_io_data = ioucmd;
req->end_io = nvme_uring_cmd_end_io;
blk_execute_rq_nowait(req, false); return -EIOCBQUEUED;
/*
 * Dispatch a namespace-scoped ioctl for @ns.  @argp is the userspace
 * argument, @flags carries NVME_IOCTL_* modifiers, and @open_for_write
 * reflects whether the file descriptor permits commands that alter media.
 * Returns the command result, or -ENOTTY for unknown ioctls.
 */
static int nvme_ns_ioctl(struct nvme_ns *ns, unsigned int cmd,
		void __user *argp, unsigned int flags, bool open_for_write)
{
	switch (cmd) {
	case NVME_IOCTL_ID:
		force_successful_syscall_return();
		return ns->head->ns_id;
	case NVME_IOCTL_IO_CMD:
		return nvme_user_cmd(ns->ctrl, ns, argp, flags, open_for_write);
	/*
	 * struct nvme_user_io can have different padding on some 32-bit ABIs.
	 * Just accept the compat version as all fields that are used are the
	 * same size and at the same offset.
	 */
#ifdef COMPAT_FOR_U64_ALIGNMENT
	case NVME_IOCTL_SUBMIT_IO32:
#endif
	case NVME_IOCTL_SUBMIT_IO:
		return nvme_submit_io(ns, argp);
	case NVME_IOCTL_IO64_CMD_VEC:
		flags |= NVME_IOCTL_VEC;
		fallthrough;
	case NVME_IOCTL_IO64_CMD:
		return nvme_user_cmd64(ns->ctrl, ns, argp, flags,
				       open_for_write);
	default:
		return -ENOTTY;
	}
}
if (bdev_is_partition(bdev))
flags |= NVME_IOCTL_PARTITION;
srcu_idx = srcu_read_lock(&head->srcu);
ns = nvme_find_path(head); if (!ns) goto out_unlock;
/* * Handle ioctls that apply to the controller instead of the namespace * separately and drop the ns SRCU reference early. This avoids a * deadlock when deleting namespaces using the passthrough interface.
*/ if (is_ctrl_ioctl(cmd)) return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx,
open_for_write);
srcu_idx = srcu_read_lock(&ctrl->srcu); if (list_empty(&ctrl->namespaces)) {
ret = -ENOTTY; goto out_unlock;
}
ns = list_first_or_null_rcu(&ctrl->namespaces, struct nvme_ns, list); if (ns != list_last_entry(&ctrl->namespaces, struct nvme_ns, list)) {
dev_warn(ctrl->device, "NVME_IOCTL_IO_CMD not supported when multiple namespaces present!\n");
ret = -EINVAL; goto out_unlock;
}
dev_warn(ctrl->device, "using deprecated NVME_IOCTL_IO_CMD ioctl on the char device!\n"); if (!nvme_get_ns(ns)) {
ret = -ENXIO; goto out_unlock;
}
srcu_read_unlock(&ctrl->srcu, srcu_idx);
ret = nvme_user_cmd(ctrl, ns, argp, 0, open_for_write);
nvme_put_ns(ns); return ret;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.