/* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved. * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright (c) 2005 Voltaire, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
/* * Must be called with the ufile->device->disassociate_srcu held, and the lock * must be held until use of the ucontext is finished.
*/ struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile)
{ /* * We do not hold the hw_destroy_rwsem lock for this flow, instead * srcu is used. It does not matter if someone races this with * get_context, we get NULL or valid ucontext.
*/ struct ib_ucontext *ucontext = smp_load_acquire(&ufile->ucontext);
/* Device already disassociated: no usable context, report I/O error. */
if (!srcu_dereference(ufile->device->ib_dev,
&ufile->device->disassociate_srcu)) return ERR_PTR(-EIO);
/*
 * NOTE(review): this chunk appears truncated here — the NULL check on
 * @ucontext and the successful return of @ucontext are not visible.
 */
/* Async-event handler for QPs; @context_ptr is the registration cookie. */
void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr)
{ /* for XRC target qp's, check that qp is live */ if (!event->element.qp->uobject) return;
/*
 * NOTE(review): body truncated in this chunk — the event delivery that
 * normally follows the liveness check is missing.
 */
/*
 * NOTE(review): mid-function fragment — the enclosing definition is not
 * visible in this chunk.
 */
/* The first async_event_file becomes the default one for the file. */
mutex_lock(&uverbs_file->ucontext_lock); if (!uverbs_file->default_async_file) { /* Pairs with the put in ib_uverbs_release_file */
uverbs_uobject_get(&async_file->uobj);
/* Publish only after the reference is held; readers use load-acquire. */
smp_store_release(&uverbs_file->default_async_file, async_file);
}
mutex_unlock(&uverbs_file->ucontext_lock);
/*
 * NOTE(review): interior of the command-header verification helper; the
 * function signature and opening brace are not visible in this chunk.
 * Extended commands size fields in 8-byte words, legacy commands in
 * 4-byte words (see the *8 vs *4 scaling below).
 */
/* Total declared request length must exactly match the write() count. */
if ((hdr->in_words + ex_hdr->provider_in_words) * 8 != count) return -EINVAL;
/* Request must be at least as large as the method requires. */
if (hdr->in_words * 8 < method_elm->req_size) return -ENOSPC;
/* Reserved field must be zero. */
if (ex_hdr->cmd_hdr_reserved) return -EINVAL;
if (ex_hdr->response) { if (!hdr->out_words && !ex_hdr->provider_out_words) return -EINVAL;
if (hdr->out_words * 8 < method_elm->resp_size) return -ENOSPC;
/* The user-supplied response buffer must be a valid user range. */
if (!access_ok(u64_to_user_ptr(ex_hdr->response),
(hdr->out_words + ex_hdr->provider_out_words) * 8)) return -EFAULT;
} else { if (hdr->out_words || ex_hdr->provider_out_words) return -EINVAL;
}
return 0;
}
/* not extended command */ if (hdr->in_words * 4 != count) return -EINVAL;
if (count < method_elm->req_size + sizeof(*hdr)) { /* * rdma-core v18 and v19 have a bug where they send DESTROY_CQ * with a 16 byte write instead of 24. Old kernels didn't * check the size so they allowed this. Now that the size is * checked provide a compatibility work around to not break * those userspaces.
*/ if (hdr->command == IB_USER_VERBS_CMD_DESTROY_CQ &&
count == 16) {
/* Pretend the full 24-byte (6-word) request was sent. */
hdr->in_words = 6; return 0;
} return -ENOSPC;
} if (hdr->out_words * 4 < method_elm->resp_size) return -ENOSPC;
/*
 * NOTE(review): interior of the uverbs write() command-dispatch path;
 * the function signature and opening brace are not visible in this chunk.
 */
/* Reject if the process changed security contexts since opening the fd. */
if (!ib_safe_file_access(filp)) {
pr_err_once("uverbs_write: process %d (%s) changed security contexts after opening file descriptor, this is not allowed.\n",
task_tgid_vnr(current), current->comm); return -EACCES;
}
if (count < sizeof(hdr)) return -EINVAL;
if (copy_from_user(&hdr, buf, sizeof(hdr))) return -EFAULT;
/* Look up the write-method implementation for this command id. */
method_elm = uapi_get_method(uapi, hdr.command); if (IS_ERR(method_elm)) return PTR_ERR(method_elm);
/* Extended commands carry a second header right after the first. */
if (method_elm->is_ex) { if (count < (sizeof(hdr) + sizeof(ex_hdr))) return -EINVAL; if (copy_from_user(&ex_hdr, buf + sizeof(hdr), sizeof(ex_hdr))) return -EFAULT;
}
ret = verify_hdr(&hdr, &ex_hdr, count, method_elm); if (ret) return ret;
if (method_elm->has_resp) { /* * The macros check that if has_resp is set * then the command request structure starts * with a '__aligned u64 response' member.
*/
ret = get_user(response, (const u64 __user *)buf); if (ret) goto out_unlock;
/*
 * NOTE(review): fragment truncated here — the remainder of the dispatch
 * (and the out_unlock label targeted above) is not visible.
 */
/*
 * The VMA has been dup'd, initialize the vm_private_data with a new
 * tracking struct.
 *
 * Fix vs. original: "staticvoid" (fused keyword, does not compile) is
 * corrected to "static void"; no other token changed.
 */
static void rdma_umap_open(struct vm_area_struct *vma)
{
	struct ib_uverbs_file *ufile = vma->vm_file->private_data;
	struct rdma_umap_priv *opriv = vma->vm_private_data;
	struct rdma_umap_priv *priv;

	/* Nothing was being tracked on the parent VMA. */
	if (!opriv)
		return;

	/* We are racing with disassociation */
	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
		goto out_zap;
	mutex_lock(&ufile->disassociation_lock);

	/*
	 * Disassociation already completed, the VMA should already be zapped.
	 */
	if (!ufile->ucontext)
		goto out_unlock;

	/*
	 * NOTE(review): the success path that would duplicate @opriv into a
	 * freshly allocated @priv for the new VMA appears to be missing from
	 * this copy — as written, control always falls through to the unlock
	 * and zap paths below and @priv is never used. Compare against the
	 * upstream implementation before relying on this function.
	 */
out_unlock:
	mutex_unlock(&ufile->disassociation_lock);
	up_read(&ufile->hw_destroy_rwsem);
out_zap:
	/*
	 * We can't allow the VMA to be created with the actual IO pages, that
	 * would break our API contract, and it can't be stopped at this
	 * point, so zap it.
	 */
	vma->vm_private_data = NULL;
	zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
}
/* * The vma holds a reference on the struct file that created it, which * in turn means that the ib_uverbs_file is guaranteed to exist at * this point.
*/
/*
 * NOTE(review): mid-function fragment (looks like the VMA close path);
 * the enclosing definition is not visible in this chunk.
 */
mutex_lock(&ufile->umap_lock); if (priv->entry)
rdma_user_mmap_entry_put(priv->entry);
/* * Once the zap_vma_ptes has been called touches to the VMA will come here and * we return a dummy writable zero page for all the pfns.
*/ static vm_fault_t rdma_umap_fault(struct vm_fault *vmf)
{ struct ib_uverbs_file *ufile = vmf->vma->vm_file->private_data; struct rdma_umap_priv *priv = vmf->vma->vm_private_data;
vm_fault_t ret = 0;
/* No tracking struct means this VMA was never ours to back. */
if (!priv) return VM_FAULT_SIGBUS;
/* Read only pages can just use the system zero page. */ if (!(vmf->vma->vm_flags & (VM_WRITE | VM_MAYWRITE))) {
vmf->page = ZERO_PAGE(vmf->address);
get_page(vmf->page); return 0;
}
/* Lazily allocate one shared zeroed page, reused for all faults on this file. */
mutex_lock(&ufile->umap_lock); if (!ufile->disassociate_page)
ufile->disassociate_page =
alloc_pages(vmf->gfp_mask | __GFP_ZERO, 0);
if (ufile->disassociate_page) { /* * This VMA is forced to always be shared so this doesn't have * to worry about COW.
*/
vmf->page = ufile->disassociate_page;
get_page(vmf->page);
} else {
ret = VM_FAULT_SIGBUS;
}
mutex_unlock(&ufile->umap_lock);
/*
 * NOTE(review): truncated — the final "return ret;" and the closing
 * brace are not visible in this chunk.
 */
/*
 * NOTE(review): mid-function fragment of the umap-disassociation loop;
 * the enclosing definition is not visible in this chunk.
 */
/* Get an arbitrary mm pointer that hasn't been cleaned yet */
mutex_lock(&ufile->umap_lock); while (!list_empty(&ufile->umaps)) { int ret;
priv = list_first_entry(&ufile->umaps, struct rdma_umap_priv, list);
mm = priv->vma->vm_mm;
/* mm already fully exited: drop the tracking entry and keep scanning. */
ret = mmget_not_zero(mm); if (!ret) {
list_del_init(&priv->list); if (priv->entry) {
rdma_user_mmap_entry_put(priv->entry);
priv->entry = NULL;
}
mm = NULL; continue;
} break;
}
mutex_unlock(&ufile->umap_lock); if (!mm) {
mutex_unlock(&ufile->disassociation_lock); return;
}
/* * The umap_lock is nested under mmap_lock since it used within * the vma_ops callbacks, so we have to clean the list one mm * at a time to get the lock ordering right. Typically there * will only be one mm, so no big deal.
*/
mmap_read_lock(mm);
mutex_lock(&ufile->umap_lock);
list_for_each_entry_safe (priv, next_priv, &ufile->umaps,
list) { struct vm_area_struct *vma = priv->vma;
/* Only handle VMAs belonging to the mm picked above. */
if (vma->vm_mm != mm) continue;
list_del_init(&priv->list);
/*
 * NOTE(review): fragment truncated here — the zap of the VMA and the
 * loop/lock teardown that normally follow are not visible.
 */
/** * rdma_user_mmap_disassociate() - Revoke mmaps for a device * @device: device to revoke * * This function should be called by drivers that need to disable mmaps for the * device, for instance because it is going to be reset.
*/ void rdma_user_mmap_disassociate(struct ib_device *device)
{ struct ib_uverbs_device *uverbs_dev =
ib_get_client_data(device, &uverbs_client); struct ib_uverbs_file *ufile;
/*
 * NOTE(review): body truncated in this chunk immediately after the
 * local declarations.
 */
/* * ib_uverbs_open() does not need the BKL: * * - the ib_uverbs_device structures are properly reference counted and * everything else is purely local to the file being created, so * races against other open calls are not a problem; * - there is no ioctl method to race against; * - the open method will either immediately run -ENXIO, or all * required initialization will be done.
*/ staticint ib_uverbs_open(struct inode *inode, struct file *filp)
/* NOTE(review): "staticint" above should read "static int" — whitespace lost in extraction. */
{ struct ib_uverbs_device *dev; struct ib_uverbs_file *file; struct ib_device *ib_dev; int ret; int module_dependent; int srcu_key;
/* Device is mid-removal if its refcount already dropped to zero. */
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev); if (!refcount_inc_not_zero(&dev->refcount)) return -ENXIO;
get_device(&dev->dev);
srcu_key = srcu_read_lock(&dev->disassociate_srcu);
mutex_lock(&dev->lists_mutex);
/* NULL ib_dev means the HW device was already disassociated. */
ib_dev = srcu_dereference(dev->ib_dev,
&dev->disassociate_srcu); if (!ib_dev) {
ret = -EIO; goto err;
}
/* Opening is only allowed from a netns with access to this device. */
if (!rdma_dev_access_netns(ib_dev, current->nsproxy->net_ns)) {
ret = -EPERM; goto err;
}
/* In case IB device supports disassociate ucontext, there is no hard * dependency between uverbs device and its low level device.
*/
module_dependent = !(ib_dev->ops.disassociate_ucontext);
if (module_dependent) { if (!try_module_get(ib_dev->ops.owner)) {
ret = -ENODEV; goto err;
}
}
file = kzalloc(sizeof(*file), GFP_KERNEL); if (!file) {
ret = -ENOMEM; if (module_dependent) goto err_module;
/*
 * NOTE(review): fragment truncated here — the remainder of open()
 * (file initialization, err/err_module labels) is not visible.
 */
/*
 * NOTE(review): tail fragment of a netlink fill helper — the function
 * signature is not visible in this chunk.
 */
/* * To support DRIVER_ID binding in userspace some of the driver need * upgrading to expose their PCI dependent revision information * through get_context instead of relying on modalias matching. When * the drivers are fixed they can drop this flag.
*/ if (!ibdev->ops.uverbs_no_driver_id_binding) {
ret = nla_put_u32(res->nl_msg, RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID,
ibdev->ops.driver_id); if (ret) return ret;
} return 0;
}
/*
 * NOTE(review): tail fragment of the uverbs device-removal handler; the
 * function signature and the earlier cleanup loop are not visible here.
 */
/* We must release the mutex before going ahead and calling * uverbs_cleanup_ufile, as it might end up indirectly calling * uverbs_close, for example due to freeing the resources (e.g * mmput).
*/
mutex_unlock(&uverbs_dev->lists_mutex);
if (device->ops.disassociate_ucontext) { /* We disassociate HW resources and immediately return. * Userspace will see a EIO errno for all future access. * Upon returning, ib_device may be freed internally and is not * valid any more. * uverbs_device is still available until all clients close * their files, then the uverbs device ref count will be zero * and its resources will be freed. * Note: At this point no more files can be opened since the * cdev was deleted, however active clients can still issue * commands and close their open files.
*/
ib_uverbs_free_hw_resources(uverbs_dev, device);
wait_clients = 0;
}
/* Drop our ref; only block for clients when we could not disassociate. */
if (refcount_dec_and_test(&uverbs_dev->refcount))
ib_uverbs_comp_dev(uverbs_dev); if (wait_clients)
wait_for_completion(&uverbs_dev->comp);
put_device(&uverbs_dev->dev);
}
/* Module init: register char-dev regions, the class, and the IB client. */
staticint __init ib_uverbs_init(void)
/* NOTE(review): "staticint" above should read "static int" — whitespace lost in extraction. */
{ int ret;
/* Fixed minor range for the first IB_UVERBS_NUM_FIXED_MINOR devices. */
ret = register_chrdev_region(IB_UVERBS_BASE_DEV,
IB_UVERBS_NUM_FIXED_MINOR, "infiniband_verbs"); if (ret) {
pr_err("user_verbs: couldn't register device number\n"); goto out;
}
/* Dynamically allocated range for any additional devices. */
ret = alloc_chrdev_region(&dynamic_uverbs_dev, 0,
IB_UVERBS_NUM_DYNAMIC_MINOR, "infiniband_verbs"); if (ret) {
pr_err("couldn't register dynamic device number\n"); goto out_alloc;
}
ret = class_register(&uverbs_class); if (ret) {
pr_err("user_verbs: couldn't create class infiniband_verbs\n"); goto out_chrdev;
}
ret = class_create_file(&uverbs_class, &class_attr_abi_version.attr); if (ret) {
pr_err("user_verbs: couldn't create abi_version attribute\n"); goto out_class;
}
ret = ib_register_client(&uverbs_client); if (ret) {
pr_err("user_verbs: couldn't register client\n"); goto out_class;
}
/*
 * NOTE(review): fragment truncated here — the success return and the
 * out/out_alloc/out_chrdev/out_class unwind labels targeted above are
 * not visible in this chunk.
 */
/*
 * NOTE(review): extraneous text carried over from the web page this file
 * was extracted from (a German site disclaimer), not part of the source.
 * Translation: "The information on this web page was compiled carefully
 * to the best of our knowledge. However, neither completeness, nor
 * correctness, nor quality of the provided information is guaranteed.
 * Remark: the colored syntax highlighting and the measurement are still
 * experimental."
 */