list_for_each_entry_safe(p, n, pages, lru)
__free_page(p);
INIT_LIST_HEAD(pages);
}
/*
 * Given an array of items in userspace, return a list of pages
 * containing the data. If copying fails, either because of memory
 * allocation failure or a problem reading user memory, return an
 * error code; it's up to the caller to dispose of any partial list.
 *
 * Returns 0 once every visible copy has succeeded, or -EFAULT when
 * copy_from_user() reports a failure.
 *
 * NOTE(review): the statements that initialise pagedata/pageidx and that
 * open the loop closed below are not visible in this excerpt -- confirm
 * against the complete file before relying on this description.
 */
staticint gather_array(struct list_head *pagelist, unsigned nelem, size_t size, constvoid __user *data)
{ unsigned pageidx; void *pagedata; int ret;
/* Pre-load the error code so the failure path only has to jump to "fail". */
ret = -EFAULT; if (copy_from_user(pagedata + pageidx, data, size)) goto fail;
/* Step the user-space source and the destination offset by one element. */
data += size;
pageidx += size;
}
ret = 0;
fail: return ret;
}
/*
 * Call function "fn" on each element of the array fragmented
 * over a list of pages.
 *
 * fn receives a pointer to the current element and the opaque "state"
 * pointer. The first non-zero value returned by fn stops the traversal
 * and becomes the return value; otherwise 0 is returned.
 *
 * NOTE(review): the loop header and the code that sets pagedata/pageidx
 * are not visible in this excerpt -- confirm against the complete file.
 */
staticint traverse_pages(unsigned nelem, size_t size, struct list_head *pos, int (*fn)(void *data, void *state), void *state)
{ void *pagedata; unsigned pageidx; int ret = 0;
/* Hand the current element to the callback; a non-zero result aborts. */
ret = (*fn)(pagedata + pageidx, state); if (ret) break;
/* Move to the next element in the current page buffer. */
pageidx += size;
}
return ret;
}
/* * Similar to traverse_pages, but use each page as a "block" of * data to be processed as one unit.
*/ staticint traverse_pages_block(unsigned nelem, size_t size, struct list_head *pos, int (*fn)(void *data, int nr, void *state), void *state)
{ void *pagedata; int ret = 0;
BUG_ON(size > PAGE_SIZE);
while (nelem) { int nr = (PAGE_SIZE/size); struct page *page; if (nr > nelem)
nr = nelem;
pos = pos->next;
page = list_entry(pos, struct page, lru);
pagedata = page_address(page);
ret = (*fn)(pagedata, nr, state); if (ret) break;
nelem -= nr;
}
/* Do not allow range to wrap the address space. */ if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
((unsignedlong)(msg->npages << PAGE_SHIFT) >= -st->va)) return -EINVAL;
/* Range chunks must be contiguous in va space. */ if ((msg->va != st->va) ||
((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end)) return -EINVAL;
/*
 * Bookkeeping carried through the passes of a batched mapping request.
 * It is handed to the per-block callbacks as their opaque "state" pointer.
 */
struct mmap_batch_state {
	/* Domain of the request -- presumably the foreign domain; TODO confirm. */
	domid_t domain;
	/* Current virtual address; mmap_batch_fn() advances it per frame. */
	unsigned long va;
	/* VMA being populated; its vm_private_data is read by mmap_batch_fn(). */
	struct vm_area_struct *vma;
	/* Position in the VMA's page array; advanced by mmap_batch_fn(). */
	int index;
	/*
	 * A tristate:
	 *      0 for no errors
	 *      1 if at least one error has happened (and no
	 *          -ENOENT errors have happened)
	 *      -ENOENT if at least 1 -ENOENT has happened.
	 */
	int global_error;
	/* Interface version; 1 and 2 select the error write-back format. */
	int version;

	/* User-space gfn array to store errors in the second pass for V1. */
	xen_pfn_t __user *user_gfn;
	/* User-space int array to store errors in the second pass for V2. */
	int __user *user_err;
};
/*
 * auto translated dom0 note: if domU being created is PV, then gfn is
 * mfn(addr on bus). If it's auto xlated, then gfn is pfn (input to HAP).
 *
 * Per-block callback taking a block of "nr" entries in "data" and a
 * struct mmap_batch_state in "state". It folds the outcome of the block
 * into st->global_error and then advances st->va and st->index.
 *
 * Always returns 0; failures are only recorded in st->global_error.
 *
 * NOTE(review): the statement that assigns "ret" (and any use of gfnp and
 * cur_pages) is not visible in this excerpt -- confirm against the
 * complete file.
 */
staticint mmap_batch_fn(void *data, int nr, void *state)
{
xen_pfn_t *gfnp = data; struct mmap_batch_state *st = state; struct vm_area_struct *vma = st->vma; struct page **pages = vma->vm_private_data; struct page **cur_pages = NULL; int ret;
/* cur_pages stays NULL unless the auto-translated physmap feature is on. */
if (xen_feature(XENFEAT_auto_translated_physmap))
cur_pages = &pages[st->index];
/* Adjust the global_error? */ if (ret != nr) { if (ret == -ENOENT)
st->global_error = -ENOENT; else { /* Record that at least one error has happened. */ if (st->global_error == 0)
st->global_error = 1;
}
}
/* Advance the cursors past this block regardless of the outcome above. */
st->va += XEN_PAGE_SIZE * nr;
st->index += nr / XEN_PFN_PER_PAGE;
return 0;
}
/*
 * Report the outcome for one entry back to user space and advance the
 * matching user-space cursor in @st by one element.
 *
 * @err: 0 when the entry needs no error report, otherwise the error code
 *       to report (-ENOENT is encoded specially for version 1).
 * @st:  batch state; st->version selects the write-back format and
 *       st->user_gfn / st->user_err are the cursors that get advanced.
 *
 * Version 1: the caller's gfn entry is read, tagged with
 * PRIVCMD_MMAPBATCH_PAGED_ERROR (for -ENOENT) or
 * PRIVCMD_MMAPBATCH_MFN_ERROR (anything else) and written back.
 * Any other version is handled as version 2: @err itself is stored in
 * the caller's int array.
 *
 * Entries with @err == 0 are skipped without touching user memory.
 *
 * Returns 0 for skipped entries, the negative result of get_user() if
 * reading the gfn fails (the cursor is not advanced in that case), and
 * otherwise the result of __put_user().
 */
static int mmap_return_error(int err, struct mmap_batch_state *st)
{
	int ret;

	if (st->version == 1) {
		xen_pfn_t gfn;

		if (!err) {
			st->user_gfn++;
			return 0;
		}

		ret = get_user(gfn, st->user_gfn);
		if (ret < 0)
			return ret;
		/*
		 * V1 encodes the error codes in the 32bit top
		 * nibble of the gfn (with its known
		 * limitations vis-a-vis 64 bit callers).
		 */
		gfn |= (err == -ENOENT) ?
			PRIVCMD_MMAPBATCH_PAGED_ERROR :
			PRIVCMD_MMAPBATCH_MFN_ERROR;
		return __put_user(gfn, st->user_gfn++);
	}

	/* st->version == 2 */
	if (!err) {
		st->user_err++;
		return 0;
	}

	return __put_user(err, st->user_err++);
}
/*
 * Per-block callback used for the error write-back pass: @data is a
 * block of @nr int error codes and @state is the struct mmap_batch_state.
 * Each code is passed to mmap_return_error() in order.
 *
 * Returns 0 when every entry was processed, or the first negative value
 * returned by mmap_return_error(), at which point processing stops.
 */
static int mmap_return_errors(void *data, int nr, void *state)
{
	struct mmap_batch_state *st = state;
	int *errs = data;
	int i;

	for (i = 0; i < nr; i++) {
		int ret = mmap_return_error(errs[i], st);

		if (ret < 0)
			return ret;
	}

	return 0;
}
/* Allocate pfns that are then mapped with gfns from foreign domid. Update * the vma with the page info to use later. * Returns: 0 if success, otherwise -errno
*/ staticint alloc_empty_pages(struct vm_area_struct *vma, int numpgs)
{ int rc; struct page **pages;
ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t), m.arr);
if (ret) goto out; if (list_empty(&pagelist)) {
ret = -EINVAL; goto out;
}
if (version == 2) { /* Zero error array now to only copy back actual errors. */ if (clear_user(m.err, sizeof(int) * m.num)) {
ret = -EFAULT; goto out;
}
}
mmap_write_lock(mm);
vma = find_vma(mm, m.addr); if (!vma ||
vma->vm_ops != &privcmd_vm_ops) {
ret = -EINVAL; goto out_unlock;
}
/* * Caller must either: * * Map the whole VMA range, which will also allocate all the * pages required for the auto_translated_physmap case. * * Or * * Map unmapped holes left from a previous map attempt (e.g., * because those foreign frames were previously paged out).
*/ if (vma->vm_private_data == NULL) { if (m.addr != vma->vm_start ||
m.addr + (nr_pages << PAGE_SHIFT) != vma->vm_end) {
ret = -EINVAL; goto out_unlock;
} if (xen_feature(XENFEAT_auto_translated_physmap)) {
ret = alloc_empty_pages(vma, nr_pages); if (ret < 0) goto out_unlock;
} else
vma->vm_private_data = PRIV_VMA_LOCKED;
} else { if (m.addr < vma->vm_start ||
m.addr + (nr_pages << PAGE_SHIFT) > vma->vm_end) {
ret = -EINVAL; goto out_unlock;
} if (privcmd_vma_range_is_mapped(vma, m.addr, nr_pages)) {
ret = -EINVAL; goto out_unlock;
}
}
if (state.global_error) { /* Write back errors in second pass. */
state.user_gfn = (xen_pfn_t *)m.arr;
state.user_err = m.err;
ret = traverse_pages_block(m.num, sizeof(xen_pfn_t),
&pagelist, mmap_return_errors, &state);
} else
ret = 0;
/* If we have not had any EFAULT-like global errors then set the global
* error to -ENOENT if necessary. */ if ((ret == 0) && (state.global_error == -ENOENT))
ret = -ENOENT;
out:
free_page_list(&pagelist); return ret;
out_unlock:
mmap_write_unlock(mm); goto out;
}
staticint lock_pages( struct privcmd_dm_op_buf kbufs[], unsignedint num, struct page *pages[], unsignedint nr_pages, unsignedint *pinned)
{ unsignedint i, off = 0;
for (i = 0; i < num; ) { unsignedint requested; int page_count;
if (copy_from_user(&dom, udata, sizeof(dom))) return -EFAULT;
/* Set restriction to the specified domain, or check it matches */ if (data->domid == DOMID_INVALID)
data->domid = dom; elseif (data->domid != dom) return -EINVAL;
if (!kdata.addr && !kdata.num) { /* Query the size of the resource. */
rc = HYPERVISOR_memory_op(XENMEM_acquire_resource, &xdata); if (rc) return rc; return __put_user(xdata.nr_frames, &udata->num);
}
/* Don't repeat the error message for consecutive failures */ if (rc && !kirqfd->error) {
pr_err("Failed to configure irq for guest domain: %d\n",
kirqfd->dom);
}
if (fd_empty(f)) {
ret = -EBADF; goto error_kfree;
}
kirqfd->eventfd = eventfd_ctx_fileget(fd_file(f)); if (IS_ERR(kirqfd->eventfd)) {
ret = PTR_ERR(kirqfd->eventfd); goto error_kfree;
}
/* * Install our own custom wake-up handling so we are notified via a * callback whenever someone signals the underlying eventfd.
*/
init_waitqueue_func_entry(&kirqfd->wait, irqfd_wakeup);
init_poll_funcptr(&kirqfd->pt, irqfd_poll_func);
spin_lock_irqsave(&irqfds_lock, flags);
list_for_each_entry(tmp, &irqfds_list, list) { if (kirqfd->eventfd == tmp->eventfd) {
ret = -EBUSY;
spin_unlock_irqrestore(&irqfds_lock, flags); goto error_eventfd;
}
}
/* * Check if there was an event already pending on the eventfd before we * registered, and trigger it as if we didn't miss it.
*/
events = vfs_poll(fd_file(f), &kirqfd->pt); if (events & EPOLLIN)
irqfd_inject(kirqfd);
/* * Block until we know all outstanding shutdown jobs have completed so * that we guarantee there will not be any more interrupts once this * deassign function returns.
*/
flush_workqueue(irqfd_cleanup_wq);
/* * We need a barrier, smp_mb(), here to ensure reads are finished before * `state` is updated. Since the lock implementation ensures that * appropriate barrier will be added anyway, we can avoid adding * explicit barrier here. * * Ideally we don't need to update `state` within the locks, but we do * that here to avoid adding explicit barrier.
*/
/* * We need a barrier, smp_mb(), here to ensure writes are finished * before `state` is updated. Since the lock implementation ensures that * appropriate barrier will be added anyway, we can avoid adding * explicit barrier here.
*/
ioreq->state = state;
if (state == STATE_IORESP_READY) {
notify_remote_via_evtchn(port->port); return IRQ_HANDLED;
}
/* The memory for ioreq server must have been mapped earlier */
mmap_write_lock(mm);
vma = find_vma(mm, (unsignedlong)ioeventfd->ioreq); if (!vma) {
pr_err("Failed to find vma for ioreq page!\n");
mmap_write_unlock(mm);
ret = -EFAULT; goto error_kfree;
}
/* * kioreq fields can be accessed here without a lock as they are * never updated after being added to the ioreq_list.
*/ if (kioreq->uioreq != ioeventfd->ioreq) { continue;
} elseif (kioreq->dom != ioeventfd->dom ||
kioreq->vcpus != ioeventfd->vcpus) {
pr_err("Invalid ioeventfd configuration mismatch, dom (%u vs %u), vcpus (%u vs %u)\n",
kioreq->dom, ioeventfd->dom, kioreq->vcpus,
ioeventfd->vcpus); return ERR_PTR(-EINVAL);
}
/* Look for a duplicate eventfd for the same guest */
spin_lock_irqsave(&kioreq->lock, flags);
list_for_each_entry(kioeventfd, &kioreq->ioeventfds, list) { if (eventfd == kioeventfd->eventfd) {
spin_unlock_irqrestore(&kioreq->lock, flags); return ERR_PTR(-EBUSY);
}
}
spin_unlock_irqrestore(&kioreq->lock, flags);
return kioreq;
}
/* Matching kioreq isn't found, allocate a new one */ return alloc_ioreq(ioeventfd);
}
staticint privcmd_ioeventfd_deassign(struct privcmd_ioeventfd *ioeventfd)
{ struct privcmd_kernel_ioreq *kioreq, *tkioreq; struct eventfd_ctx *eventfd; unsignedlong flags; int ret = 0;
eventfd = eventfd_ctx_fdget(ioeventfd->event_fd); if (IS_ERR(eventfd)) return PTR_ERR(eventfd);
mutex_lock(&ioreq_lock);
list_for_each_entry_safe(kioreq, tkioreq, &ioreq_list, list) { struct privcmd_kernel_ioeventfd *kioeventfd, *tmp; /* * kioreq fields can be accessed here without a lock as they are * never updated after being added to the ioreq_list.
*/ if (kioreq->dom != ioeventfd->dom ||
kioreq->uioreq != ioeventfd->ioreq ||
kioreq->vcpus != ioeventfd->vcpus) continue;
/*
 * Prepare @vma for use with privcmd_vm_ops.
 *
 * Sets the VM_IO, VM_PFNMAP, VM_DONTCOPY, VM_DONTEXPAND and VM_DONTDUMP
 * flags, installs privcmd_vm_ops and resets vm_private_data to NULL.
 * @file is not used.
 *
 * Returns 0 unconditionally.
 */
static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
{
	/*
	 * DONTCOPY is essential for Xen because copy_page_range doesn't know
	 * how to recreate these mappings.
	 */
	vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTCOPY |
			  VM_DONTEXPAND | VM_DONTDUMP);
	vma->vm_ops = &privcmd_vm_ops;
	vma->vm_private_data = NULL;

	return 0;
}
/*
 * For MMAPBATCH*. This allows asserting the singleshot mapping
 * on a per pfn/pte basis. Mapping calls that fail with ENOENT
 * can be then retried until success.
 *
 * Returns 0 when the entry read through @pte is empty (pte_none),
 * -EBUSY otherwise. @addr and @data are not used.
 */
static int is_mapped_fn(pte_t *pte, unsigned long addr, void *data)
{
	if (pte_none(ptep_get(pte)))
		return 0;

	return -EBUSY;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.