// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * VAS user space API for its accelerators (Only NX-GZIP is supported now)
 * Copyright (C) 2019 Haren Myneni, IBM Corp
 */
/*
 * The driver creates the device node that can be used as follows:
 * For NX-GZIP
 *
 *	fd = open("/dev/crypto/nx-gzip", O_RDWR);
 *	rc = ioctl(fd, VAS_TX_WIN_OPEN, &attr);
 *	paste_addr = mmap(NULL, PAGE_SIZE, prot, MAP_SHARED, fd, 0ULL);
 *	vas_copy(&crb, 0, 1);
 *	vas_paste(paste_addr, 0, 1);
 *	close(fd) or exit process to close window.
 *
 * where "vas_copy" and "vas_paste" are defined in copy-paste.h.
 * copy/paste returns to the user space directly, so refer to the NX
 * hardware documentation for exact copy/paste usage and completion / error
 * conditions.
 */
/* * Wrapper object for the nx-gzip device - there is just one instance of * this node for the whole system.
*/ staticstruct coproc_dev { struct cdev cdev; struct device *device; char *name;
dev_t devt; structclass *class; enum vas_cop_type cop_type; conststruct vas_user_win_ops *vops;
} coproc_device;
/*
 * Take references to the calling task's pid, mm and tgid and store them
 * in @task_ref.
 *
 * Returns 0 on success. If the current task has no mm, the pid reference
 * taken here is dropped again and -EPERM is returned.
 */
int get_vas_user_win_ref(struct vas_user_win_ref *task_ref)
{
	/*
	 * A window opened by a child thread may still be open when that
	 * thread exits, and is then released by the parent thread. Holding
	 * a reference on the opener's pid keeps that pid from being re-used
	 * in the meantime; this only matters for multithreaded applications.
	 */
	task_ref->pid = get_task_pid(current, PIDTYPE_PID);

	/* Acquire a reference to the task's mm. */
	task_ref->mm = get_task_mm(current);

	if (task_ref->mm) {
		/*
		 * Pin the mm with mmgrab() and give back the reference
		 * that get_task_mm() handed out.
		 */
		mmgrab(task_ref->mm);
		mmput(task_ref->mm);

		/*
		 * The process closes its windows on exit, but a child
		 * thread can open a window and exit without closing it.
		 * Hold the tgid until the window is closed so that the
		 * tgid is not reused either.
		 */
		task_ref->tgid = find_get_pid(task_tgid_vnr(current));

		return 0;
	}

	/* No mm: undo the pid reference and report the failure. */
	put_pid(task_ref->pid);
	pr_err("pid(%d): mm_struct is not found\n",
	       current->pid);

	return -EPERM;
}
/* * Successful return must release the task reference with * put_task_struct
*/ staticbool ref_get_pid_and_task(struct vas_user_win_ref *task_ref, struct task_struct **tskp, struct pid **pidp)
{ struct task_struct *tsk; struct pid *pid;
pid = task_ref->pid;
tsk = get_pid_task(pid, PIDTYPE_PID); if (!tsk) {
pid = task_ref->tgid;
tsk = get_pid_task(pid, PIDTYPE_PID); /* * Parent thread (tgid) will be closing window when it * exits. So should not get here.
*/ if (WARN_ON_ONCE(!tsk)) returnfalse;
}
/* Return if the task is exiting. */ if (tsk->flags & PF_EXITING) {
put_task_struct(tsk); returnfalse;
}
*tskp = tsk;
*pidp = pid;
returntrue;
}
/*
 * Update the CSB to indicate a translation error.
 *
 * User space will be polling on CSB after the request is issued.
 * If NX can handle the request without any issues, it updates CSB.
 * Whereas if NX encounters page fault, the kernel will handle the
 * fault and update CSB with translation error.
 *
 * If we are unable to update the CSB means copy_to_user failed due to
 * invalid csb_addr, send a signal to the process.
 *
 * NOTE(review): this span looks damaged and should be compared against a
 * pristine copy of the file before it is relied on:
 *  - csb_addr is read below but no assignment to it is visible here, and
 *    only csb.address and csb.flags of the local csb are initialised;
 *  - no early return on a successful copy is visible, so as written the
 *    SIGSEGV is raised unconditionally;
 *  - from the cp_inst->txwin check onwards the code uses cp_inst and fp,
 *    which are not declared in this function, and returns values although
 *    the function is declared void. That tail presumably belongs to a
 *    different function whose beginning was lost.
 */
void vas_update_csb(struct coprocessor_request_block *crb, struct vas_user_win_ref *task_ref)
{ struct coprocessor_status_block csb; struct kernel_siginfo info; struct task_struct *tsk; void __user *csb_addr; struct pid *pid; int rc;
/*
 * NX user space windows can not be opened for task->mm=NULL
 * and faults will not be generated for kernel requests.
 */
if (WARN_ON_ONCE(!task_ref->mm)) return;
/*
 * NX operates and returns in BE format as defined CRB struct.
 * So saves fault_storage_addr in BE as NX pastes in FIFO and
 * expects user space to convert to CPU format.
 */
csb.address = crb->stamp.nx.fault_storage_addr;
csb.flags = 0;
/*
 * Process closes send window after all pending NX requests are
 * completed. In multi-thread applications, a child thread can
 * open a window and can exit without closing it. May be some
 * requests are pending or this window can be used by other
 * threads later. We should handle faults if NX encounters
 * pages faults on these requests. Update CSB with translation
 * error and fault address. If csb_addr passed by user space is
 * invalid, send SEGV signal to pid saved in window. If the
 * child thread is not running, send the signal to tgid.
 * Parent thread (tgid) will close this window upon its exit.
 *
 * pid and mm references are taken when window is opened by
 * process (pid). So tgid is used only when child thread opens
 * a window and exits without closing it.
 */
if (!ref_get_pid_and_task(task_ref, &tsk, &pid)) return;
/* Borrow the saved mm so that copy_to_user() targets that address space. */
kthread_use_mm(task_ref->mm);
/* First pass: write the whole CSB while csb.flags is still 0. */
rc = copy_to_user(csb_addr, &csb, sizeof(csb));
/*
 * User space polls on csb.flags (first byte). So add barrier
 * then copy first byte with csb flags update.
 */
if (!rc) {
csb.flags = CSB_V; /* Make sure update to csb.flags is visible now */
smp_mb();
/* Second pass: only sizeof(u8) bytes from the start of csb are copied. */
rc = copy_to_user(csb_addr, &csb, sizeof(u8));
}
kthread_unuse_mm(task_ref->mm);
/* Drop the task reference obtained by ref_get_pid_and_task(). */
put_task_struct(tsk);
/* Build a SIGSEGV (SEGV_MAPERR, errno EFAULT) that reports csb_addr. */
clear_siginfo(&info);
info.si_signo = SIGSEGV;
info.si_errno = EFAULT;
info.si_code = SEGV_MAPERR;
info.si_addr = csb_addr;
/*
 * process will be polling on csb.flags after request is sent to
 * NX. So generally CSB update should not fail except when an
 * application passes invalid csb_addr. So an error message will
 * be displayed and leave it to user space whether to ignore or
 * handle this signal.
 */
rcu_read_lock();
rc = kill_pid_info(SIGSEGV, &info, pid);
rcu_read_unlock();
/*
 * NOTE(review): everything below operates on cp_inst / fp (see the note at
 * the top of this block). It closes the send window through the registered
 * close_win callback when one exists, returns that callback's error if it
 * fails, then frees cp_inst and clears fp->private_data.
 */
if (cp_inst->txwin) { if (cp_inst->coproc->vops &&
cp_inst->coproc->vops->close_win) {
rc = cp_inst->coproc->vops->close_win(cp_inst->txwin); if (rc) return rc;
}
cp_inst->txwin = NULL;
}
kfree(cp_inst);
fp->private_data = NULL;
/*
 * We don't know here if user has other receive windows
 * open, so we can't really call clear_thread_tidr().
 * So, once the process calls set_thread_tidr(), the
 * TIDR value sticks around until process exits, resulting
 * in an extra copy in restore_sprs().
 */
return 0;
}
/*
 * If the executed instruction that caused the fault was a paste, then
 * clear regs CR0[EQ], advance NIP, and return 0. Else return error code.
 *
 * Return:
 *   0        the instruction at NIP was a paste; CR0[0-2] were cleared and
 *            NIP was moved past it
 *   -EINVAL  current has no saved registers, or they are not user mode
 *   -EAGAIN  the instruction word at NIP could not be read
 *   -ENOENT  the instruction at NIP is not a paste
 */
static int do_fail_paste(void)
{
	struct pt_regs *regs = current->thread.regs;
	u32 instword;

	if (WARN_ON_ONCE(!regs))
		return -EINVAL;

	if (WARN_ON_ONCE(!user_mode(regs)))
		return -EINVAL;

	/*
	 * If we couldn't translate the instruction, the driver should
	 * return success without handling the fault, it will be retried
	 * or the instruction fetch will fault.
	 */
	if (get_user(instword, (u32 __user *)(regs->nip)))
		return -EAGAIN;

	/*
	 * Not a paste instruction, driver may fail the fault.
	 */
	if ((instword & PPC_INST_PASTE_MASK) != PPC_INST_PASTE)
		return -ENOENT;

	regs->ccr &= ~0xe0000000;	/* Clear CR0[0-2] to fail paste */
	regs_add_return_ip(regs, 4);	/* Emulate the paste */

	return 0;
}
/*
 * Fault handler for the paste address mapping.
 *
 * Invoked when the core takes a page fault on the paste address, which
 * happens if the kernel closed the window in the hypervisor (on pseries)
 * because of a lost credit, or if the paste address is not mapped.
 *
 * Returns the result of vmf_insert_pfn() when the window is active and
 * reports a paste address, VM_FAULT_NOPAGE when do_fail_paste() returned
 * 0 or -EAGAIN, and VM_FAULT_SIGBUS in every other case.
 */
static vm_fault_t vas_mmap_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct coproc_instance *cp_inst = vma->vm_file->private_data;
	struct vas_window *txwin;
	u64 paste_addr;

	/* A fault with no open window is not expected. */
	if (!cp_inst || !cp_inst->txwin) {
		pr_err("Unexpected fault on paste address with TX window closed\n");
		return VM_FAULT_SIGBUS;
	}

	txwin = cp_inst->txwin;

	/*
	 * When the LPAR loses credits (core removal) or during migration,
	 * the existing paste address mapping is invalidated and the
	 * windows are marked in-active (zap_vma_pages in
	 * reconfig_close_windows()). A new mapping is set up later, after
	 * migration or once credits are available again, so faults keep
	 * arriving while user space issues NX requests in between.
	 */
	if (txwin->task_ref.vma != vma) {
		pr_err("No previous mapping with paste address\n");
		return VM_FAULT_SIGBUS;
	}

	/*
	 * The window may be inactive because of a lost credit (e.g. core
	 * removal with DLPAR). Once it is active again, map the new paste
	 * address at the window's virtual address.
	 */
	scoped_guard(mutex, &txwin->task_ref.mmap_mutex) {
		if (txwin->status == VAS_WIN_ACTIVE) {
			paste_addr = cp_inst->coproc->vops->paste_addr(txwin);
			if (paste_addr)
				return vmf_insert_pfn(vma, vma->vm_start,
						(paste_addr >> PAGE_SHIFT));
		}
	}

	/*
	 * No mapping could be installed: the actual window was closed,
	 * which can happen during migration or on lost credits. Report the
	 * paste instruction as failed to user space.
	 *
	 * User space can retry several times until success (needed for
	 * migration), fall back to SW compression, or manage with other
	 * open windows if available. The sysfs interface tells it whether
	 * the failures come from migration or core removal:
	 * nr_used_credits > nr_total_credits when credits were lost.
	 */
	switch (do_fail_paste()) {
	case 0:
	case -EAGAIN:
		return VM_FAULT_NOPAGE;
	default:
		return VM_FAULT_SIGBUS;
	}
}
/*
 * During mmap() paste address, mapping VMA is saved in VAS window
 * struct which is used to unmap during migration if the window is
 * still open. But the user space can remove this mapping with
 * munmap() before closing the window and the VMA address will
 * be invalid. Set VAS window VMA to NULL in this function which
 * is called before VMA free.
 *
 * NOTE(review): this span looks damaged and should be compared against a
 * pristine copy of the file:
 *  - "staticvoid" appears to be "static void" with the space lost;
 *  - the assignment that clears the window's VMA, described above, is not
 *    visible, and the function body is never closed;
 *  - from the vm_pgoff check onwards the code returns -EINVAL / -EACCES,
 *    which does not fit a void function. Those checks presumably belong to
 *    a separate mmap routine whose start and end are missing.
 */
staticvoid vas_mmap_close(struct vm_area_struct *vma)
{ struct file *fp = vma->vm_file; struct coproc_instance *cp_inst = fp->private_data; struct vas_window *txwin;
/* Should not happen */
if (!cp_inst || !cp_inst->txwin) {
pr_err("No attached VAS window for the paste address mmap\n"); return;
}
txwin = cp_inst->txwin;
/*
 * task_ref.vma is set in coproc_mmap() during mmap paste
 * address. So it has to be the same VMA that is getting freed.
 */
if (WARN_ON(txwin->task_ref.vma != vma)) {
pr_err("Invalid paste address mmaping\n"); return;
}
/*
 * Map complete page to the paste address. So the user
 * space should pass 0ULL to the offset parameter.
 */
if (vma->vm_pgoff) {
pr_debug("Page offset unsupported to map paste address\n"); return -EINVAL;
}
/* Ensure instance has an open send window */
if (!txwin) {
pr_err("No send window open?\n"); return -EINVAL;
}
/* The paste_addr callback must have been registered. */
if (!cp_inst->coproc->vops || !cp_inst->coproc->vops->paste_addr) {
pr_err("VAS API is not registered\n"); return -EACCES;
}
/*
 * The initial mmap is done after the window is opened
 * with ioctl. But before mmap(), this window can be closed in
 * the hypervisor due to lost credit (core removal on pseries).
 * So if the window is not active, return mmap() failure with
 * -EACCES and expects the user space reissue mmap() when it
 * is active again or open new window when the credit is available.
 * mmap_mutex protects the paste address mmap() with DLPAR
 * close/open event and allows mmap() only when the window is
 * active.
 */
guard(mutex)(&txwin->task_ref.mmap_mutex); if (txwin->status != VAS_WIN_ACTIVE) {
pr_err("Window is not active\n"); return -EACCES;
}
/*
 * Supporting only nx-gzip coprocessor type now, but this API code
 * extended to other coprocessor types later.
 *
 * The visible part allocates a character device region named @name,
 * creates a device class of the same name, and records @cop_type and
 * @vops in the file-wide coproc_device.
 *
 * Returns the alloc_chrdev_region() error if that call fails.
 *
 * NOTE(review): the definition is cut off after the vops assignment. The
 * err_class label targeted by the goto below, the remaining setup and the
 * final return are not visible here. "constchar" and "conststruct" look
 * like "const char" / "const struct" with the space lost. In the visible
 * part, @mod and devno are not used and the initial rc = -EINVAL is
 * overwritten before it is read.
 */
int vas_register_coproc_api(struct module *mod, enum vas_cop_type cop_type, constchar *name, conststruct vas_user_win_ops *vops)
{ int rc = -EINVAL;
dev_t devno;
/* Ask for one device number, starting at minor 1. */
rc = alloc_chrdev_region(&coproc_device.devt, 1, 1, name); if (rc) {
pr_err("Unable to allocate coproc major number: %i\n", rc); return rc;
}
pr_devel("%s device allocated, dev [%i,%i]\n", name,
MAJOR(coproc_device.devt), MINOR(coproc_device.devt));
coproc_device.class = class_create(name); if (IS_ERR(coproc_device.class)) {
rc = PTR_ERR(coproc_device.class);
pr_err("Unable to create %s class %d\n", name, rc); goto err_class;
}
/* Install coproc_devnode as the class's devnode callback. */
coproc_device.class->devnode = coproc_devnode;
coproc_device.cop_type = cop_type;
coproc_device.vops = vops;
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert. 0.30 Bemerkung:
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.