// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Oracle Data Analytics Accelerator (DAX)
 *
 * DAX is a coprocessor which resides on the SPARC M7 (DAX1) and M8
 * (DAX2) processor chips, and has direct access to the CPU's L3
 * caches as well as physical memory. It can perform several
 * operations on data streams with various input and output formats.
 * The driver provides a transport mechanism only and has limited
 * knowledge of the various opcodes and data formats. A user space
 * library provides high level services and translates these into low
 * level commands which are then passed into the driver and
 * subsequently the hypervisor and the coprocessor. The library is
 * the recommended way for applications to use the coprocessor, and
 * the driver interface is not intended for general use.
 *
 * See Documentation/arch/sparc/oradax/oracle-dax.rst for more details.
 */
minor = minor_requested;
dax_dbg("Registering DAX HV api with major %ld minor %ld", major,
minor); if (sun4v_hvapi_register(HV_GRP_DAX, major, &minor)) {
dax_err("hvapi_register failed");
ret = -ENODEV; goto done;
} else {
dax_dbg("Max minor supported by HV = %ld (major %ld)", minor,
major);
minor = min(minor, minor_requested);
dax_dbg("registered DAX major %ld minor %ld", major, minor);
}
/* submit a zero length ccb array to query coprocessor queue size */
hv_rv = sun4v_ccb_submit(0, 0, HV_CCB_QUERY_CMD, 0, &max_ccbs, &dummy); if (hv_rv != 0) {
dax_err("get_hwqueue_size failed with status=%ld and max_ccbs=%ld",
hv_rv, max_ccbs);
ret = -ENODEV; goto done;
}
if (max_ccbs != DAX_MAX_CCBS) {
dax_err("HV reports unsupported max_ccbs=%ld", max_ccbs);
ret = -ENODEV; goto done;
}
if (alloc_chrdev_region(&first, 0, 1, DAX_NAME) < 0) {
dax_err("alloc_chrdev_region failed");
ret = -ENXIO; goto done;
}
ret = class_register(&cl); if (ret) goto class_error;
if (device_create(&cl, NULL, first, NULL, dax_name) == NULL) {
dax_err("device_create failed");
ret = -ENXIO; goto device_error;
}
cdev_init(&c_dev, &dax_fops); if (cdev_add(&c_dev, first, 1) == -1) {
dax_err("cdev_add failed");
ret = -ENXIO; goto cdev_error;
}
staticint dax_lock_page(void *va, struct page **p)
{ int ret;
dax_dbg("uva %p", va);
ret = pin_user_pages_fast((unsignedlong)va, 1, FOLL_WRITE, p); if (ret == 1) {
dax_dbg("locked page %p, for VA %p", *p, va); return 0;
}
dax_dbg("pin_user_pages failed, va=%p, ret=%d", va, ret); return -1;
}
/*
 * Pin the user pages referenced by the CCBs and rewrite their address
 * types from VA to "VA alternate context" before submission to the HV.
 * On a pin failure the offending user address is returned via err_va.
 *
 * NOTE(review): this chunk is truncated — only the output-address case
 * is visible; the remaining address fields, the 'error' label, and the
 * closing brace lie outside the visible region.  Code unchanged; only
 * comments added.  (The mangled 'staticint' token is an extraction
 * artifact preserved here byte-for-byte.)
 */
staticint dax_lock_pages(struct dax_ctx *ctx, int idx, int nelem, u64 *err_va)
{ int i;
for (i = 0; i < nelem; i++) { struct dax_ccb *ccbp = &ctx->ccb_buf[i];
/* * For each address in the CCB whose type is virtual, * lock the page and change the type to virtual alternate * context. On error, return the offending address in * err_va.
*/ if (ccbp->hdr.out_addr_type == DAX_ADDR_TYPE_VA) {
dax_dbg("output"); if (dax_lock_page(ccbp->out,
&ctx->pages[i + idx][OUT]) != 0) {
*err_va = (u64)ccbp->out; goto error;
}
/* Pinned: let the HV resolve this address in the alternate context. */
ccbp->hdr.out_addr_type = DAX_ADDR_TYPE_VA_ALT;
}
/*
 * NOTE(review): two spliced fragments from different functions — their
 * enclosing definitions are outside this chunk.  Code unchanged; only
 * comments added.
 *
 * Fragment 1: wait for any still-pending CCBs, then unpin their pages
 * (presumably from the device-close path — confirm against full file).
 */
for (i = 0; i < DAX_CA_ELEMS; i++) { if (ctx->ca_buf[i].status == CCA_STAT_NOT_COMPLETED) {
dax_dbg("CCB[%d] not completed", i);
dax_ccb_wait(ctx, i);
}
dax_unlock_pages(ctx, i, 1);
}
/*
 * Fragment 2: a switch arm (the enclosing switch is not visible) that
 * unpins pages only for CCBs that have already completed.
 */
case CCB_DEQUEUE: for (i = 0; i < DAX_CA_ELEMS; i++) { if (ctx->ca_buf[i].status !=
CCA_STAT_NOT_COMPLETED)
dax_unlock_pages(ctx, i, 1);
} return count;
/*
 * Debug helper: dump the raw contents of an array of CCBs.
 *
 * @ccb:   array of CCBs to print
 * @nelem: number of CCBs in the array
 *
 * Each CCB is printed as eight u64 dwords; CCBs with the longccb
 * header bit set are labelled "long".
 */
static void dax_prt_ccbs(struct dax_ccb *ccb, int nelem)
{
	int i, j;
	u64 *ccbp;

	dax_dbg("ccb buffer:");
	for (i = 0; i < nelem; i++) {
		ccbp = (u64 *)&ccb[i];
		dax_dbg(" %sccb[%d]", ccb[i].hdr.longccb ? "long " : "", i);
		for (j = 0; j < 8; j++)
			dax_dbg("\tccb[%d].dwords[%d]=0x%llx",
				i, j, *(ccbp + j));
	}
}
/* * Validates user CCB content. Also sets completion address and address types * for all addresses contained in CCB.
/* * Validates user CCB content. Also sets completion address and address types * for all addresses contained in CCB.
 *
 * NOTE(review): this chunk is truncated — the completion-address setup
 * (the declared ca_offset is never used here) and the success return
 * are outside the visible region.  Code unchanged; only comments added.
 * (The mangled 'staticint'/'unsignedlong' tokens are extraction
 * artifacts preserved byte-for-byte.)
 */ staticint dax_preprocess_usr_ccbs(struct dax_ctx *ctx, int idx, int nelem)
{ int i;
/* * The user is not allowed to specify real address types in * the CCB header. This must be enforced by the kernel before * submitting the CCBs to HV. The only allowed values for all * address fields are VA or IMM
*/ for (i = 0; i < nelem; i++) { struct dax_ccb *ccbp = &ctx->ccb_buf[i]; unsignedlong ca_offset;
/* Reject CCB versions newer than what this driver supports. */
if (ccbp->hdr.ccb_version > max_ccb_version) return DAX_SUBMIT_ERR_CCB_INVAL;
/* Whitelist of opcodes the driver will forward to the HV. */
switch (ccbp->hdr.opcode) { case DAX_OP_SYNC_NOP: case DAX_OP_EXTRACT: case DAX_OP_SCAN_VALUE: case DAX_OP_SCAN_RANGE: case DAX_OP_TRANSLATE: case DAX_OP_SCAN_VALUE | DAX_OP_INVERT: case DAX_OP_SCAN_RANGE | DAX_OP_INVERT: case DAX_OP_TRANSLATE | DAX_OP_INVERT: case DAX_OP_SELECT: break; default: return DAX_SUBMIT_ERR_CCB_INVAL;
}
/* Each address field must be either a VA or absent (no real addresses). */
if (ccbp->hdr.out_addr_type != DAX_ADDR_TYPE_VA &&
ccbp->hdr.out_addr_type != DAX_ADDR_TYPE_NONE) {
dax_dbg("invalid out_addr_type in user CCB[%d]", i); return DAX_SUBMIT_ERR_CCB_INVAL;
}
if (ccbp->hdr.pri_addr_type != DAX_ADDR_TYPE_VA &&
ccbp->hdr.pri_addr_type != DAX_ADDR_TYPE_NONE) {
dax_dbg("invalid pri_addr_type in user CCB[%d]", i); return DAX_SUBMIT_ERR_CCB_INVAL;
}
if (ccbp->hdr.sec_addr_type != DAX_ADDR_TYPE_VA &&
ccbp->hdr.sec_addr_type != DAX_ADDR_TYPE_NONE) {
dax_dbg("invalid sec_addr_type in user CCB[%d]", i); return DAX_SUBMIT_ERR_CCB_INVAL;
}
if (ccbp->hdr.table_addr_type != DAX_ADDR_TYPE_VA &&
ccbp->hdr.table_addr_type != DAX_ADDR_TYPE_NONE) {
dax_dbg("invalid table_addr_type in user CCB[%d]", i); return DAX_SUBMIT_ERR_CCB_INVAL;
}
/*
 * NOTE(review): fragment of the CCB submission path (presumably
 * dax_ccb_exec) — the function signature, the hcall itself that sets
 * hv_rv, and the surrounding declarations (idx, nccbs, buf, count) are
 * outside this chunk.  Code unchanged; only comments added.
 */
/* for given index and length, verify ca_buf range exists */ if (idx < 0 || idx > (DAX_CA_ELEMS - nccbs)) {
ctx->result.exec.status = DAX_SUBMIT_ERR_NO_CA_AVAIL; return 0;
}
/* * Copy CCBs into kernel buffer to prevent modification by the * user in between validation and submission.
*/ if (copy_from_user(ctx->ccb_buf, buf, count)) {
dax_dbg("copyin of user CCB buffer failed");
ctx->result.exec.status = DAX_SUBMIT_ERR_CCB_ARR_MMU_MISS; return 0;
}
/* check to see if ca_buf[idx] .. ca_buf[idx + nccbs] are available */ for (i = idx; i < idx + nccbs; i++) { if (ctx->ca_buf[i].status == CCA_STAT_NOT_COMPLETED) {
dax_dbg("CA range not available, dequeue needed");
ctx->result.exec.status = DAX_SUBMIT_ERR_NO_CA_AVAIL; return 0;
}
}
dax_unlock_pages(ctx, idx, nccbs);
/* Map the hypervisor return code onto a driver submit status. */
switch (hv_rv) { case HV_EOK: /* * Hcall succeeded with no errors but the accepted * length may be less than the requested length. The * only way the driver can resubmit the remainder is * to wait for completion of the submitted CCBs since * there is no way to guarantee the ordering semantics * required by the client applications. Therefore we * let the user library deal with resubmissions.
*/
ctx->result.exec.status = DAX_SUBMIT_OK; break; case HV_EWOULDBLOCK: /* * This is a transient HV API error. The user library * can retry.
*/
dax_dbg("hcall returned HV_EWOULDBLOCK");
ctx->result.exec.status = DAX_SUBMIT_ERR_WOULDBLOCK; break; case HV_ENOMAP: /* * HV was unable to translate a VA. The VA it could * not translate is returned in the status_data param.
*/
dax_dbg("hcall returned HV_ENOMAP");
ctx->result.exec.status = DAX_SUBMIT_ERR_NOMAP; break; case HV_EINVAL: /* * This is the result of an invalid user CCB as HV is * validating some of the user CCB fields. Pass this * error back to the user. There is no supporting info * to isolate the invalid field.
*/
dax_dbg("hcall returned HV_EINVAL");
ctx->result.exec.status = DAX_SUBMIT_ERR_CCB_INVAL; break; case HV_ENOACCESS: /* * HV found a VA that did not have the appropriate * permissions (such as the w bit). The VA in question * is returned in status_data param.
*/
dax_dbg("hcall returned HV_ENOACCESS");
ctx->result.exec.status = DAX_SUBMIT_ERR_NOACCESS; break; case HV_EUNAVAILABLE: /* * The requested CCB operation could not be performed * at this time. Return the specific unavailable code * in the status_data field.
*/
dax_dbg("hcall returned HV_EUNAVAILABLE");
ctx->result.exec.status = DAX_SUBMIT_ERR_UNAVAIL; break; default:
ctx->result.exec.status = DAX_SUBMIT_ERR_INTERNAL;
dax_dbg("unknown hcall return value (%ld)", hv_rv); break;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.