// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */
static enum resp_states check_op_seq(struct rxe_qp *qp,
				     struct rxe_pkt_info *pkt)
{
	switch (qp_type(qp)) {
	case IB_QPT_RC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_RC_SEND_FIRST:
		case IB_OPCODE_RC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		case IB_OPCODE_RC_RDMA_WRITE_FIRST:
		case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_C;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_RC_SEND_MIDDLE:
			case IB_OPCODE_RC_SEND_LAST:
			case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE:
			case IB_OPCODE_RC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_RC_RDMA_WRITE_LAST:
			case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_ERR_MISSING_OPCODE_FIRST;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;
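	/* The RC block above encodes the opcode sequencing rule: a
	 * multi-packet message runs FIRST, zero or more MIDDLEs, then
	 * exactly one LAST variant. A MIDDLE/LAST with no message in
	 * progress NAKs as "missing first"; a FIRST/ONLY while one is
	 * in progress NAKs as "missing last".
	 */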
	case IB_QPT_UC:
		switch (qp->resp.opcode) {
		case IB_OPCODE_UC_SEND_FIRST:
		case IB_OPCODE_UC_SEND_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		case IB_OPCODE_UC_RDMA_WRITE_FIRST:
		case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				return RESPST_CHK_OP_VALID;
			default:
				return RESPST_ERR_MISSING_OPCODE_LAST_D1E;
			}

		default:
			switch (pkt->opcode) {
			case IB_OPCODE_UC_SEND_MIDDLE:
			case IB_OPCODE_UC_SEND_LAST:
			case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE:
			case IB_OPCODE_UC_RDMA_WRITE_MIDDLE:
			case IB_OPCODE_UC_RDMA_WRITE_LAST:
			case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE:
				qp->resp.drop_msg = 1;
				return RESPST_CLEANUP;
			default:
				return RESPST_CHK_OP_VALID;
			}
		}
		break;

	default:
		return RESPST_CHK_OP_VALID;
	}
}
static enum resp_states check_resource(struct rxe_qp *qp,
				       struct rxe_pkt_info *pkt)
{
	struct rxe_srq *srq = qp->srq;

	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
		/* it is the requester's job to not send
		 * too many read/atomic ops, we just
		 * recycle the responder resource queue
		 */
		if (likely(qp->attr.max_dest_rd_atomic > 0))
			return RESPST_CHK_LENGTH;
		else
			return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ;
	}

	if (pkt->mask & RXE_RWR_MASK) {
		if (srq)
			return get_srq_wqe(qp);

		qp->resp.wqe = queue_head(qp->rq.queue,
					  QUEUE_TYPE_FROM_CLIENT);
		return (qp->resp.wqe) ? RESPST_CHK_LENGTH : RESPST_ERR_RNR;
	}

	return RESPST_CHK_LENGTH;
}
static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
					      struct rxe_pkt_info *pkt)
{
	/*
	 * See IBA C9-92
	 * For UD QPs we only check if the packet will fit in the
	 * receive buffer later. For RDMA operations additional
	 * length checks are performed in check_rkey.
	 */
	if ((qp_type(qp) == IB_QPT_GSI) || (qp_type(qp) == IB_QPT_UD)) {
		unsigned int payload = payload_size(pkt);
		unsigned int recv_buffer_len = 0;
		int i;

		for (i = 0; i < qp->resp.wqe->dma.num_sge; i++)
			recv_buffer_len += qp->resp.wqe->dma.sge[i].length;
		if (payload + sizeof(union rdma_network_hdr) > recv_buffer_len) {
			rxe_dbg_qp(qp, "The receive buffer is too small for this UD packet.\n");
			return RESPST_ERR_LENGTH;
		}
	}

	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
					     (qp_type(qp) == IB_QPT_UC))) {
		unsigned int mtu = qp->mtu;
		unsigned int payload = payload_size(pkt);

		if ((pkt->mask & RXE_START_MASK) &&
		    (pkt->mask & RXE_END_MASK)) {
			if (unlikely(payload > mtu)) {
				rxe_dbg_qp(qp, "only packet too long\n");
				return RESPST_ERR_LENGTH;
			}
		} else if ((pkt->mask & RXE_START_MASK) ||
			   (pkt->mask & RXE_MIDDLE_MASK)) {
			if (unlikely(payload != mtu)) {
				rxe_dbg_qp(qp, "first or middle packet not mtu\n");
				return RESPST_ERR_LENGTH;
			}
		} else if (pkt->mask & RXE_END_MASK) {
			if (unlikely((payload == 0) || (payload > mtu))) {
				rxe_dbg_qp(qp, "last packet zero or too long\n");
				return RESPST_ERR_LENGTH;
			}
		}
	}
	/* See IBA C9-94 */
	if (pkt->mask & RXE_RETH_MASK) {
		if (reth_len(pkt) > (1U << 31)) {
			rxe_dbg_qp(qp, "dma length too long\n");
			return RESPST_ERR_LENGTH;
		}
	}

	if (pkt->mask & RXE_RDMA_OP_MASK)
		return RESPST_CHK_RKEY;
	else
		return RESPST_EXECUTE;
}
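/* Worked example of the length rules above, assuming an MTU of 1024:
 * a 2500-byte RC SEND arrives as FIRST (1024 bytes), MIDDLE (1024
 * bytes) and LAST (452 bytes). FIRST and MIDDLE packets must carry
 * exactly one MTU, a LAST packet 1..MTU bytes and an ONLY packet at
 * most one MTU; any other payload size returns RESPST_ERR_LENGTH.
 */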
/* if the reth length field is zero we can assume nothing
 * about the rkey value and should not validate or use it.
 * Instead set qp->resp.rkey to 0 which is an invalid rkey
 * value since the minimum index part is 1.
 */
static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	unsigned int length = reth_len(pkt);

	qp->resp.va = reth_va(pkt);
	qp->resp.offset = 0;
	qp->resp.resid = length;
	qp->resp.length = length;
	if (pkt->mask & RXE_READ_OR_WRITE_MASK && length == 0)
		qp->resp.rkey = 0;
	else
		qp->resp.rkey = reth_rkey(pkt);
}
/* resolve the packet rkey to qp->resp.mr or set qp->resp.mr to NULL
 * if an invalid rkey is received or the rdma length is zero. For middle
 * or last packets use the stored value of mr.
 */
static enum resp_states check_rkey(struct rxe_qp *qp,
				   struct rxe_pkt_info *pkt)
{
	struct rxe_mr *mr = NULL;
	struct rxe_mw *mw = NULL;
	u64 va;
	u32 rkey;
	u32 resid;
	u32 pktlen;
	int mtu = qp->mtu;
	enum resp_states state;
	int access = 0;

	/* parse RETH or ATMETH header for first/only packets
	 * for va, length, rkey, etc. or use current value for
	 * middle/last packets.
	 */
	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (pkt->mask & RXE_RETH_MASK)
			qp_resp_from_reth(qp, pkt);
	/* A zero-byte read or write op is not required to
	 * set an addr or rkey. See C9-88
	 */
	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
	    (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) {
		qp->resp.mr = NULL;
		return RESPST_EXECUTE;
	}

	if (pkt->mask & RXE_FLUSH_MASK) {
		/* FLUSH MR may not set va or resid
		 * no need to check range since we will flush whole mr
		 */
		if (feth_sel(pkt) == IB_FLUSH_MR)
			goto skip_check_range;
	}
	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
		state = RESPST_ERR_RKEY_VIOLATION;
		goto err;
	}

skip_check_range:
	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
		if (resid > mtu) {
			if (pktlen != mtu || bth_pad(pkt)) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		} else {
			if (pktlen != resid) {
				state = RESPST_ERR_LENGTH;
				goto err;
			}
			if ((bth_pad(pkt) != (0x3 & (-resid)))) {
				/* This case may not be exactly that
				 * but nothing else fits.
				 */
				state = RESPST_ERR_LENGTH;
				goto err;
			}
		}
	}
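	/* Note on the pad check above: (0x3 & (-resid)) is the number of
	 * pad bytes needed to round resid up to a 4-byte boundary, e.g.
	 * resid == 5 gives 3 pad bytes and resid == 8 gives 0. The last
	 * packet's BTH pad count must match exactly.
	 */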
	WARN_ON_ONCE(qp->resp.mr);

	qp->resp.mr = mr;
	return RESPST_EXECUTE;

err:
	qp->resp.mr = NULL;
	if (mr)
		rxe_put(mr);
	if (mw)
		rxe_put(mw);

	return state;
}
static enum resp_states send_data_in(struct rxe_qp *qp, void *data_addr,
				     int data_len)
{
	int err;
	if (res->flush.type & IB_FLUSH_PERSISTENT) {
		if (rxe_flush_pmem_iova(mr, start, length))
			return RESPST_ERR_RKEY_VIOLATION;
		/* Make data persistent. */
		wmb();
	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
		/* Make data globally visible. */
		wmb();
	}
	qp->resp.msn++;

	/* next expected psn, read handles this separately */
	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
	qp->resp.ack_psn = qp->resp.psn;
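	/* PSNs are 24-bit sequence numbers, so masking with
	 * BTH_PSN_MASK makes the increment wrap: a packet with
	 * psn 0xffffff yields a next expected psn of 0.
	 */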
/**
 * rxe_recheck_mr - revalidate MR from rkey and get a reference
 * @qp: the qp
 * @rkey: the rkey
 *
 * This code allows the MR to be invalidated or deregistered or
 * the MW if one was used to be invalidated or deallocated.
 * It is assumed that the access permissions, if originally good,
 * are still OK and that the mappings are unchanged.
 *
 * TODO: If someone reregisters an MR to change its size or
 * access permissions during the processing of an RDMA read
 * we should kill the responder resource and complete the
 * operation with an error.
 *
 * Return: mr on success else NULL
 */
static struct rxe_mr *rxe_recheck_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	struct rxe_mw *mw;

	if (rkey_is_mw(rkey)) {
		mw = rxe_pool_get_index(&rxe->mw_pool, rkey >> 8);
		if (!mw)
			return NULL;
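	/* rxe encodes an rkey as a pool index in the upper bits with an
	 * 8-bit key in the low byte, which is why rkey >> 8 above
	 * recovers the MW's pool index.
	 */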
/* RDMA read response. If res is not NULL, then we have a current RDMA request
 * being processed or replayed.
 */
static enum resp_states read_reply(struct rxe_qp *qp,
				   struct rxe_pkt_info *req_pkt)
{
	struct rxe_pkt_info ack_pkt;
	struct sk_buff *skb;
	int mtu = qp->mtu;
	enum resp_states state;
	int payload;
	int opcode;
	int err;
	struct resp_res *res = qp->resp.res;
	struct rxe_mr *mr;

	if (!res) {
		res = rxe_prepare_res(qp, req_pkt, RXE_READ_MASK);
		qp->resp.res = res;
	}
	if (res->state == rdatm_res_state_new) {
		if (!res->replay || qp->resp.length == 0) {
			/* if length == 0 mr will be NULL (is ok)
			 * otherwise qp->resp.mr holds a ref on mr
			 * which we transfer to mr and drop below.
			 */
			mr = qp->resp.mr;
			qp->resp.mr = NULL;
		} else {
			mr = rxe_recheck_mr(qp, res->read.rkey);
			if (!mr)
				return RESPST_ERR_RKEY_VIOLATION;
		}

		if (res->read.resid <= mtu)
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY;
		else
			opcode = IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST;
	} else {
		/* re-lookup mr from rkey on all later packets.
		 * length will be non-zero. This can fail if someone
		 * has modified or destroyed the mr since the first packet.
		 */
		mr = rxe_recheck_mr(qp, res->read.rkey);
		if (!mr)
			return RESPST_ERR_RKEY_VIOLATION;
/* Executes a new request. A retried request never reaches this function
 * (sends and writes are discarded, and reads and atomics are retried
 * elsewhere).
 */
static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
{
	enum resp_states err;
	struct sk_buff *skb = PKT_TO_SKB(pkt);
	union rdma_network_hdr hdr;
		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = RESPST_PROCESS_FLUSH;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	} else if (pkt->mask & RXE_READ_MASK) {
		struct resp_res *res;

		res = find_resource(qp, pkt->psn);
		if (!res) {
			/* Resource not found. Class D error. Drop the
			 * request.
			 */
			rc = RESPST_CLEANUP;
			goto out;
		} else {
			/* Ensure this new request is the same as the previous
			 * one or a subset of it.
			 */
			u64 iova = reth_va(pkt);
			u32 resid = reth_len(pkt);
		/* Find the operation in our list of responder resources. */
		res = find_resource(qp, pkt->psn);
		if (res) {
			res->replay = 1;
			res->cur_psn = pkt->psn;
			qp->resp.res = res;
			rc = pkt->mask & RXE_ATOMIC_MASK ?
					RESPST_ATOMIC_REPLY :
					RESPST_ATOMIC_WRITE_REPLY;
			goto out;
		}

		/* Resource not found. Class D error. Drop the request. */
		rc = RESPST_CLEANUP;
		goto out;
	}
out:
	return rc;
}
/* Process a class A or C error. Both are treated the same in this
 * implementation.
 */
static void do_class_ac_error(struct rxe_qp *qp, u8 syndrome,
			      enum ib_wc_status status)
{
	qp->resp.aeth_syndrome = syndrome;
	qp->resp.status = status;

	/* indicate that we should go through the ERROR state */
	qp->resp.goto_error = 1;
}
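/* In the rxe_responder() dispatcher below, every call to this helper
 * is followed by a transition to RESPST_COMPLETE, so the stored
 * syndrome goes out in the NAK's AETH and the goto_error flag then
 * routes RESPST_DONE/RESPST_EXIT into RESPST_ERROR.
 */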
static enum resp_states do_class_d1e_error(struct rxe_qp *qp)
{
	/* UC */
	if (qp->srq) {
		/* Class E */
		qp->resp.drop_msg = 1;
		if (qp->resp.wqe) {
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			return RESPST_COMPLETE;
		} else {
			return RESPST_CLEANUP;
		}
	} else {
		/* Class D1. This packet may be the start of a
		 * new message and could be valid. The previous
		 * message is invalid and ignored. Reset the
		 * recv wr to its original state.
		 */
		if (qp->resp.wqe) {
			qp->resp.wqe->dma.resid = qp->resp.wqe->dma.length;
			qp->resp.wqe->dma.cur_sge = 0;
			qp->resp.wqe->dma.sge_offset = 0;
			qp->resp.opcode = -1;
		}

		if (qp->resp.mr) {
			rxe_put(qp->resp.mr);
			qp->resp.mr = NULL;
		}

		return RESPST_CLEANUP;
	}
}
/* drain and optionally complete the receive queue
 * if unable to complete a wqe, stop completing and
 * just flush the remaining wqes
 */
static void flush_recv_queue(struct rxe_qp *qp, bool notify)
{
	struct rxe_queue *q = qp->rq.queue;
	struct rxe_recv_wqe *wqe;
	int err;

	if (qp->srq) {
		if (notify && qp->ibqp.event_handler) {
			struct ib_event ev;
int rxe_responder(struct rxe_qp *qp)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	enum resp_states state;
	struct rxe_pkt_info *pkt = NULL;
	int ret;

	while (1) {
		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
		switch (state) {
		case RESPST_GET_REQ:
			state = get_req(qp, &pkt);
			break;
		case RESPST_CHK_PSN:
			state = check_psn(qp, pkt);
			break;
		case RESPST_CHK_OP_SEQ:
			state = check_op_seq(qp, pkt);
			break;
		case RESPST_CHK_OP_VALID:
			state = check_op_valid(qp, pkt);
			break;
		case RESPST_CHK_RESOURCE:
			state = check_resource(qp, pkt);
			break;
		case RESPST_CHK_LENGTH:
			state = rxe_resp_check_length(qp, pkt);
			break;
		case RESPST_CHK_RKEY:
			state = check_rkey(qp, pkt);
			break;
		case RESPST_EXECUTE:
			state = execute(qp, pkt);
			break;
		case RESPST_COMPLETE:
			state = do_complete(qp, pkt);
			break;
		case RESPST_READ_REPLY:
			state = read_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_REPLY:
			state = atomic_reply(qp, pkt);
			break;
		case RESPST_ATOMIC_WRITE_REPLY:
			state = atomic_write_reply(qp, pkt);
			break;
		case RESPST_PROCESS_FLUSH:
			state = process_flush(qp, pkt);
			break;
		case RESPST_ACKNOWLEDGE:
			state = acknowledge(qp, pkt);
			break;
		case RESPST_CLEANUP:
			state = cleanup(qp, pkt);
			break;
		case RESPST_DUPLICATE_REQUEST:
			state = duplicate_request(qp, pkt);
			break;
		case RESPST_ERR_PSN_OUT_OF_SEQ:
			/* RC only - Class B. Drop packet. */
			send_ack(qp, AETH_NAK_PSN_SEQ_ERROR, qp->resp.psn);
			state = RESPST_CLEANUP;
			break;
		case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ:
		case RESPST_ERR_MISSING_OPCODE_FIRST:
		case RESPST_ERR_MISSING_OPCODE_LAST_C:
		case RESPST_ERR_UNSUPPORTED_OPCODE:
		case RESPST_ERR_MISALIGNED_ATOMIC:
			/* RC Only - Class C. */
			do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
					  IB_WC_REM_INV_REQ_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_MISSING_OPCODE_LAST_D1E:
			state = do_class_d1e_error(qp);
			break;

		case RESPST_ERR_RNR:
			if (qp_type(qp) == IB_QPT_RC) {
				rxe_counter_inc(rxe, RXE_CNT_SND_RNR);
				/* RC - class B */
				send_ack(qp, AETH_RNR_NAK |
					 (~AETH_TYPE_MASK &
					  qp->attr.min_rnr_timer),
					 pkt->psn);
			} else {
				/* UD/UC - class D */
				qp->resp.drop_msg = 1;
			}
			state = RESPST_CLEANUP;
			break;
		case RESPST_ERR_RKEY_VIOLATION:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_REM_ACC_ERR,
						  IB_WC_REM_ACCESS_ERR);
				state = RESPST_COMPLETE;
			} else {
				qp->resp.drop_msg = 1;
				if (qp->srq) {
					/* UC/SRQ Class D */
					qp->resp.status = IB_WC_REM_ACCESS_ERR;
					state = RESPST_COMPLETE;
				} else {
					/* UC/non-SRQ Class E. */
					state = RESPST_CLEANUP;
				}
			}
			break;
		case RESPST_ERR_INVALIDATE_RKEY:
			/* RC - Class J. */
			qp->resp.goto_error = 1;
			qp->resp.status = IB_WC_REM_INV_REQ_ERR;
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_LENGTH:
			if (qp_type(qp) == IB_QPT_RC) {
				/* Class C */
				do_class_ac_error(qp, AETH_NAK_INVALID_REQ,
						  IB_WC_REM_INV_REQ_ERR);
				state = RESPST_COMPLETE;
			} else if (qp->srq) {
				/* UC/UD - class E */
				qp->resp.status = IB_WC_REM_INV_REQ_ERR;
				state = RESPST_COMPLETE;
			} else {
				/* UC/UD - class D */
				qp->resp.drop_msg = 1;
				state = RESPST_CLEANUP;
			}
			break;

		case RESPST_ERR_MALFORMED_WQE:
			/* All, Class A. */
			do_class_ac_error(qp, AETH_NAK_REM_OP_ERR,
					  IB_WC_LOC_QP_OP_ERR);
			state = RESPST_COMPLETE;
			break;

		case RESPST_ERR_CQ_OVERFLOW:
			/* All - Class G */
			state = RESPST_ERROR;
			break;
		case RESPST_DONE:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto done;

		case RESPST_EXIT:
			if (qp->resp.goto_error) {
				state = RESPST_ERROR;
				break;
			}

			goto exit;

		case RESPST_ERROR:
			qp->resp.goto_error = 0;
			rxe_dbg_qp(qp, "moved to error state\n");
			rxe_qp_error(qp);
			goto exit;

		default:
			WARN_ON_ONCE(1);
		}
	}

	/* A non-zero return value will cause rxe_do_task to
	 * exit its loop and end the work item. A zero return
	 * will continue looping and return to rxe_responder
	 */
done:
	ret = 0;
	goto out;
exit:
	ret = -EAGAIN;
out:
	return ret;
}