/* Port numbers for SMBD transport */
#define SMB_PORT 445
#define SMBD_PORT 5445

/* Address lookup and resolve timeout in ms */
#define RDMA_RESOLVE_TIMEOUT 5000

/* SMBD negotiation timeout in seconds */
#define SMBD_NEGOTIATE_TIMEOUT 120

/* SMBD minimum receive size and fragmented sized defined in [MS-SMBD] */
#define SMBD_MIN_RECEIVE_SIZE 128
#define SMBD_MIN_FRAGMENTED_SIZE 131072

/*
 * Default maximum number of RDMA read/write outstanding on this connection
 * This value is possibly decreased during QP creation on hardware limit
 */
#define SMBD_CM_RESPONDER_RESOURCES 32

/* Maximum number of retries on data transfer operations */
#define SMBD_CM_RETRY 6
/* No need to retry on Receiver Not Ready since SMBD manages credits */
#define SMBD_CM_RNR_RETRY 0

/*
 * User configurable initial values per SMBD transport connection
 * as defined in [MS-SMBD] 3.1.1.1
 * Those may change after a SMBD negotiation
 */
/* The local peer's maximum number of credits to grant to the peer */
int smbd_receive_credit_max = 255;

/* The remote peer's credit request of local peer */
int smbd_send_credit_target = 255;

/* The maximum single message size can be sent to remote peer */
int smbd_max_send_size = 1364;

/* The maximum fragmented upper-layer payload receive size supported */
int smbd_max_fragmented_recv_size = 1024 * 1024;

/* The maximum single-message size which can be received */
int smbd_max_receive_size = 1364;

/* The timeout to initiate send of a keepalive message on idle */
int smbd_keep_alive_interval = 120;

/*
 * User configurable initial values for RDMA transport
 * The actual values used may be lower and are limited to hardware capabilities
 */
/* Default maximum number of pages in a single RDMA write/read */
int smbd_max_frmr_depth = 2048;

/* If payload is less than this byte, use RDMA send/recv not read/write */
int rdma_readwrite_threshold = 4096;

/*
 * Transport logging functions
 * Logging are defined as classes. They can be OR'ed to define the actual
 * logging level via module parameter smbd_logging_class
 * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and
 * log_rdma_event()
 */
#define LOG_OUTGOING 0x1
#define LOG_INCOMING 0x2
#define LOG_READ 0x4
#define LOG_WRITE 0x8
#define LOG_RDMA_SEND 0x10
#define LOG_RDMA_RECV 0x20
#define LOG_KEEP_ALIVE 0x40
#define LOG_RDMA_EVENT 0x80
#define LOG_RDMA_MR 0x100
static unsigned int smbd_logging_class;
module_param(smbd_logging_class, uint, 0644);
MODULE_PARM_DESC(smbd_logging_class,
	"Logging class for SMBD transport 0x0 to 0x100");
/*
 * NOTE(review): fragment of an RDMA CM event handler switch — the enclosing
 * function header and switch statement are not visible in this extract.
 */
	case RDMA_CM_EVENT_ESTABLISHED:
		log_rdma_event(INFO, "connected event=%s\n", event_name);

		/*
		 * Here we work around an inconsistency between
		 * iWarp and other devices (at least rxe and irdma using RoCEv2)
		 */
		if (rdma_protocol_iwarp(id->device, id->port_num)) {
			/*
			 * iWarp devices report the peer's values
			 * with the perspective of the peer here.
			 * Tested with siw and irdma (in iwarp mode)
			 * We need to change to our perspective here,
			 * so we need to switch the values.
			 */
			peer_initiator_depth = event->param.conn.responder_resources;
			peer_responder_resources = event->param.conn.initiator_depth;
		} else {
			/*
			 * Non iWarp devices report the peer's values
			 * already changed to our perspective here.
			 * Tested with rxe and irdma (in roce mode).
			 */
			peer_initiator_depth = event->param.conn.initiator_depth;
			peer_responder_resources = event->param.conn.responder_resources;
		}
		if (rdma_protocol_iwarp(id->device, id->port_num) &&
		    event->param.conn.private_data_len == 8) {
			/*
			 * Legacy clients with only iWarp MPA v1 support
			 * need a private blob in order to negotiate
			 * the IRD/ORD values.
			 */
			const __be32 *ird_ord_hdr = event->param.conn.private_data;
			u32 ird32 = be32_to_cpu(ird_ord_hdr[0]);
			u32 ord32 = be32_to_cpu(ird_ord_hdr[1]);

			/*
			 * cifs.ko sends the legacy IRD/ORD negotiation
			 * event if iWarp MPA v2 was used.
			 *
			 * Here we check that the values match and only
			 * mark the client as legacy if they don't match.
			 */
			if ((u32)event->param.conn.initiator_depth != ird32 ||
			    (u32)event->param.conn.responder_resources != ord32) {
				/*
				 * There are broken clients (old cifs.ko)
				 * using little endian and also
				 * struct rdma_conn_param only uses u8
				 * for initiator_depth and responder_resources,
				 * so we truncate the value to U8_MAX.
				 *
				 * smb_direct_accept_client() will then
				 * do the real negotiation in order to
				 * select the minimum between client and
				 * server.
				 */
				ird32 = min_t(u32, ird32, U8_MAX);
				ord32 = min_t(u32, ord32, U8_MAX);
		/*
		 * NOTE(review): the lines between the truncation above and the
		 * negotiation below (presumably assignments of the truncated
		 * values and closing braces) are missing from this extract —
		 * verify against the original source.
		 */
		/*
		 * negotiate the value by using the minimum
		 * between client and server if the client provided
		 * non 0 values.
		 */
		if (peer_initiator_depth != 0)
			info->initiator_depth =
				min_t(u8, info->initiator_depth,
				      peer_initiator_depth);
		if (peer_responder_resources != 0)
			info->responder_resources =
				min_t(u8, info->responder_resources,
				      peer_responder_resources);
	/*
	 * NOTE(review): the end of the ESTABLISHED case (including its break)
	 * appears to be missing from this extract.
	 */
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		log_rdma_event(ERR, "connecting failed event=%s\n", event_name);
		/* mark the socket dead and wake anyone waiting on the status */
		sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
		wake_up_interruptible(&info->status_wait);
		break;

	case RDMA_CM_EVENT_DEVICE_REMOVAL:
	case RDMA_CM_EVENT_DISCONNECTED:
		/* This happens when we fail the negotiation */
		if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) {
			log_rdma_event(ERR, "event=%s during negotiation\n", event_name);
			sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
			wake_up(&info->status_wait);
			break;
		}
/*
 * NOTE(review): fragment of a receive-completion handler — the enclosing
 * function header is not visible in this extract. It tracks segment
 * boundaries, consumes/extends credits, and handles keep-alive requests.
 */
	if (data_length) {
		/* a new upper-layer packet starts once the previous one completed */
		if (sc->recv_io.reassembly.full_packet_received)
			response->first_segment = true;

		if (le32_to_cpu(data_transfer->remaining_data_length))
			sc->recv_io.reassembly.full_packet_received = false;
		else
			sc->recv_io.reassembly.full_packet_received = true;
	}

	/* one receive credit is consumed by this incoming message */
	atomic_dec(&info->receive_credits);
	old_recv_credit_target = info->receive_credit_target;
	/* clamp the peer's credit request to [1, recv_credit_max] */
	info->receive_credit_target =
		le16_to_cpu(data_transfer->credits_requested);
	info->receive_credit_target =
		min_t(u16, info->receive_credit_target, sp->recv_credit_max);
	info->receive_credit_target =
		max_t(u16, info->receive_credit_target, 1);
	if (le16_to_cpu(data_transfer->credits_granted)) {
		atomic_add(le16_to_cpu(data_transfer->credits_granted),
			&info->send_credits);
		/*
		 * We have new send credits granted from remote peer
		 * If any sender is waiting for credits, unblock it
		 */
		wake_up_interruptible(&info->wait_send_queue);
	}

	/* Send a KEEP_ALIVE response right away if requested */
	info->keep_alive_requested = KEEP_ALIVE_NONE;
	if (le16_to_cpu(data_transfer->flags) &
	    SMBDIRECT_FLAG_RESPONSE_REQUESTED) {
		info->keep_alive_requested = KEEP_ALIVE_PENDING;
	}

	/*
	 * If this is a packet with data payload place the data in
	 * reassembly queue and wake up the reading thread
	 */
	if (data_length) {
		if (info->receive_credit_target > old_recv_credit_target)
			queue_work(info->workqueue, &info->post_send_credits_work);
/* * Test if FRWR (Fast Registration Work Requests) is supported on the device * This implementation requires FRWR on RDMA read/write * return value: true if it is supported
*/ staticbool frwr_is_supported(struct ib_device_attr *attrs)
{ if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) returnfalse; if (attrs->max_fast_reg_page_list_len == 0) returnfalse; returntrue;
}
/*
 * Open the interface adapter for the destination address.
 * NOTE(review): only the prologue is visible — the function body is missing
 * from this extract.
 */
static int smbd_ia_open(
		struct smbd_connection *info,
		struct sockaddr *dstaddr, int port)
{
	struct smbdirect_socket *sc = &info->socket;
	int rc;
/*
 * Send a negotiation request message to the peer
 * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3
 * After negotiation, the transport is connected and ready for
 * carrying upper layer SMB payload
 */
static int smbd_post_send_negotiate_req(struct smbd_connection *info)
{
	struct smbdirect_socket *sc = &info->socket;
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct ib_send_wr send_wr;
	int rc = -ENOMEM;
	struct smbdirect_send_io *request;
	struct smbdirect_negotiate_req *packet;

	request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL);
	if (!request)
		return rc;
	/* NOTE(review): remainder of this function is missing from this extract */
/*
 * Extend the credits to remote peer
 * This implements [MS-SMBD] 3.1.5.9
 * The idea is that we should extend credits to remote peer as quickly as
 * it's allowed, to maintain data flow. We allocate as much receive
 * buffer as possible, and extend the receive credits to remote peer
 * return value: the new credits being granted.
 */
static int manage_credits_prior_sending(struct smbd_connection *info)
{
	int new_credits;
	/* NOTE(review): remainder of this function is missing from this extract */
/*
 * Check if we need to send a KEEP_ALIVE message
 * The idle connection timer triggers a KEEP_ALIVE message when expires
 * SMBDIRECT_FLAG_RESPONSE_REQUESTED is set in the message flag to have peer send
 * back a response.
 * return value:
 * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set
 * 0: otherwise
 */
static int manage_keep_alive_before_sending(struct smbd_connection *info)
{
	/* nothing to do unless a keep-alive is currently pending */
	if (info->keep_alive_requested != KEEP_ALIVE_PENDING)
		return 0;

	/* mark it as sent so only one outgoing message carries the flag */
	info->keep_alive_requested = KEEP_ALIVE_SENT;
	return 1;
}
/* Post the send request */
static int smbd_post_send(struct smbd_connection *info,
		struct smbdirect_send_io *request)
{
	struct smbdirect_socket *sc = &info->socket;
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct ib_send_wr send_wr;
	int rc, i;

	for (i = 0; i < request->num_sge; i++) {
		log_rdma_send(INFO, "rdma_request sge[%d] addr=0x%llx length=%u\n",
			i, request->sge[i].addr, request->sge[i].length);
		/* make CPU writes to the buffer visible to the device before posting */
		ib_dma_sync_single_for_device(
			sc->ib.dev,
			request->sge[i].addr,
			request->sge[i].length,
			DMA_TO_DEVICE);
	}
	/*
	 * NOTE(review): the remainder (filling send_wr and posting it via
	 * ib_post_send) is missing from this extract.
	 */
/*
 * NOTE(review): fragment of smbd_post_send_iter() — the function header and
 * intervening code are missing from this extract.
 */
	/* Fill in the data payload to find out how much data we can add */
	if (iter) {
		struct smb_extract_to_rdma extract = {
			.nr_sge = 1,
			.max_sge = SMBDIRECT_SEND_IO_MAX_SGE,
			.sge = request->sge,
			.device = sc->ib.dev,
			.local_dma_lkey = sc->ib.pd->local_dma_lkey,
			.direction = DMA_TO_DEVICE,
		};
		/* payload is bounded by both the caller's remaining data
		 * and the negotiated per-message send size */
		size_t payload_len = umin(*_remaining_data_length,
					  sp->max_send_size - sizeof(*packet));
	/* NOTE(review): code between the extract setup and the DMA mapping
	 * below is missing from this extract. */
	/* Map the packet to DMA */
	header_length = sizeof(struct smbdirect_data_transfer);
	/* If this is a packet without payload, don't send padding */
	if (!data_length)
		header_length = offsetof(struct smbdirect_data_transfer, padding);
/*
 * NOTE(review): tail of smbd_post_send_iter() — success return plus the
 * goto-based unwind chain. The function header is missing from this extract.
 */
	rc = smbd_post_send(info, request);
	if (!rc)
		return 0;

err_dma:
	/* unmap any SGEs that were successfully DMA-mapped before the failure */
	for (i = 0; i < request->num_sge; i++)
		if (request->sge[i].addr)
			ib_dma_unmap_single(sc->ib.dev,
					    request->sge[i].addr,
					    request->sge[i].length,
					    DMA_TO_DEVICE);
	mempool_free(request, sc->send_io.mem.pool);

	/* roll back receive credits and credits to be offered */
	spin_lock(&info->lock_new_credits_offered);
	info->new_credits_offered += new_credits;
	spin_unlock(&info->lock_new_credits_offered);
	atomic_sub(new_credits, &info->receive_credits);

err_alloc:
	/* drop our pending-send reference; wake waiters if we were the last */
	if (atomic_dec_and_test(&info->send_pending))
		wake_up(&info->wait_send_pending);

err_wait_send_queue:
	/* roll back send credits and pending */
	atomic_inc(&info->send_credits);

err_wait_credit:
	return rc;
}
/*
 * Send an empty message
 * Empty message is used to extend credits to the peer for keep-alive
 * while there is no upper layer payload to send at the time
 */
static int smbd_post_send_empty(struct smbd_connection *info)
{
	int remaining_data_length = 0;
	/* NOTE(review): remainder of this function is missing from this extract */
/*
 * Post the full contents of an iov_iter as one or more SMBDirect sends.
 * Returns 0 on success or the negative error from the failing send.
 */
static int smbd_post_send_full_iter(struct smbd_connection *info,
				    struct iov_iter *iter,
				    int *_remaining_data_length)
{
	int rc = 0;

	/*
	 * smbd_post_send_iter() respects the negotiated max_send_size,
	 * so keep posting until the iterator is drained or a send fails.
	 */
	while (iov_iter_count(iter) > 0) {
		rc = smbd_post_send_iter(info, iter, _remaining_data_length);
		if (rc < 0)
			return rc;
	}

	return rc;
}
/*
 * Post a receive request to the transport
 * The remote peer can only send data when a receive request is posted
 * The interaction is controlled by send/receive credit system
 */
static int smbd_post_recv(
		struct smbd_connection *info, struct smbdirect_recv_io *response)
{
	struct smbdirect_socket *sc = &info->socket;
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct ib_recv_wr recv_wr;
	int rc = -EIO;
	/* NOTE(review): remainder of this function is missing from this extract */
/*
 * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
 * This is a queue for reassembling upper layer payload and present to upper
 * layer. All the incoming payload go to the reassembly queue, regardless of
 * if reassembly is required. The upper layer code reads from the queue for all
 * incoming payloads.
 * Put a received packet to the reassembly queue
 * response: the packet received
 * data_length: the size of payload in this packet
 */
static void enqueue_reassembly(
	struct smbd_connection *info,
	struct smbdirect_recv_io *response,
	int data_length)
{
	struct smbdirect_socket *sc = &info->socket;

	spin_lock(&sc->recv_io.reassembly.lock);
	list_add_tail(&response->list, &sc->recv_io.reassembly.list);
	sc->recv_io.reassembly.queue_length++;
	/*
	 * Make sure reassembly_data_length is updated after list and
	 * reassembly_queue_length are updated. On the dequeue side
	 * reassembly_data_length is checked without a lock to determine
	 * if reassembly_queue_length and list is up to date
	 */
	virt_wmb();
	sc->recv_io.reassembly.data_length += data_length;
	spin_unlock(&sc->recv_io.reassembly.lock);
	/* statistics counters (not protected by the reassembly lock) */
	info->count_reassembly_queue++;
	info->count_enqueue_reassembly_queue++;
}
/* * Get the first entry at the front of reassembly queue * Caller is responsible for locking * return value: the first entry if any, NULL if queue is empty
*/ staticstruct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *info)
{ struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL;
if (!list_empty(&sc->recv_io.reassembly.list)) {
ret = list_first_entry(
&sc->recv_io.reassembly.list, struct smbdirect_recv_io, list);
} return ret;
}
/* * Get a receive buffer * For each remote send, we need to post a receive. The receive buffers are * pre-allocated in advance. * return value: the receive buffer, NULL if none is available
*/ staticstruct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info)
{ struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL; unsignedlong flags;
spin_lock_irqsave(&sc->recv_io.free.lock, flags); if (!list_empty(&sc->recv_io.free.list)) {
ret = list_first_entry(
&sc->recv_io.free.list, struct smbdirect_recv_io, list);
list_del(&ret->list);
info->count_receive_queue--;
info->count_get_receive_buffer++;
}
spin_unlock_irqrestore(&sc->recv_io.free.lock, flags);
return ret;
}
/*
 * Return a receive buffer
 * Upon returning of a receive buffer, we can post new receive and extend
 * more receive credits to remote peer. This is done immediately after a
 * receive buffer is returned.
 */
static void put_receive_buffer(
	struct smbd_connection *info, struct smbdirect_recv_io *response)
{
	struct smbdirect_socket *sc = &info->socket;
	unsigned long flags;

	/*
	 * NOTE(review): the code below does not match put_receive_buffer() —
	 * it reads like the body of an idle-timer/keep-alive work function
	 * (and it uses 'sp', which is not declared in this scope). The
	 * extract appears to have fused two functions together; verify
	 * against the original source.
	 */
	if (info->keep_alive_requested != KEEP_ALIVE_NONE) {
		log_keep_alive(ERR, "error status info->keep_alive_requested=%d\n",
			info->keep_alive_requested);
		smbd_disconnect_rdma_connection(info);
		return;
	}

	log_keep_alive(INFO, "about to send an empty idle message\n");
	smbd_post_send_empty(info);

	/* Setup the next idle timeout work */
	queue_delayed_work(info->workqueue, &info->idle_timer_work,
			   msecs_to_jiffies(sp->keepalive_interval_msec));
}
/*
 * Destroy the transport and related RDMA and memory resources
 * Need to go through all the pending counters and make sure no one is using
 * the transport while it is destroyed
 */
void smbd_destroy(struct TCP_Server_Info *server)
{
	struct smbd_connection *info = server->smbd_conn;
	struct smbdirect_socket *sc;
	struct smbdirect_socket_parameters *sp;
	struct smbdirect_recv_io *response;
	unsigned long flags;

	/*
	 * NOTE(review): code between the declarations and the drain loop —
	 * including the assignments of sc and sp — is missing from this
	 * extract; sc is used uninitialized as shown.
	 */

	/* It's not possible for upper layer to get to reassembly */
	log_rdma_event(INFO, "drain the reassembly queue\n");
	do {
		spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
		response = _get_first_reassembly(info);
		if (response) {
			list_del(&response->list);
			/* drop the lock before recycling the buffer */
			spin_unlock_irqrestore(
				&sc->recv_io.reassembly.lock, flags);
			put_receive_buffer(info, response);
		} else
			spin_unlock_irqrestore(
				&sc->recv_io.reassembly.lock, flags);
	} while (response);
	sc->recv_io.reassembly.data_length = 0;

	/*
	 * For performance reasons, memory registration and deregistration
	 * are not locked by srv_mutex. It is possible some processes are
	 * blocked on transport srv_mutex while holding memory registration.
	 * Release the transport srv_mutex to allow them to hit the failure
	 * path when sending data, and then release memory registrations.
	 */
	log_rdma_event(INFO, "freeing mr list\n");
	wake_up_interruptible_all(&info->wait_mr);
	while (atomic_read(&info->mr_used_count)) {
		cifs_server_unlock(server);
		msleep(1000);
		cifs_server_lock(server);
	}
	destroy_mr_list(info);
	/* NOTE(review): remainder of smbd_destroy() is missing from this extract */
/*
 * Reconnect this SMBD connection, called from upper layer
 * return value: 0 on success, or actual error code
 */
int smbd_reconnect(struct TCP_Server_Info *server)
{
	log_rdma_event(INFO, "reconnecting rdma session\n");

	/*
	 * This is possible if transport is disconnected and we haven't received
	 * notification from RDMA, but upper layer has detected timeout
	 */
	if (server->smbd_conn->socket.status == SMBDIRECT_SOCKET_CONNECTED) {
		log_rdma_event(INFO, "disconnecting transport\n");
		smbd_destroy(server);
	}
	/* NOTE(review): remainder of this function is missing from this extract */
/*
 * Establish an SMBD connection to dstaddr.
 * First attempts SMBD_PORT; on failure retries once on SMB_PORT.
 * Returns the new connection, or NULL if both attempts fail.
 */
struct smbd_connection *smbd_get_connection(
	struct TCP_Server_Info *server, struct sockaddr *dstaddr)
{
	struct smbd_connection *ret;
	int port = SMBD_PORT;

	ret = _smbd_get_connection(server, dstaddr, port);

	/* Try SMB_PORT if SMBD_PORT doesn't work */
	if (!ret && port == SMBD_PORT) {
		port = SMB_PORT;
		ret = _smbd_get_connection(server, dstaddr, port);
	}

	return ret;
}
/*
 * Receive data from the transport's receive reassembly queue
 * All the incoming data packets are placed in reassembly queue
 * iter: the buffer to read data into
 * size: the length of data to read
 * return value: actual data read
 *
 * Note: this implementation copies the data from reassembly queue to receive
 * buffers used by upper layer. This is not the optimal code path. A better way
 * to do it is to not have upper layer allocate its receive buffers but rather
 * borrow the buffer from reassembly queue, and return it after data is
 * consumed. But this will require more changes to upper layer code, and also
 * need to consider packet boundaries while they still being reassembled.
 */
int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
{
	struct smbdirect_socket *sc = &info->socket;
	struct smbdirect_recv_io *response;
	struct smbdirect_data_transfer *data_transfer;
	size_t size = iov_iter_count(&msg->msg_iter);
	int to_copy, to_read, data_read, offset;
	u32 data_length, remaining_data_length, data_offset;
	int rc;

	if (WARN_ON_ONCE(iov_iter_rw(&msg->msg_iter) == WRITE))
		return -EINVAL; /* It's a bug in upper layer to get there */

again:
	/*
	 * No need to hold the reassembly queue lock all the time as we are
	 * the only one reading from the front of the queue. The transport
	 * may add more entries to the back of the queue at the same time
	 */
	log_read(INFO, "size=%zd sc->recv_io.reassembly.data_length=%d\n", size,
		sc->recv_io.reassembly.data_length);
	if (sc->recv_io.reassembly.data_length >= size) {
		int queue_length;
		int queue_removed = 0;

		/*
		 * Need to make sure reassembly_data_length is read before
		 * reading reassembly_queue_length and calling
		 * _get_first_reassembly. This call is lock free
		 * as we never read at the end of the queue which are being
		 * updated in SOFTIRQ as more data is received
		 */
		virt_rmb();
		queue_length = sc->recv_io.reassembly.queue_length;
		data_read = 0;
		to_read = size;
		offset = sc->recv_io.reassembly.first_entry_offset;
		while (data_read < size) {
			response = _get_first_reassembly(info);
			data_transfer = smbdirect_recv_io_payload(response);
			data_length = le32_to_cpu(data_transfer->data_length);
			remaining_data_length =
				le32_to_cpu(
					data_transfer->remaining_data_length);
			data_offset = le32_to_cpu(data_transfer->data_offset);

			/*
			 * The upper layer expects RFC1002 length at the
			 * beginning of the payload. Return it to indicate
			 * the total length of the packet. This minimize the
			 * change to upper layer packet processing logic. This
			 * will be eventually remove when an intermediate
			 * transport layer is added
			 */
			if (response->first_segment && size == 4) {
				unsigned int rfc1002_len =
					data_length + remaining_data_length;
				__be32 rfc1002_hdr = cpu_to_be32(rfc1002_len);
				if (copy_to_iter(&rfc1002_hdr, sizeof(rfc1002_hdr),
						 &msg->msg_iter) != sizeof(rfc1002_hdr))
					return -EFAULT;
				data_read = 4;
				response->first_segment = false;
				log_read(INFO, "returning rfc1002 length %d\n",
					rfc1002_len);
				goto read_rfc1002_done;
			}

			/*
			 * NOTE(review): the code computing to_copy and copying
			 * the payload into msg->msg_iter appears to be missing
			 * from this extract — to_copy is used below but never
			 * assigned in the visible text. Verify against the
			 * original source.
			 */
			/* move on to the next buffer? */
			if (to_copy == data_length - offset) {
				queue_length--;
				/*
				 * No need to lock if we are not at the
				 * end of the queue
				 */
				if (queue_length)
					list_del(&response->list);
				else {
					spin_lock_irq(
						&sc->recv_io.reassembly.lock);
					list_del(&response->list);
					spin_unlock_irq(
						&sc->recv_io.reassembly.lock);
				}
				queue_removed++;
				info->count_reassembly_queue--;
				info->count_dequeue_reassembly_queue++;
				put_receive_buffer(info, response);
				offset = 0;
				log_read(INFO, "put_receive_buffer offset=0\n");
			} else
				offset += to_copy;

			to_read -= to_copy;
			data_read += to_copy;

			log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n",
				 to_copy, data_length - offset,
				 to_read, data_read, offset);
		}

	/*
	 * NOTE(review): code between the copy loop above and the wait below
	 * (reassembly bookkeeping, the read_rfc1002_done label and the
	 * 'return data_read' path) appears to be missing from this extract.
	 */
	log_read(INFO, "wait_event on more data\n");
	rc = wait_event_interruptible(
		sc->recv_io.reassembly.wait_queue,
		sc->recv_io.reassembly.data_length >= size ||
		sc->status != SMBDIRECT_SOCKET_CONNECTED);
	/* Don't return any data if interrupted */
	if (rc)
		return rc;

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
		log_read(ERR, "disconnected\n");
		return -ECONNABORTED;
	}

	goto again;
}
/*
 * Send data to transport
 * Each rqst is transported as a SMBDirect payload
 * rqst: the data to write
 * return value: 0 if successfully write, otherwise error code
 */
int smbd_send(struct TCP_Server_Info *server,
	int num_rqst, struct smb_rqst *rqst_array)
{
	struct smbd_connection *info = server->smbd_conn;
	struct smbdirect_socket *sc = &info->socket;
	struct smbdirect_socket_parameters *sp = &sc->parameters;
	struct smb_rqst *rqst;
	struct iov_iter iter;
	unsigned int remaining_data_length, klen;
	int rc, i, rqst_idx;

	if (sc->status != SMBDIRECT_SOCKET_CONNECTED)
		return -EAGAIN;

	/*
	 * Add in the page array if there is one. The caller needs to set
	 * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and
	 * ends at page boundary
	 */
	remaining_data_length = 0;
	for (i = 0; i < num_rqst; i++)
		remaining_data_length += smb_rqst_len(server, &rqst_array[i]);

	if (unlikely(remaining_data_length > sp->max_fragmented_send_size)) {
		/* assertion: payload never exceeds negotiated maximum */
		log_write(ERR, "payload size %d > max size %d\n",
			remaining_data_length, sp->max_fragmented_send_size);
		return -EINVAL;
	}

	log_write(INFO, "num_rqst=%d total length=%u\n",
			num_rqst, remaining_data_length);

	rqst_idx = 0;
	do {
		rqst = &rqst_array[rqst_idx];

		cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n",
			 rqst_idx, smb_rqst_len(server, rqst));
		for (i = 0; i < rqst->rq_nvec; i++)
			dump_smb(rqst->rq_iov[i].iov_base, rqst->rq_iov[i].iov_len);

		/* Send the metadata pages. */
		klen = 0;
		for (i = 0; i < rqst->rq_nvec; i++)
			klen += rqst->rq_iov[i].iov_len;
		iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen);

		rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length);
		if (rc < 0)
			break;

		if (iov_iter_count(&rqst->rq_iter) > 0) {
			/* And then the data pages if there are any */
			rc = smbd_post_send_full_iter(info, &rqst->rq_iter,
						      &remaining_data_length);
			if (rc < 0)
				break;
		}
	} while (++rqst_idx < num_rqst);

	/*
	 * As an optimization, we don't wait for individual I/O to finish
	 * before sending the next one.
	 * Send them all and wait for pending send count to get to 0
	 * that means all the I/Os have been out and we are good to return
	 */
	/* NOTE(review): remainder of this function is missing from this extract */
/*
 * The work queue function that recovers MRs
 * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used
 * again. Both calls are slow, so finish them in a workqueue. This will not
 * block I/O path.
 * There is one workqueue that recovers MRs, there is no need to lock as the
 * I/O requests calling smbd_register_mr will never update the links in the
 * mr_list.
 */
static void smbd_mr_recovery_work(struct work_struct *work)
{
	struct smbd_connection *info =
		container_of(work, struct smbd_connection, mr_recovery_work);
	struct smbdirect_socket *sc = &info->socket;
	struct smbd_mr *smbdirect_mr;
	int rc;

	list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
		if (smbdirect_mr->state == MR_ERROR) {

			/* recover this MR entry */
			rc = ib_dereg_mr(smbdirect_mr->mr);
			if (rc) {
				log_rdma_mr(ERR, "ib_dereg_mr failed rc=%x\n",
					rc);
				smbd_disconnect_rdma_connection(info);
				continue;
			}
			/*
			 * NOTE(review): remainder of this function (MR
			 * reallocation and the rest of the loop) is missing
			 * from this extract.
			 */
[Translator's note: the following trailing text is website boilerplate unrelated to the source code above.]
The information on this web page has been carefully compiled to the best of our knowledge.
However, no guarantee is given as to the completeness, correctness,
or quality of the information provided.
Note:
The colored syntax highlighting and the measurement are still experimental.