/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ /* * Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the BSD-type * license below: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * Neither the name of the Network Appliance, Inc. nor the names of * its contributors may be used to endorse or promote products * derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Pre-allocate extra Work Requests for handling reverse-direction
 * Receives and Sends. This is a fixed value because the Work Queues
 * are allocated when the forward channel is set up, long before the
 * backchannel is provisioned. This value is two times
 * NFS4_DEF_CB_SLOT_TABLE_SIZE.
 */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
#define RPCRDMA_BACKWARD_WRS (32)
#else
#define RPCRDMA_BACKWARD_WRS (0)
#endif
/* Allocation flags for transient xprtrdma allocations: do not dip
 * into emergency memory reserves, fail fast instead of retrying,
 * and stay quiet on failure. Use only where the caller has robust
 * logic to handle a failed allocation.
 */
#define XPRTRDMA_GFP_FLAGS	(__GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
/* To ensure a transport can always make forward progress,
 * the number of RDMA segments allowed in header chunk lists
 * is capped at 16. This prevents less-capable devices from
 * overrunning the Send buffer while building chunk lists.
 *
 * Elements of the Read list take up more room than the
 * Write list or Reply chunk. 16 read segments means the
 * chunk lists cannot consume more than
 *
 *	((16 + 2) * read segment size) + 1 XDR words,
 *
 * or about 400 bytes. The fixed part of the header is
 * another 24 bytes. Thus when the inline threshold is
 * 1024 bytes, at least 600 bytes are available for RPC
 * message bodies.
 */
enum {
	/* Cap on RDMA segments per header chunk list */
	RPCRDMA_MAX_HDR_SEGS	= 16,
};
/*
 * struct rpcrdma_rep -- this structure encapsulates state required
 * to receive and complete an RPC Reply, asynchronously. It needs
 * several pieces of state:
 *
 *   o receive buffer and ib_sge (donated to provider)
 *   o status of receive (success or not, length, inv rkey)
 *   o bookkeeping state to get run by reply handler (XDR stream)
 *
 * These structures are allocated during transport initialization.
 * N of these are associated with a transport instance, managed by
 * struct rpcrdma_buffer. N is the max number of outstanding RPCs.
 */
/* To reduce the rate at which a transport invokes ib_post_recv
 * (and thus the hardware doorbell rate), xprtrdma posts Receive
 * WRs in batches.
 *
 * Setting this to zero disables Receive post batching.
 */
enum {
	/* Number of Receive WRs chained per ib_post_recv call */
	RPCRDMA_MAX_RECV_BATCH	= 7,
};
/* * struct rpcrdma_mr - external memory region metadata * * An external memory region is any buffer or page that is registered * on the fly (ie, not pre-registered).
*/ struct rpcrdma_req; struct rpcrdma_mr { struct list_head mr_list; struct rpcrdma_req *mr_req;
/* * struct rpcrdma_req -- structure central to the request/reply sequence. * * N of these are associated with a transport instance, and stored in * struct rpcrdma_buffer. N is the max number of outstanding requests. * * It includes pre-registered buffer memory for send AND recv. * The recv buffer, however, is not owned by this structure, and * is "donated" to the hardware when a recv is posted. When a * reply is handled, the recv buffer used is given back to the * struct rpcrdma_req associated with the request. * * In addition to the basic memory, this structure includes an array * of iovs for send operations. The reason is that the iovs passed to * ib_post_{send,recv} must not be modified until the work request * completes.
*/
/* Maximum number of page-sized "segments" per chunk list to be
 * registered or invalidated. Must handle a Reply chunk:
 */
enum {
	RPCRDMA_MAX_IOV_SEGS	= 3,

	/* Enough page-sized segments for a 1MB payload, plus one
	 * extra segment for an unaligned start.
	 */
	RPCRDMA_MAX_DATA_SEGS	= ((1 * 1024 * 1024) / PAGE_SIZE) + 1,

	RPCRDMA_MAX_SEGS	= RPCRDMA_MAX_DATA_SEGS +
				  RPCRDMA_MAX_IOV_SEGS,
};
/* The Send SGE array is provisioned to send a maximum size
 * inline request:
 * - RPC-over-RDMA header
 * - xdr_buf head iovec
 * - RPCRDMA_MAX_INLINE bytes, in pages
 * - xdr_buf tail iovec
 *
 * The actual number of array elements consumed by each RPC
 * depends on the device's max_sge limit.
 */
enum {
	RPCRDMA_MIN_SEND_SGES	= 3,
	RPCRDMA_MAX_PAGE_SGES	= RPCRDMA_MAX_INLINE >> PAGE_SHIFT,
	/* header + head iovec + pages + tail iovec */
	RPCRDMA_MAX_SEND_SGES	= 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
};
/*
 * struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
 * inline requests/replies, and client/server credits.
 *
 * One of these is associated with a transport instance.
 */
struct rpcrdma_buffer {
	spinlock_t		rb_lock;	/* protects the lists below */
	struct list_head	rb_send_bufs;
	struct list_head	rb_mrs;

	/* accessed when receiving a reply */
	/* NOTE(review): these counters may belong in a separate stats
	 * structure in the full source — verify against the original file.
	 */
	unsigned long long	total_rdma_reply;
	unsigned long long	fixup_copy_count;
	unsigned long		reply_waits_for_send;
	unsigned long		local_inv_needed;
	unsigned long		nomsg_call_count;
	unsigned long		bcall_count;
};
/*
 * RPCRDMA transport -- encapsulates the structures above for
 * integration with RPC.
 *
 * The contained structures are embedded, not pointers,
 * for convenience. This structure need not be visible externally.
 *
 * It is allocated and initialized during mount, and released
 * during unmount.
 */
struct rpcrdma_xprt {
	struct rpc_xprt		rx_xprt;
	struct rpcrdma_ep	*rx_ep;
	struct rpcrdma_buffer	rx_buf;
	struct delayed_work	rx_connect_worker;
	struct rpc_timeout	rx_timeout;
	struct rpcrdma_stats	rx_stats;
};
/* Setting this to 0 ensures interoperability with early servers.
 * Setting this to 1 enhances certain unaligned read/write performance.
 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs().
 */
extern int xprt_rdma_pad_optimize;
/* This setting controls the hunt for a supported memory
 * registration strategy.
 */
extern unsigned int xprt_rdma_memreg_strategy;
/**
 * rpcrdma_regbuf_is_mapped - check if buffer is DMA mapped
 * @rb: regbuf to check
 *
 * Returns true if the buffer is currently mapped to rb->rg_device.
 */
static inline bool rpcrdma_regbuf_is_mapped(struct rpcrdma_regbuf *rb)
{
	return rb->rg_device != NULL;
}
/**
 * rpcrdma_regbuf_dma_map - DMA-map a regbuf
 * @r_xprt: controlling transport instance
 * @rb: regbuf to be mapped
 *
 * Returns true if the buffer is currently DMA mapped.
 */
static inline bool rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
					  struct rpcrdma_regbuf *rb)
{
	/* Fast path: already mapped, nothing to do */
	if (likely(rpcrdma_regbuf_is_mapped(rb)))
		return true;
	return __rpcrdma_regbuf_dma_map(r_xprt, rb);
}
/* * Wrappers for chunk registration, shared by read/write chunk code.
*/
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.