// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
#include <linux/module.h>
#include <linux/vdpa.h>
#include <linux/vringh.h>
#include <uapi/linux/virtio_net.h>
#include <uapi/linux/virtio_ids.h>
#include <uapi/linux/vdpa.h>
#include <uapi/linux/vhost_types.h>
#include <linux/virtio_config.h>
#include <linux/auxiliary_bus.h>
#include <linux/mlx5/cq.h>
#include <linux/mlx5/qp.h>
#include <linux/mlx5/device.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/vport.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/mpfs.h>
#include "mlx5_vdpa.h"
#include "mlx5_vnet.h"
MODULE_AUTHOR("Eli Cohen " );
MODULE_DESCRIPTION("Mellanox VDPA driver" );
MODULE_LICENSE("Dual BSD/GPL" );
#define VALID_FEATURES_MASK \
(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | \
BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) | \
BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | \
BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) | \
BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | \
BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) | \
BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) | \
BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) | \
BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) | \
BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) | \
BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) | \
BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
#define VALID_STATUS_MASK \
(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
#define MLX5V_UNTAGGED 0x1000
/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
* 5.1.6.5.5 "Device operation in multiqueue mode":
*
* Multiqueue is disabled by default.
* The driver enables multiqueue by sending a command using class
* VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
* operation, as follows: ...
*/
#define MLX5V_DEFAULT_VQ_COUNT 2
#define MLX5V_DEFAULT_VQ_SIZE 256
/* Backing storage for a virtqueue completion queue: a fragmented buffer
 * plus the control structure used to locate individual CQEs inside it.
 */
struct mlx5_vdpa_cq_buf {
struct mlx5_frag_buf_ctrl fbc;
struct mlx5_frag_buf frag_buf;
int cqe_size; /* size of one CQE in bytes (MLX5_VDPA_CQE_SIZE) */
int nent; /* number of CQEs in the buffer */
};
/* Completion queue backing a virtqueue's notification channel. */
struct mlx5_vdpa_cq {
struct mlx5_core_cq mcq;
struct mlx5_vdpa_cq_buf buf;
struct mlx5_db db; /* doorbell record: db[0] = consumer index, db[1] = arm */
int cqe; /* number of entries; used as a wrap mask in get_sw_cqe() */
};
/* Driver-allocated memory region registered with firmware (umem object)
 * for the virtqueue's internal use.
 */
struct mlx5_vdpa_umem {
struct mlx5_frag_buf_ctrl fbc;
struct mlx5_frag_buf frag_buf;
int size; /* size in bytes, derived from device umem params in set_umem_size() */
u32 id; /* firmware umem object id returned by CREATE_UMEM */
};
/* One end of the RC connection implementing the virtqueue notification
 * channel (see struct mlx5_vdpa_virtqueue).
 */
struct mlx5_vdpa_qp {
struct mlx5_core_qp mqp;
struct mlx5_frag_buf frag_buf; /* RQ buffer; driver-side QP only */
struct mlx5_db db; /* doorbell record; driver-side QP only */
u16 head; /* RQ producer counter published via rx_post() */
bool fw; /* true when this QP is the firmware end of the connection */
};
/* Snapshot of a virtqueue's configuration and indices, used to re-create
 * the queue with identical state (suspend/resume paths — not visible in
 * this chunk, confirm against callers).
 */
struct mlx5_vq_restore_info {
u32 num_ent;
u64 desc_addr;
u64 device_addr;
u64 driver_addr;
u16 avail_index;
u16 used_index;
struct msi_map map;
bool ready;
bool restore; /* presumably set when this snapshot is valid — verify at use site */
};
/* Per-virtqueue state: guest-visible configuration, the hardware objects
 * that implement the queue, and bookkeeping for save/restore.
 */
struct mlx5_vdpa_virtqueue {
bool ready;
u64 desc_addr;
u64 device_addr;
u64 driver_addr;
u32 num_ent;
/* Resources for implementing the notification channel from the device
* to the driver. fwqp is the firmware end of an RC connection; the
* other end is vqqp used by the driver. cq is where completions are
* reported.
*/
struct mlx5_vdpa_cq cq;
struct mlx5_vdpa_qp fwqp;
struct mlx5_vdpa_qp vqqp;
/* umem resources are required for the virtqueue operation. Their use
* is internal to the device and they must be provided by the driver.
*/
struct mlx5_vdpa_umem umem1;
struct mlx5_vdpa_umem umem2;
struct mlx5_vdpa_umem umem3;
u32 counter_set_id; /* virtio queue counters object id (when supported) */
bool initialized;
int index; /* virtqueue index as seen by the virtio driver */
u32 virtq_id; /* firmware virtio_net_q object id */
struct mlx5_vdpa_net *ndev;
u16 avail_idx;
u16 used_idx;
int fw_state; /* MLX5_VIRTIO_NET_Q_OBJECT_* state */
u64 modified_fields; /* MLX5_VIRTQ_MODIFY_MASK_* bits pending a modify command */
struct mlx5_vdpa_mr *vq_mr;
struct mlx5_vdpa_mr *desc_mr;
struct msi_map map;
/* keep last in the struct */
struct mlx5_vq_restore_info ri;
};
/* Check whether @idx addresses an existing virtqueue given the currently
 * negotiated feature set.
 */
static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	u64 features = mvdev->actual_features;

	/* With MQ negotiated any index up to and including max_idx is legal. */
	if (features & BIT_ULL(VIRTIO_NET_F_MQ))
		return idx <= mvdev->max_idx;

	/* Without MQ there is one RX/TX pair, plus the control VQ at index 2
	 * when CTRL_VQ was negotiated.
	 */
	if (features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
		return idx < 3;

	return idx < 2;
}
static void free_fixed_resources(struct mlx5_vdpa_net *ndev);
static void mvqs_set_defaults(struct mlx5_vdpa_net *ndev);
static int setup_vq_resources(struct mlx5_vdpa_net *ndev, bool filled);
static void teardown_vq_resources(struct mlx5_vdpa_net *ndev);
static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq);
static bool mlx5_vdpa_debug;
/* Log the name of one virtio feature bit if it is set in the local u64
 * 'features' visible at the expansion site. The original body had a space
 * inside the stringize operator ("#_ feature"), which is a compile error:
 * '#' must be followed by a macro parameter name.
 */
#define MLX5_LOG_VIO_FLAG(_feature) \
do { \
	if (features & BIT_ULL(_feature)) \
		mlx5_vdpa_info(mvdev, "%s\n", #_feature); \
} while (0)
/* Log the name of one virtio status bit if it is set in the local u8
 * 'status' visible at the expansion site. Fixes the broken stringize
 * operator ("#_ status" -> "#_status").
 */
#define MLX5_LOG_VIO_STAT(_status) \
do { \
	if (status & (_status)) \
		mlx5_vdpa_info(mvdev, "%s\n", #_status); \
} while (0)
/* TODO: cross-endian support */
static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
{
return virtio_legacy_is_little_endian() ||
(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
}
/* Convert a device-endian virtio 16-bit value to CPU endianness. */
static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
{
	bool little_endian = mlx5_vdpa_is_little_endian(mvdev);

	return __virtio16_to_cpu(little_endian, val);
}
/* Convert a CPU-endian 16-bit value to device (virtio) endianness. */
static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
{
	bool little_endian = mlx5_vdpa_is_little_endian(mvdev);

	return __cpu_to_virtio16(little_endian, val);
}
static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
{
if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
return 2;
return mvdev->max_vqs;
}
/* True when @idx addresses the control virtqueue. */
static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
{
	return ctrl_vq_idx(mvdev) == idx;
}
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
if (status & ~VALID_STATUS_MASK)
mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n" ,
status & ~VALID_STATUS_MASK);
if (!mlx5_vdpa_debug)
return ;
mlx5_vdpa_info(mvdev, "driver status %s" , set ? "set" : "get" );
if (set && !status) {
mlx5_vdpa_info(mvdev, "driver resets the device\n" );
return ;
}
MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
}
/* Log a feature-bit transaction. Always warns about bits outside the
 * feature mask this driver understands; the per-bit breakdown is emitted
 * only when mlx5_vdpa_debug is enabled. @set distinguishes the driver
 * writing features ("sets") from reading them ("reads").
 */
static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
{
if (features & ~VALID_FEATURES_MASK)
mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n" ,
features & ~VALID_FEATURES_MASK);
if (!mlx5_vdpa_debug)
return ;
mlx5_vdpa_info(mvdev, "driver %s feature bits:\n" , set ? "sets" : "reads" );
if (!features)
mlx5_vdpa_info(mvdev, "all feature bits are cleared\n" );
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
}
/* Create the single TIS (transport interface send) object, bound to the
 * device transport domain, that backs all TX virtqueues. The resulting TIS
 * number is stored in ndev->res.tisn. Returns 0 or a negative errno.
 */
static int create_tis(struct mlx5_vdpa_net *ndev)
{
struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
void *tisc;
int err;
tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
if (err)
mlx5_vdpa_warn(mvdev, "create TIS (%d)\n" , err);
return err;
}
/* Release the TIS created by create_tis(). */
static void destroy_tis(struct mlx5_vdpa_net *ndev)
{
mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
}
#define MLX5_VDPA_CQE_SIZE 64
#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
/* Allocate a fragmented buffer holding @nent 64-byte CQEs on the device's
 * NUMA node and initialize the addressing control structure for it.
 * Returns 0 or a negative errno.
 */
static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
{
struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
int err;
err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
ndev->mvdev.mdev->priv.numa_node);
if (err)
return err;
mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
buf->cqe_size = MLX5_VDPA_CQE_SIZE;
buf->nent = nent;
return 0;
}
static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
{
struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
ndev->mvdev.mdev->priv.numa_node);
}
/* Free the fragmented buffer allocated by cq_frag_buf_alloc(). */
static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
{
mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
}
/* Return a pointer to CQE number @n inside the CQ's fragmented buffer. */
static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
{
return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
}
/* Mark every CQE in the freshly allocated buffer invalid so that ownership
 * polling in get_sw_cqe() starts from a clean state.
 */
static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
{
	int i;

	for (i = 0; i < buf->nent; i++) {
		struct mlx5_cqe64 *cqe64 = get_cqe(vcq, i);

		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}
/* Return the CQE at consumer position @n if it has been handed to software:
 * its opcode must be valid and its ownership bit must match the wrap parity
 * of @n (the (n & cq->cqe) test relies on cq->cqe being a power of two).
 * Returns NULL when no new completion is available.
 */
static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
{
struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
!((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
return cqe64;
return NULL;
}
/* Post @n receive credits: advance the RQ head counter and publish it
 * through the doorbell record so hardware sees the new producer index.
 */
static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
{
vqp->head += n;
vqp->db.db[0] = cpu_to_be32(vqp->head);
}
/* Fill the create_qp command mailbox for one end of the virtqueue
 * notification channel. @fw selects the firmware-side QP (fwqp) or the
 * driver-side QP (vqqp); @num_ent sizes the driver-side receive queue.
 */
static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
{
struct mlx5_vdpa_qp *vqp;
__be64 *pas;
void *qpc;
vqp = fw ? &mvq->fwqp : &mvq->vqqp;
MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
if (vqp->fw) {
/* Firmware QP is allocated by the driver for the firmware's
* use so we can skip part of the params as they will be chosen by firmware
*/
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
MLX5_SET(qpc, qpc, no_sq, 1);
return ;
}
/* Driver-side QP: receive-only (no SQ); completions go to the
* virtqueue CQ and the RQ is backed by the buffer from rq_buf_alloc().
*/
MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET(qpc, qpc, no_sq, 1);
MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
/* Hand the RQ buffer's physical addresses to firmware. */
pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
}
/* Allocate the receive-queue buffer for a driver-side QP: @num_ent data
 * segment entries on the device's NUMA node. Returns 0 or a negative errno.
 */
static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
{
return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
num_ent * sizeof (struct mlx5_wqe_data_seg), &vqp->frag_buf,
ndev->mvdev.mdev->priv.numa_node);
}
/* Free the receive-queue buffer allocated by rq_buf_alloc(). */
static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
}
/* Create one QP of the virtqueue notification channel. For the driver-side
 * QP (vqp->fw == false) this also allocates the RQ buffer and doorbell and
 * posts the initial receive credits; the firmware-side QP needs neither.
 * Returns 0 or a negative errno.
 *
 * Error labels are named after the failing step and clean up cumulatively
 * by falling through: err_kzalloc frees the doorbell, then err_db frees the
 * RQ buffer. Both are guarded by !vqp->fw since only the driver-side QP
 * owns those resources.
 */
static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
struct mlx5_vdpa_qp *vqp)
{
struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
void *qpc;
void *in;
int err;
if (!vqp->fw) {
/* NOTE(review): vqp should already be &mvq->vqqp here (callers
* pass one of the two embedded QPs); the reassignment looks
* redundant — confirm.
*/
vqp = &mvq->vqqp;
err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
if (err)
return err;
err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
if (err)
goto err_db;
inlen += vqp->frag_buf.npages * sizeof (__be64);
}
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_kzalloc;
}
qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
if (!vqp->fw)
MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof (out));
kfree(in);
if (err)
goto err_kzalloc;
vqp->mqp.uid = ndev->mvdev.res.uid;
vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
if (!vqp->fw)
rx_post(vqp, mvq->num_ent);
return 0;
err_kzalloc:
if (!vqp->fw)
mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
err_db:
if (!vqp->fw)
rq_buf_free(ndev, vqp);
return err;
}
/* Destroy a QP created by qp_create(). For the driver-side QP also release
 * the doorbell and the RQ buffer; on command failure only warn (resources
 * are still freed for the driver-side QP).
 */
static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
{
u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n" , vqp->mqp.qpn);
if (!vqp->fw) {
mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
rq_buf_free(ndev, vqp);
}
}
/* Peek at the CQE at the current consumer index; NULL if none is ready. */
static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
{
return get_sw_cqe(cq, cq->mcq.cons_index);
}
/* Consume one completion from the CQ. Returns 0 when a CQE was consumed
 * (consumer index advanced) or -EAGAIN when the CQ is empty.
 */
static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
{
	if (!next_cqe_sw(vcq))
		return -EAGAIN;

	vcq->mcq.cons_index++;
	return 0;
}
/* Acknowledge @num consumed completions: update the CQ consumer index,
 * replenish the RQ doorbell, and forward the event to the vdpa callback
 * registered for this virtqueue (if any).
 */
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
struct mlx5_vdpa_net *ndev = mvq->ndev;
struct vdpa_callback *event_cb;
event_cb = &ndev->event_cbs[mvq->index];
mlx5_cq_set_ci(&mvq->cq.mcq);
/* make sure CQ consumer update is visible to the hardware before updating
* RX doorbell record.
*/
dma_wmb();
rx_post(&mvq->vqqp, num);
if (event_cb->callback)
event_cb->callback(event_cb->private );
}
/* CQ completion handler for a virtqueue's notification channel: drain all
 * available CQEs, periodically acknowledging batches so completions can
 * keep flowing, then re-arm the CQ for the next event.
 */
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
{
struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
struct mlx5_vdpa_net *ndev = mvq->ndev;
void __iomem *uar_page = ndev->mvdev.res.uar->map;
int num = 0;
while (!mlx5_vdpa_poll_one(&mvq->cq)) {
num++;
if (num > mvq->num_ent / 2) {
/* If completions keep coming while we poll, we want to
* let the hardware know that we consumed them by
* updating the doorbell record. We also let vdpa core
* know about this so it passes it on the virtio driver
* on the guest.
*/
mlx5_vdpa_handle_completions(mvq, num);
num = 0;
}
}
if (num)
mlx5_vdpa_handle_completions(mvq, num);
mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
}
/* Create the completion queue for virtqueue @idx with @num_ent entries:
 * allocate the doorbell (db[0] = consumer index, db[1] = arm counter) and
 * CQE buffer, issue CREATE_CQ, and arm the CQ. Returns 0 or a negative
 * errno; on failure everything allocated so far is released.
 */
static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
{
struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
void __iomem *uar_page = ndev->mvdev.res.uar->map;
u32 out[MLX5_ST_SZ_DW(create_cq_out)];
struct mlx5_vdpa_cq *vcq = &mvq->cq;
__be64 *pas;
int inlen;
void *cqc;
void *in;
int err;
int eqn;
err = mlx5_db_alloc(mdev, &vcq->db);
if (err)
return err;
vcq->mcq.set_ci_db = vcq->db.db;
vcq->mcq.arm_db = vcq->db.db + 1;
vcq->mcq.cqe_sz = 64;
vcq->mcq.comp = mlx5_vdpa_cq_comp;
vcq->cqe = num_ent;
err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
if (err)
goto err_db;
cq_frag_buf_init(vcq, &vcq->buf);
/* The mailbox carries one PAS entry per buffer page. */
inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_vzalloc;
}
MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
/* Use vector 0 by default. Consider adding code to choose least used
* vector.
*/
err = mlx5_comp_eqn_get(mdev, 0, &eqn);
if (err)
goto err_vec;
cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof (out));
if (err)
goto err_vec;
/* Arm immediately so the first completion raises an event. */
mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
kfree(in);
return 0;
err_vec:
kfree(in);
err_vzalloc:
cq_frag_buf_free(ndev, &vcq->buf);
err_db:
mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
return err;
}
/* Destroy the CQ of virtqueue @idx and release its buffer and doorbell.
 * If the destroy command fails, skip freeing so hardware cannot DMA into
 * reclaimed memory.
 */
static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
{
struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
struct mlx5_vdpa_cq *vcq = &mvq->cq;
if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n" , vcq->mcq.cqn);
return ;
}
cq_frag_buf_free(ndev, &vcq->buf);
mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
}
/* Query the device's vdpa-emulation capabilities and cache the per-umem
 * (a, b) size parameters used by set_umem_size(). Returns 0 on success or
 * a negative errno.
 */
static int read_umem_params(struct mlx5_vdpa_net *ndev)
{
	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
	int out_size;
	void *caps;
	void *out;
	int err;

	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
	out = kzalloc(out_size, GFP_KERNEL);
	if (!out)
		return -ENOMEM;

	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
	if (err) {
		mlx5_vdpa_warn(&ndev->mvdev,
			       "Failed reading vdpa umem capabilities with err %d\n", err);
		goto out;
	}

	caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);

	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);

	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);

	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);

out:
	kfree(out);
	/* Propagate the command status. Returning 0 unconditionally (as
	 * before) would let callers proceed with all umem params zeroed.
	 */
	return err;
}
/* Compute the required size of umem @num (1..3) for @mvq from the device's
 * per-umem (a, b) parameters: size = a * num_ent + b. The matching umem
 * object is returned through @umemp.
 */
static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
			  struct mlx5_vdpa_umem **umemp)
{
	u32 p_a;
	u32 p_b;

	switch (num) {
	case 1:
		p_a = ndev->umem_1_buffer_param_a;
		p_b = ndev->umem_1_buffer_param_b;
		*umemp = &mvq->umem1;
		break;
	case 2:
		p_a = ndev->umem_2_buffer_param_a;
		p_b = ndev->umem_2_buffer_param_b;
		*umemp = &mvq->umem2;
		break;
	case 3:
	default:
		/* Callers only pass 1..3; the default arm prevents reading
		 * uninitialized p_a/p_b and leaving *umemp unset should that
		 * ever change.
		 */
		p_a = ndev->umem_3_buffer_param_a;
		p_b = ndev->umem_3_buffer_param_b;
		*umemp = &mvq->umem3;
		break;
	}

	(*umemp)->size = p_a * mvq->num_ent + p_b;
}
/* Free the fragmented buffer backing a umem object. */
static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
{
mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
}
/* Allocate and register umem @num (1..3) for virtqueue @mvq: size it per
 * the device's umem parameters, allocate backing pages, and issue
 * CREATE_UMEM with one MTT entry per page. The resulting object id is
 * stored in umem->id. Returns 0 or a negative errno.
 */
static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
int inlen;
u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
void *um;
void *in;
int err;
__be64 *pas;
struct mlx5_vdpa_umem *umem;
set_umem_size(ndev, mvq, num, &umem);
err = umem_frag_buf_alloc(ndev, umem, umem->size);
if (err)
return err;
inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_in;
}
MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
um = MLX5_ADDR_OF(create_umem_in, in, umem);
MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof (out));
if (err) {
mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n" , err);
goto err_cmd;
}
kfree(in);
umem->id = MLX5_GET(create_umem_out, out, umem_id);
return 0;
err_cmd:
kfree(in);
err_in:
umem_frag_buf_free(ndev, umem);
return err;
}
/* Unregister umem @num (1..3) of @mvq with firmware and, on success, free
 * its backing pages. On DESTROY_UMEM failure the pages are intentionally
 * leaked so the device cannot DMA into reclaimed memory.
 */
static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
{
	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
	struct mlx5_vdpa_umem *umem;

	switch (num) {
	case 1:
		umem = &mvq->umem1;
		break;
	case 2:
		umem = &mvq->umem2;
		break;
	case 3:
		umem = &mvq->umem3;
		break;
	default:
		/* Callers only pass 1..3; without this arm an invalid @num
		 * would use 'umem' uninitialized.
		 */
		return;
	}

	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
		return;

	umem_frag_buf_free(ndev, umem);
}
/* Create the three umems a virtqueue object requires; on any failure, tear
 * down the ones already created. Returns 0 or a negative errno.
 */
static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int created;
	int err = 0;

	for (created = 0; created < 3; created++) {
		err = create_umem(ndev, mvq, created + 1);
		if (err)
			break;
	}

	if (!err)
		return 0;

	/* Unwind in reverse creation order. */
	while (created > 0) {
		umem_destroy(ndev, mvq, created);
		created--;
	}

	return err;
}
/* Destroy all three umems of @mvq in reverse creation order. */
static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	int num = 3;

	while (num) {
		umem_destroy(ndev, mvq, num);
		num--;
	}
}
/* Pick the virtqueue layout to request from the device based on its
 * capability mask, preferring split over packed. If neither split nor
 * packed is advertised the WARN fires and packed is returned anyway.
 */
static int get_queue_type(struct mlx5_vdpa_net *ndev)
{
u32 type_mask;
type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
/* prefer split queue */
if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
}
/* Data virtqueues alternate RX/TX: even indices are RX, odd are TX. */
static bool vq_is_tx(u16 idx)
{
	return idx & 1;
}
/* Bit positions of the offload features inside the virtio_net_q object's
 * queue_feature_bit_mask fields; get_features() translates negotiated
 * virtio feature bits into this encoding.
 */
enum {
MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
MLX5_VIRTIO_NET_F_HOST_ECN = 4,
MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
MLX5_VIRTIO_NET_F_CSUM = 10,
MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
};
/* Translate negotiated virtio-net offload feature bits into the device's
 * virtq-object feature-bit encoding (MLX5_VIRTIO_NET_F_* positions).
 * Note: GUEST_CSUM has no mapping here; only the bits below are forwarded.
 */
static u16 get_features(u64 features)
{
	u16 dev_features = 0;

	if (features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF))
		dev_features |= 1 << MLX5_VIRTIO_NET_F_MRG_RXBUF;
	if (features & BIT_ULL(VIRTIO_NET_F_HOST_ECN))
		dev_features |= 1 << MLX5_VIRTIO_NET_F_HOST_ECN;
	if (features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN))
		dev_features |= 1 << MLX5_VIRTIO_NET_F_GUEST_ECN;
	if (features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6))
		dev_features |= 1 << MLX5_VIRTIO_NET_F_GUEST_TSO6;
	if (features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4))
		dev_features |= 1 << MLX5_VIRTIO_NET_F_GUEST_TSO4;
	if (features & BIT_ULL(VIRTIO_NET_F_CSUM))
		dev_features |= 1 << MLX5_VIRTIO_NET_F_CSUM;
	if (features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6))
		dev_features |= 1 << MLX5_VIRTIO_NET_F_HOST_TSO6;
	if (features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4))
		dev_features |= 1 << MLX5_VIRTIO_NET_F_HOST_TSO4;

	return dev_features;
}
/* True when the device exposes the virtio queue counters general object. */
static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
{
return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
}
/* MSI-X event mode requires both the device capability and the ability to
 * dynamically allocate MSI-X vectors on the PCI function.
 */
static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
{
return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
}
/* Create the firmware virtio_net_q object for @mvq: allocate its three
 * umems and issue CREATE_GENERAL_OBJECT with the queue configuration.
 * When @filled, the guest-provided ring addresses, indices and memory keys
 * are programmed immediately and references on the MRs are taken; otherwise
 * those fields are deferred and flagged in modified_fields for a later
 * modify command. Returns 0 or a negative errno.
 */
static int create_virtqueue(struct mlx5_vdpa_net *ndev,
struct mlx5_vdpa_virtqueue *mvq,
bool filled)
{
int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
struct mlx5_vdpa_mr *vq_mr;
struct mlx5_vdpa_mr *vq_desc_mr;
u64 features = filled ? mvdev->actual_features : mvdev->mlx_features;
void *obj_context;
u16 mlx_features;
void *cmd_hdr;
void *vq_ctx;
void *in;
int err;
err = umems_create(ndev, mvq);
if (err)
return err;
in = kzalloc(inlen, GFP_KERNEL);
if (!in) {
err = -ENOMEM;
goto err_alloc;
}
mlx_features = get_features(features);
cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
/* The 13-bit feature mask is split across two fields in the object. */
MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
mlx_features >> 3);
MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
mlx_features & 7);
vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
if (vq_is_tx(mvq->index))
MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
/* Event delivery: MSI-X vector when one is mapped, else the fw QP of
* the notification channel.
*/
if (mvq->map.virq) {
MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
} else {
MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
}
MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
!!(features & BIT_ULL(VIRTIO_F_VERSION_1)));
if (filled) {
MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
if (vq_mr)
MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
if (vq_desc_mr &&
MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, vq_desc_mr->mkey);
} else {
/* If there is no mr update, make sure that the existing ones are set
* modify to ready.
*/
vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
if (vq_mr)
mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
vq_desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
if (vq_desc_mr)
mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
}
MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
if (counters_supported(&ndev->mvdev))
MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof (out));
if (err)
goto err_cmd;
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
kfree(in);
mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
/* Pin the MRs programmed into the queue; released in destroy_virtqueue(). */
if (filled) {
mlx5_vdpa_get_mr(mvdev, vq_mr);
mvq->vq_mr = vq_mr;
if (vq_desc_mr &&
MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported)) {
mlx5_vdpa_get_mr(mvdev, vq_desc_mr);
mvq->desc_mr = vq_desc_mr;
}
}
return 0;
err_cmd:
kfree(in);
err_alloc:
umems_destroy(ndev, mvq);
return err;
}
/* Destroy the firmware virtio_net_q object of @mvq, release its umems, and
 * drop the MR references taken in create_virtqueue(). On command failure
 * only warn and keep the resources (the object may still be live).
 * NOTE(review): the input fields are addressed via general_obj_out_cmd_hdr;
 * presumably an mlx5_ifc layout quirk where in/out headers coincide —
 * confirm against mlx5_ifc_vdpa.h.
 */
static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
MLX5_OBJ_TYPE_VIRTIO_NET_Q);
if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof (in), out, sizeof (out))) {
mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n" , mvq->virtq_id);
return ;
}
mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
umems_destroy(ndev, mvq);
mlx5_vdpa_put_mr(&ndev->mvdev, mvq->vq_mr);
mvq->vq_mr = NULL;
mlx5_vdpa_put_mr(&ndev->mvdev, mvq->desc_mr);
mvq->desc_mr = NULL;
}
/* Remote QPN for one end of the RC connection: the firmware QP's peer is
 * the driver QP and vice versa.
 */
static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	if (fw)
		return mvq->vqqp.mqp.qpn;

	return mvq->fwqp.mqp.qpn;
}
/* Local QPN of the selected end (firmware or driver) of the RC connection. */
static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
{
	if (fw)
		return mvq->fwqp.mqp.qpn;

	return mvq->vqqp.mqp.qpn;
}
/* Allocate and fill the command mailboxes for one modify-QP transition of
 * the notification-channel QPs. On success *in/*out hold the prepared
 * buffers and *inlen/*outlen their sizes; on allocation failure (or an
 * unknown @cmd) both pointers are set to NULL, which the caller treats as
 * -ENOMEM.
 * NOTE(review): the INIT2RTR and RTR2RTS cases locate qpc through the
 * rst2init_qp_in layout; presumably the qpc field sits at the same offset
 * in all modify-QP inboxes — confirm against mlx5_ifc.h.
 */
static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
int *outlen, u32 qpn, u32 rqpn)
{
void *qpc;
void *pp;
switch (cmd) {
case MLX5_CMD_OP_2RST_QP:
*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
*in = kzalloc(*inlen, GFP_KERNEL);
*out = kzalloc(*outlen, GFP_KERNEL);
if (!*in || !*out)
goto outerr;
MLX5_SET(qp_2rst_in, *in, opcode, cmd);
MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
MLX5_SET(qp_2rst_in, *in, qpn, qpn);
break ;
case MLX5_CMD_OP_RST2INIT_QP:
*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
*in = kzalloc(*inlen, GFP_KERNEL);
*out = kzalloc(MLX5_ST_SZ_BYTES(rst2init_qp_out), GFP_KERNEL);
if (!*in || !*out)
goto outerr;
MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
MLX5_SET(qpc, qpc, remote_qpn, rqpn);
MLX5_SET(qpc, qpc, rwe, 1);
pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
MLX5_SET(ads, pp, vhca_port_num, 1);
break ;
case MLX5_CMD_OP_INIT2RTR_QP:
*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
*in = kzalloc(*inlen, GFP_KERNEL);
*out = kzalloc(MLX5_ST_SZ_BYTES(init2rtr_qp_out), GFP_KERNEL);
if (!*in || !*out)
goto outerr;
MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
MLX5_SET(qpc, qpc, log_msg_max, 30);
MLX5_SET(qpc, qpc, remote_qpn, rqpn);
pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
MLX5_SET(ads, pp, fl, 1);
break ;
case MLX5_CMD_OP_RTR2RTS_QP:
*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
*in = kzalloc(*inlen, GFP_KERNEL);
*out = kzalloc(MLX5_ST_SZ_BYTES(rtr2rts_qp_out), GFP_KERNEL);
if (!*in || !*out)
goto outerr;
MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
MLX5_SET(ads, pp, ack_timeout, 14);
MLX5_SET(qpc, qpc, retry_count, 7);
MLX5_SET(qpc, qpc, rnr_retry, 7);
break ;
default :
goto outerr_nullify;
}
return ;
outerr:
/* kfree(NULL) is a no-op, so freeing both is safe when only one
* allocation succeeded.
*/
kfree(*in);
kfree(*out);
outerr_nullify:
*in = NULL;
*out = NULL;
}
/* Free the command mailboxes allocated by alloc_inout(). */
static void free_inout(void *in, void *out)
{
kfree(in);
kfree(out);
}
/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the subjected QP is the one used
 * by firmware.
 *
 * Issue one modify-QP state transition (@cmd) for the selected QP, with
 * the opposite QP as the remote peer. Returns 0, -ENOMEM if the mailboxes
 * could not be allocated, or the command execution error.
 */
static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
{
int outlen;
int inlen;
void *out;
void *in;
int err;
alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
if (!in || !out)
return -ENOMEM;
err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
free_inout(in, out);
return err;
}
static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
int err;
err = modify_qp(ndev, mvq, true , MLX5_CMD_OP_2RST_QP);
if (err)
return err;
err = modify_qp(ndev, mvq, false , MLX5_CMD_OP_2RST_QP);
if (err)
return err;
err = modify_qp(ndev, mvq, true , MLX5_CMD_OP_RST2INIT_QP);
if (err)
return err;
err = modify_qp(ndev, mvq, false , MLX5_CMD_OP_RST2INIT_QP);
if (err)
return err;
err = modify_qp(ndev, mvq, true , MLX5_CMD_OP_INIT2RTR_QP);
if (err)
return err;
err = modify_qp(ndev, mvq, false , MLX5_CMD_OP_INIT2RTR_QP);
if (err)
return err;
return modify_qp(ndev, mvq, true , MLX5_CMD_OP_RTR2RTS_QP);
}
/* Virtqueue attributes as read back from the firmware VQ object. */
struct mlx5_virtq_attr {
u8 state; /* MLX5_VIRTIO_NET_Q_OBJECT_STATE_* */
u16 available_index; /* firmware's hw_available_index */
u16 used_index; /* firmware's hw_used_index */
};
/* Mailbox buffers for one asynchronous VIRTIO_NET_Q query command. */
struct mlx5_virtqueue_query_mem {
u8 in[MLX5_ST_SZ_BYTES(query_virtio_net_q_in)];
u8 out[MLX5_ST_SZ_BYTES(query_virtio_net_q_out)];
};
/* Mailbox buffers for one asynchronous VIRTIO_NET_Q modify command. */
struct mlx5_virtqueue_modify_mem {
u8 in[MLX5_ST_SZ_BYTES(modify_virtio_net_q_in)];
u8 out[MLX5_ST_SZ_BYTES(modify_virtio_net_q_out)];
};
/* Populate the general-object header of a QUERY command for the given
 * virtqueue object. Only the header is needed; the object id selects the VQ.
 */
static void fill_query_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
struct mlx5_vdpa_virtqueue *mvq,
struct mlx5_virtqueue_query_mem *cmd)
{
void *cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
}
/* Extract the VQ state and hardware avail/used indices from a completed
 * query command into attr (all other attr fields are zeroed).
 */
static void query_virtqueue_end(struct mlx5_vdpa_net *ndev,
struct mlx5_virtqueue_query_mem *cmd,
struct mlx5_virtq_attr *attr)
{
void *obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, cmd->out, obj_context);
memset(attr, 0, sizeof (*attr));
attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
}
static int query_virtqueues(struct mlx5_vdpa_net *ndev,
int start_vq,
int num_vqs,
struct mlx5_virtq_attr *attrs)
{
struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
struct mlx5_virtqueue_query_mem *cmd_mem;
struct mlx5_vdpa_async_cmd *cmds;
int err = 0;
WARN(start_vq + num_vqs > mvdev->max_vqs, "query vq range invalid [%d, %d), max_vqs: %u\n" ,
start_vq, start_vq + num_vqs, mvdev->max_vqs);
cmds = kvcalloc(num_vqs, sizeof (*cmds), GFP_KERNEL);
cmd_mem = kvcalloc(num_vqs, sizeof (*cmd_mem), GFP_KERNEL);
if (!cmds || !cmd_mem) {
err = -ENOMEM;
goto done;
}
for (int i = 0; i < num_vqs; i++) {
cmds[i].in = &cmd_mem[i].in;
cmds[i].inlen = sizeof (cmd_mem[i].in);
cmds[i].out = &cmd_mem[i].out;
cmds[i].outlen = sizeof (cmd_mem[i].out);
fill_query_virtqueue_cmd(ndev, &ndev->vqs[start_vq + i], &cmd_mem[i]);
}
err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
if (err) {
mlx5_vdpa_err(mvdev, "error issuing query cmd for vq range [%d, %d): %d\n" ,
start_vq, start_vq + num_vqs, err);
goto done;
}
for (int i = 0; i < num_vqs; i++) {
struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
int vq_idx = start_vq + i;
if (cmd->err) {
mlx5_vdpa_err(mvdev, "query vq %d failed, err: %d\n" , vq_idx, err);
if (!err)
err = cmd->err;
continue ;
}
query_virtqueue_end(ndev, &cmd_mem[i], &attrs[i]);
}
done:
kvfree(cmd_mem);
kvfree(cmds);
return err;
}
static bool is_resumable(struct mlx5_vdpa_net *ndev)
{
return ndev->mvdev.vdev.config->resume;
}
/* Validate a firmware VQ object state transition:
 * INIT -> RDY, RDY -> SUSPEND always; SUSPEND -> RDY only on resumable
 * devices. ERR and any unknown state are terminal.
 */
static bool is_valid_state_change(int oldstate, int newstate, bool resumable)
{
	if (oldstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT)
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
	if (oldstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
	if (oldstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND)
		return resumable && newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
	return false;
}
/* Return true when all pending modified_fields may legally be applied.
 * The state field is always modifiable; any other field requires the VQ
 * object to be in INIT or SUSPEND.
 */
static bool modifiable_virtqueue_fields(struct mlx5_vdpa_virtqueue *mvq)
{
	if ((mvq->modified_fields & ~MLX5_VIRTQ_MODIFY_MASK_STATE) == 0)
		return true;

	return mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT ||
	       mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
}
/* Build a MODIFY command for the VQ object, applying only the fields flagged
 * in mvq->modified_fields. Note: the mkey branches may clear bits from
 * modified_fields, so modify_field_select must be written last.
 */
static void fill_modify_virtqueue_cmd(struct mlx5_vdpa_net *ndev,
struct mlx5_vdpa_virtqueue *mvq,
int state,
struct mlx5_virtqueue_modify_mem *cmd)
{
struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
struct mlx5_vdpa_mr *desc_mr = NULL;
struct mlx5_vdpa_mr *vq_mr = NULL;
void *obj_context;
void *cmd_hdr;
void *vq_ctx;
cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, general_obj_in_cmd_hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, cmd->in, obj_context);
vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
MLX5_SET(virtio_net_q_object, obj_context, state, state);
/* Ring addresses as negotiated with the guest driver. */
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_ADDRS) {
MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
}
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_AVAIL_IDX)
MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_USED_IDX)
MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_VIRTIO_VERSION)
MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
!!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
/* The device feature word is split across two fields in the object. */
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_QUEUE_FEATURES) {
u16 mlx_features = get_features(ndev->mvdev.actual_features);
MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
mlx_features >> 3);
MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
mlx_features & 7);
}
/* If no mkey is available for the relevant ASID, drop that bit from the
 * request instead of programming a stale key.
 */
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
vq_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP]];
if (vq_mr)
MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, vq_mr->mkey);
else
mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY;
}
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
desc_mr = mvdev->mres.mr[mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP]];
if (desc_mr && MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, desc_group_mkey_supported))
MLX5_SET(virtio_q, vq_ctx, desc_group_mkey, desc_mr->mkey);
else
mvq->modified_fields &= ~MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY;
}
/* Written last: the branches above may have cleared mkey bits. */
MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select, mvq->modified_fields);
}
/* Commit the bookkeeping after a successful MODIFY command: swap the cached
 * mkey references to the ones now programmed in firmware, record the new
 * firmware state, and clear the pending-modification mask.
 */
static void modify_virtqueue_end(struct mlx5_vdpa_net *ndev,
struct mlx5_vdpa_virtqueue *mvq,
int state)
{
struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_VIRTIO_Q_MKEY) {
unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_GROUP];
struct mlx5_vdpa_mr *vq_mr = mvdev->mres.mr[asid];
/* Drop the reference to the previous MR, take one on the new MR. */
mlx5_vdpa_put_mr(mvdev, mvq->vq_mr);
mlx5_vdpa_get_mr(mvdev, vq_mr);
mvq->vq_mr = vq_mr;
}
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_DESC_GROUP_MKEY) {
unsigned int asid = mvdev->mres.group2asid[MLX5_VDPA_DATAVQ_DESC_GROUP];
struct mlx5_vdpa_mr *desc_mr = mvdev->mres.mr[asid];
mlx5_vdpa_put_mr(mvdev, mvq->desc_mr);
mlx5_vdpa_get_mr(mvdev, desc_mr);
mvq->desc_mr = desc_mr;
}
if (mvq->modified_fields & MLX5_VIRTQ_MODIFY_MASK_STATE)
mvq->fw_state = state;
mvq->modified_fields = 0;
}
/* Create a virtio queue counter-set object for this VQ and remember its id.
 * A no-op (success) when the device does not support VQ counters.
 */
static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
void *cmd_hdr;
int err;
if (!counters_supported(&ndev->mvdev))
return 0;
cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof (in), out, sizeof (out));
if (err)
return err;
mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
return 0;
}
/* Destroy the VQ's counter-set object; failure is only logged since this is
 * teardown. A no-op when the device does not support VQ counters.
 */
static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
if (!counters_supported(&ndev->mvdev))
return ;
MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof (in), out, sizeof (out)))
mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n" , mvq->counter_set_id);
}
/* Per-VQ interrupt handler: forward the interrupt to the registered vdpa
 * callback, or just acknowledge it when no callback is set.
 */
static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
{
	struct vdpa_callback *cb = priv;

	if (!cb->callback)
		return IRQ_HANDLED;

	return cb->callback(cb->private);
}
/* Best-effort: grab the first free entry from the device IRQ pool, request
 * the interrupt for this VQ and store the mapping in mvq->map. Returns
 * silently (leaving mvq->map untouched) when no entry is free or request_irq
 * fails -- the VQ then runs without a dedicated vector.
 */
static void alloc_vector(struct mlx5_vdpa_net *ndev,
struct mlx5_vdpa_virtqueue *mvq)
{
struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
struct mlx5_vdpa_irq_pool_entry *ent;
int err;
int i;
for (i = 0; i < irqp->num_ent; i++) {
ent = &irqp->entries[i];
if (!ent->used) {
snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d" ,
dev_name(&ndev->mvdev.vdev.dev), mvq->index);
ent->dev_id = &ndev->event_cbs[mvq->index];
err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
ent->name, ent->dev_id);
if (err)
return ;
ent->used = true ;
mvq->map = ent->map;
return ;
}
}
}
/* Release the pool entry whose virq matches this VQ's mapping, if any.
 * Matching is by virq number since mvq only stores the msi_map copy.
 */
static void dealloc_vector(struct mlx5_vdpa_net *ndev,
struct mlx5_vdpa_virtqueue *mvq)
{
struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
int i;
for (i = 0; i < irqp->num_ent; i++)
if (mvq->map.virq == irqp->entries[i].map.virq) {
free_irq(mvq->map.virq, irqp->entries[i].dev_id);
irqp->entries[i].used = false ;
return ;
}
}
/* Bring up all hardware resources for one virtqueue: CQ, the fw/driver QP
 * pair, the QP connection, optional counters, an IRQ vector (best effort)
 * and finally the firmware VQ object. If the queue was already marked ready,
 * it is resumed immediately. On error, everything created so far is torn
 * down in reverse order. Idempotent: returns 0 if already initialized.
 *
 * @filled: passed through to create_virtqueue().
 */
static int setup_vq(struct mlx5_vdpa_net *ndev,
struct mlx5_vdpa_virtqueue *mvq,
bool filled)
{
u16 idx = mvq->index;
int err;
if (mvq->initialized)
return 0;
err = cq_create(ndev, idx, mvq->num_ent);
if (err)
return err;
err = qp_create(ndev, mvq, &mvq->fwqp);
if (err)
goto err_fwqp;
err = qp_create(ndev, mvq, &mvq->vqqp);
if (err)
goto err_vqqp;
err = connect_qps(ndev, mvq);
if (err)
goto err_connect;
err = counter_set_alloc(ndev, mvq);
if (err)
goto err_connect;
/* Best effort; VQ works without a dedicated vector. */
alloc_vector(ndev, mvq);
err = create_virtqueue(ndev, mvq, filled);
if (err)
goto err_vq;
mvq->initialized = true ;
if (mvq->ready) {
err = resume_vq(ndev, mvq);
if (err)
goto err_modify;
}
return 0;
err_modify:
destroy_virtqueue(ndev, mvq);
err_vq:
dealloc_vector(ndev, mvq);
counter_set_dealloc(ndev, mvq);
err_connect:
qp_destroy(ndev, &mvq->vqqp);
err_vqqp:
qp_destroy(ndev, &mvq->fwqp);
err_fwqp:
cq_destroy(ndev, idx);
return err;
}
/*
 * Apply the pending modified_fields of each virtqueue in the range
 * [start_vq, start_vq + num_vqs) and move it to 'state', as one batch of
 * asynchronous firmware commands. State transitions are validated before
 * anything is issued.
 *
 * Returns 0 on success or the first error. A per-vq command failure is
 * logged and remembered but does not stop processing of the remaining
 * completions.
 */
static int modify_virtqueues(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs, int state)
{
	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
	struct mlx5_virtqueue_modify_mem *cmd_mem;
	struct mlx5_vdpa_async_cmd *cmds;
	int err = 0;

	WARN(start_vq + num_vqs > mvdev->max_vqs, "modify vq range invalid [%d, %d), max_vqs: %u\n",
	     start_vq, start_vq + num_vqs, mvdev->max_vqs);

	cmds = kvcalloc(num_vqs, sizeof(*cmds), GFP_KERNEL);
	cmd_mem = kvcalloc(num_vqs, sizeof(*cmd_mem), GFP_KERNEL);
	if (!cmds || !cmd_mem) {
		err = -ENOMEM;
		goto done;
	}

	for (int i = 0; i < num_vqs; i++) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
		struct mlx5_vdpa_virtqueue *mvq;
		int vq_idx = start_vq + i;

		mvq = &ndev->vqs[vq_idx];

		/* Non-state fields are only modifiable in INIT/SUSPEND. */
		if (!modifiable_virtqueue_fields(mvq)) {
			err = -EINVAL;
			goto done;
		}

		if (mvq->fw_state != state) {
			if (!is_valid_state_change(mvq->fw_state, state, is_resumable(ndev))) {
				err = -EINVAL;
				goto done;
			}

			mvq->modified_fields |= MLX5_VIRTQ_MODIFY_MASK_STATE;
		}

		cmd->in = &cmd_mem[i].in;
		cmd->inlen = sizeof(cmd_mem[i].in);
		cmd->out = &cmd_mem[i].out;
		cmd->outlen = sizeof(cmd_mem[i].out);
		fill_modify_virtqueue_cmd(ndev, mvq, state, &cmd_mem[i]);
	}

	err = mlx5_vdpa_exec_async_cmds(&ndev->mvdev, cmds, num_vqs);
	if (err) {
		mlx5_vdpa_err(mvdev, "error issuing modify cmd for vq range [%d, %d)\n",
			      start_vq, start_vq + num_vqs);
		goto done;
	}

	for (int i = 0; i < num_vqs; i++) {
		struct mlx5_vdpa_async_cmd *cmd = &cmds[i];
		struct mlx5_vdpa_virtqueue *mvq;
		int vq_idx = start_vq + i;

		mvq = &ndev->vqs[vq_idx];

		if (cmd->err) {
			/* Log the per-command error code; the aggregate 'err'
			 * may still be zero here. Keep the first failure as
			 * the function's return value.
			 */
			mlx5_vdpa_err(mvdev, "modify vq %d failed, state: %d -> %d, err: %d\n",
				      vq_idx, mvq->fw_state, state, cmd->err);
			if (!err)
				err = cmd->err;
			continue;
		}

		modify_virtqueue_end(ndev, mvq, state);
	}

done:
	kvfree(cmd_mem);
	kvfree(cmds);
	return err;
}
/* Suspend the vq range [start_vq, start_vq + num_vqs) and snapshot each
 * queue's hardware avail/used indices so they can be restored on resume.
 *
 * NOTE(review): only the first vq of the range is checked for
 * initialized/RDY; the whole range is assumed to share that state --
 * confirm against the callers.
 */
static int suspend_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
{
struct mlx5_vdpa_virtqueue *mvq;
struct mlx5_virtq_attr *attrs;
int vq_idx, i;
int err;
if (start_vq >= ndev->cur_num_vqs)
return -EINVAL;
mvq = &ndev->vqs[start_vq];
if (!mvq->initialized)
return 0;
if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
return 0;
err = modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND);
if (err)
return err;
attrs = kcalloc(num_vqs, sizeof (struct mlx5_virtq_attr), GFP_KERNEL);
if (!attrs)
return -ENOMEM;
err = query_virtqueues(ndev, start_vq, num_vqs, attrs);
if (err)
goto done;
/* Cache the firmware's ring indices in the driver-side vq state. */
for (i = 0, vq_idx = start_vq; i < num_vqs; i++, vq_idx++) {
mvq = &ndev->vqs[vq_idx];
mvq->avail_idx = attrs[i].available_index;
mvq->used_idx = attrs[i].used_index;
}
done:
kfree(attrs);
return err;
}
/* Convenience wrapper: suspend exactly one virtqueue. */
static int suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	const int num_vqs = 1;

	return suspend_vqs(ndev, mvq->index, num_vqs);
}
/* Bring the vq range [start_vq, start_vq + num_vqs) to the RDY state.
 *
 * NOTE(review): like suspend_vqs(), only the first vq's state gates the
 * whole range -- confirm the range is homogeneous at call sites.
 */
static int resume_vqs(struct mlx5_vdpa_net *ndev, int start_vq, int num_vqs)
{
struct mlx5_vdpa_virtqueue *mvq;
int err;
if (start_vq >= ndev->mvdev.max_vqs)
return -EINVAL;
mvq = &ndev->vqs[start_vq];
if (!mvq->initialized)
return 0;
if (mvq->index >= ndev->cur_num_vqs)
return 0;
switch (mvq->fw_state) {
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
/* Due to a FW quirk we need to modify the VQ fields first then change state.
 * This should be fixed soon. After that, a single command can be used.
 */
err = modify_virtqueues(ndev, start_vq, num_vqs, mvq->fw_state);
if (err)
return err;
break ;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
/* SUSPEND -> RDY requires device resume support. */
if (!is_resumable(ndev)) {
mlx5_vdpa_warn(&ndev->mvdev, "vq %d is not resumable\n" , mvq->index);
return -EINVAL;
}
break ;
case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
return 0;
default :
mlx5_vdpa_err(&ndev->mvdev, "resume vq %u called from bad state %d\n" ,
mvq->index, mvq->fw_state);
return -EINVAL;
}
return modify_virtqueues(ndev, start_vq, num_vqs, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
}
/* Convenience wrapper: resume exactly one virtqueue. */
static int resume_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
	const int num_vqs = 1;

	return resume_vqs(ndev, mvq->index, num_vqs);
}
/* Tear down all hardware resources of one virtqueue, in reverse order of
 * setup_vq(). The queue is suspended first so the index snapshot is taken;
 * pending modifications are discarded. No-op if never initialized.
 */
static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
if (!mvq->initialized)
return ;
suspend_vq(ndev, mvq);
mvq->modified_fields = 0;
destroy_virtqueue(ndev, mvq);
dealloc_vector(ndev, mvq);
counter_set_dealloc(ndev, mvq);
qp_destroy(ndev, &mvq->vqqp);
qp_destroy(ndev, &mvq->fwqp);
cq_destroy(ndev, mvq->index);
mvq->initialized = false ;
}
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
__be32 *list;
void *rqtc;
int inlen;
void *in;
int i, j;
int err;
inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
for (i = 0, j = 0; i < act_sz; i++, j += 2)
list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
kfree(in);
if (err)
return err;
return 0;
}
#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
/* Re-program the RQT for 'num' active vqs; same wrap-around fill rule as
 * create_rqt(), but only the rq list (bitmask NUM_RQS) is modified.
 */
static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
{
	int act_sz = roundup_pow_of_two(num / 2);
	__be32 *list;
	void *rqtc;
	int inlen;
	void *in;
	int i;
	int err;

	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);

	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0; i < act_sz; i++)
		list[i] = cpu_to_be32(ndev->vqs[(2 * i) % num].virtq_id);

	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
	kfree(in);
	return err;
}
/* Destroy the device's RQT created by create_rqt(). */
static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
}
/* Create the TIR that performs symmetric Toeplitz RSS over outer IPv4
 * TCP 4-tuples and dispatches into the RQT. The static key is the well-known
 * symmetric Toeplitz key. Registers the TIR for debugfs via
 * mlx5_vdpa_add_tirn() on success.
 */
static int create_tir(struct mlx5_vdpa_net *ndev)
{
#define HASH_IP_L4PORTS \
(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT | \
MLX5_HASH_FIELD_SEL_L4_DPORT)
static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
void *rss_key;
void *outer;
void *tirc;
void *in;
int err;
in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
if (!in)
return -ENOMEM;
MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
memcpy(rss_key, rx_hash_toeplitz_key, sizeof (rx_hash_toeplitz_key));
outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
kfree(in);
if (err)
return err;
mlx5_vdpa_add_tirn(ndev);
return err;
}
/* Unregister and destroy the TIR created by create_tir(). */
static void destroy_tir(struct mlx5_vdpa_net *ndev)
{
mlx5_vdpa_remove_tirn(ndev);
mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
}
#define MAX_STEERING_ENT 0x8000
#define MAX_STEERING_GROUPS 2
#if defined (CONFIG_MLX5_VDPA_STEERING_DEBUG)
#define NUM_DESTS 2
#else
#define NUM_DESTS 1
#endif
/* With CONFIG_MLX5_VDPA_STEERING_DEBUG: create flow counters for this node's
 * unicast and multicast rules, attach them as the second flow destination
 * (dests[1]) and enable the COUNT action. Without the config, a no-op.
 */
static int add_steering_counters(struct mlx5_vdpa_net *ndev,
struct macvlan_node *node,
struct mlx5_flow_act *flow_act,
struct mlx5_flow_destination *dests)
{
#if defined (CONFIG_MLX5_VDPA_STEERING_DEBUG)
int err;
node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false );
if (IS_ERR(node->ucast_counter.counter))
return PTR_ERR(node->ucast_counter.counter);
node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false );
if (IS_ERR(node->mcast_counter.counter)) {
err = PTR_ERR(node->mcast_counter.counter);
goto err_mcast_counter;
}
/* The caller fills dests[1].counter per rule before adding it. */
dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
return 0;
err_mcast_counter:
mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
return err;
#else
return 0;
#endif
}
/* Destroy the debug flow counters created by add_steering_counters();
 * a no-op without CONFIG_MLX5_VDPA_STEERING_DEBUG.
 */
static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
struct macvlan_node *node)
{
#if defined (CONFIG_MLX5_VDPA_STEERING_DEBUG)
mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
#endif
}
/* Install the two steering rules for one MAC/VLAN node: a unicast rule
 * matching the exact DMAC and, reusing the same spec with the DMAC match
 * rewritten, a multicast rule matching only the group bit (dmac[0] & 1).
 * Both forward to the device TIR; with STEERING_DEBUG a per-rule counter is
 * attached as a second destination.
 */
static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
struct macvlan_node *node)
{
struct mlx5_flow_destination dests[NUM_DESTS] = {};
struct mlx5_flow_act flow_act = {};
struct mlx5_flow_spec *spec;
void *headers_c;
void *headers_v;
u8 *dmac_c;
u8 *dmac_v;
int err;
u16 vid;
spec = kvzalloc(sizeof (*spec), GFP_KERNEL);
if (!spec)
return -ENOMEM;
vid = key2vid(node->macvlan);
spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
/* outer_headers is at offset 0 of fte_match_param, so indexing the
 * dmac through headers_c/headers_v lands on the right bytes.
 */
dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
eth_broadcast_addr(dmac_c);
ether_addr_copy(dmac_v, mac);
/* Match on the VLAN tag only when the guest negotiated CTRL_VLAN. */
if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
}
if (node->tagged) {
MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
}
flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
dests[0].tir_num = ndev->res.tirn;
err = add_steering_counters(ndev, node, &flow_act, dests);
if (err)
goto out_free;
#if defined (CONFIG_MLX5_VDPA_STEERING_DEBUG)
dests[1].counter = node->ucast_counter.counter;
#endif
node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
if (IS_ERR(node->ucast_rule)) {
err = PTR_ERR(node->ucast_rule);
goto err_ucast;
}
#if defined (CONFIG_MLX5_VDPA_STEERING_DEBUG)
dests[1].counter = node->mcast_counter.counter;
#endif
/* Rewrite the DMAC match: only the multicast/group bit. */
memset(dmac_c, 0, ETH_ALEN);
memset(dmac_v, 0, ETH_ALEN);
dmac_c[0] = 1;
dmac_v[0] = 1;
node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
if (IS_ERR(node->mcast_rule)) {
err = PTR_ERR(node->mcast_rule);
goto err_mcast;
}
kvfree(spec);
mlx5_vdpa_add_rx_counters(ndev, node);
return 0;
err_mcast:
mlx5_del_flow_rules(node->ucast_rule);
err_ucast:
remove_steering_counters(ndev, node);
out_free:
kvfree(spec);
return err;
}
/* Remove the node's debugfs counters and both of its steering rules. */
static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
struct macvlan_node *node)
{
mlx5_vdpa_remove_rx_counters(ndev, node);
mlx5_del_flow_rules(node->ucast_rule);
mlx5_del_flow_rules(node->mcast_rule);
}
/* Build the 64-bit hash key for a MAC/VLAN pair: VLAN id in bits 63..48
 * (MLX5V_UNTAGGED for untagged entries), MAC in the low 48 bits.
 */
static u64 search_val(u8 *mac, u16 vlan, bool tagged)
{
	u64 val = tagged ? vlan : MLX5V_UNTAGGED;
	int i;

	for (i = 0; i < ETH_ALEN; i++)
		val = (val << 8) | mac[i];

	return val;
}
/* Find the macvlan hash node matching the search_val() key, or NULL.
 * hash_64(value, 8) selects one of the 2^8 buckets; the bucket count must
 * stay in sync with MLX5V_MACVLAN_SIZE.
 */
static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
{
struct macvlan_node *pos;
u32 idx;
idx = hash_64(value, 8); // tbd 8
hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
if (pos->macvlan == value)
return pos;
}
return NULL;
}
/* Add a MAC/VLAN entry: install the steering rules and insert the node into
 * the hash table. Returns -EEXIST if an entry with the same key exists.
 * Note: 'mac' only forms the hash key; the rules themselves are installed
 * for the device's current config.mac.
 */
static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
{
struct macvlan_node *ptr;
u64 val;
u32 idx;
int err;
val = search_val(mac, vid, tagged);
if (mac_vlan_lookup(ndev, val))
return -EEXIST;
ptr = kzalloc(sizeof (*ptr), GFP_KERNEL);
if (!ptr)
return -ENOMEM;
ptr->tagged = tagged;
ptr->macvlan = val;
ptr->ndev = ndev;
err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
if (err)
goto err_add;
/* Same bucket selection as mac_vlan_lookup(). */
idx = hash_64(val, 8);
hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
return 0;
err_add:
kfree(ptr);
return err;
}
/* Remove a MAC/VLAN entry: unhash the node, delete its steering rules and
 * debug counters, and free it. Silently ignores a missing entry.
 */
static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
{
struct macvlan_node *ptr;
ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
if (!ptr)
return ;
hlist_del(&ptr->hlist);
mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
remove_steering_counters(ndev, ptr);
kfree(ptr);
}
/* Drain the whole MAC/VLAN hash table, deleting every node's rules and
 * counters. Uses the _safe iterator since nodes are freed while walking.
 */
static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
{
struct macvlan_node *pos;
struct hlist_node *n;
int i;
for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
hlist_del(&pos->hlist);
mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
remove_steering_counters(ndev, pos);
kfree(pos);
}
}
}
/* Create the RX flow table in the bypass namespace and install the initial
 * untagged rule for the device's MAC. On failure the table is destroyed.
 */
static int setup_steering(struct mlx5_vdpa_net *ndev)
{
struct mlx5_flow_table_attr ft_attr = {};
struct mlx5_flow_namespace *ns;
int err;
ft_attr.max_fte = MAX_STEERING_ENT;
ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
if (!ns) {
mlx5_vdpa_err(&ndev->mvdev, "failed to get flow namespace\n" );
return -EOPNOTSUPP;
}
ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
if (IS_ERR(ndev->rxft)) {
mlx5_vdpa_err(&ndev->mvdev, "failed to create flow table\n" );
return PTR_ERR(ndev->rxft);
}
mlx5_vdpa_add_rx_flow_table(ndev);
err = mac_vlan_add(ndev, ndev->config.mac, 0, false );
if (err)
goto err_add;
return 0;
err_add:
mlx5_vdpa_remove_rx_flow_table(ndev);
mlx5_destroy_flow_table(ndev->rxft);
return err;
}
/* Undo setup_steering(): drop all MAC/VLAN rules, then the flow table. */
static void teardown_steering(struct mlx5_vdpa_net *ndev)
{
clear_mac_vlan_table(ndev);
mlx5_vdpa_remove_rx_flow_table(ndev);
mlx5_destroy_flow_table(ndev->rxft);
}
/*
 * Handle VIRTIO_NET_CTRL_MAC control-queue commands. For MAC_ADDR_SET the
 * new address is pulled from the control vq, swapped into the physical
 * function's MPFS table and the steering rules are recreated; on any step
 * failing, the previous MAC and rules are restored as far as possible.
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR otherwise.
 */
static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
	struct mlx5_control_vq *cvq = &mvdev->cvq;
	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
	struct mlx5_core_dev *pfmdev;
	size_t read;
	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];

	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
	switch (cmd) {
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
		if (read != ETH_ALEN)
			break;

		/* Use ETH_ALEN instead of a magic 6 for the comparison. */
		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
			status = VIRTIO_NET_OK;
			break;
		}

		if (is_zero_ether_addr(mac))
			break;

		if (!is_zero_ether_addr(ndev->config.mac)) {
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
					       ndev->config.mac);
				break;
			}
		}

		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
				       mac);
			break;
		}

		/* backup the original mac address so that if failed to add the forward rules
		 * we could restore it
		 */
		memcpy(mac_back, ndev->config.mac, ETH_ALEN);

		memcpy(ndev->config.mac, mac, ETH_ALEN);

		/* Need recreate the flow table entry, so that the packet could forward back
		 */
		mac_vlan_del(ndev, mac_back, 0, false);

		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");

			/* Although it hardly run here, we still need double check */
			if (is_zero_ether_addr(mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
				break;
			}

			/* Try to restore original mac address to MFPS table, and try to restore
			 * the forward rule entry.
			 */
			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
					       ndev->config.mac);
			}

			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
					       mac_back);
			}

			memcpy(ndev->config.mac, mac_back, ETH_ALEN);

			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");

			break;
		}

		status = VIRTIO_NET_OK;
		break;

	default:
		break;
	}

	return status;
}
/* Change the number of active queue pairs to newqps. Shrinking updates the
 * RQT first, then suspends (resumable devices) or tears down the excess vqs.
 * Growing sets up and resumes the new vqs before widening the RQT; on
 * failure the vqs added so far are torn down and the old count restored.
 */
static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
{
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
int cur_vqs = ndev->cur_num_vqs;
int new_vqs = newqps * 2;
int err;
int i;
if (cur_vqs > new_vqs) {
err = modify_rqt(ndev, new_vqs);
if (err)
return err;
/* Keep suspended vqs around on resumable devices so a later
 * grow can resume them instead of recreating.
 */
if (is_resumable(ndev)) {
suspend_vqs(ndev, new_vqs, cur_vqs - new_vqs);
} else {
for (i = new_vqs; i < cur_vqs; i++)
teardown_vq(ndev, &ndev->vqs[i]);
}
ndev->cur_num_vqs = new_vqs;
} else {
ndev->cur_num_vqs = new_vqs;
for (i = cur_vqs; i < new_vqs; i++) {
err = setup_vq(ndev, &ndev->vqs[i], false );
if (err)
goto clean_added;
}
err = resume_vqs(ndev, cur_vqs, new_vqs - cur_vqs);
if (err)
goto clean_added;
err = modify_rqt(ndev, new_vqs);
if (err)
goto clean_added;
}
return 0;
clean_added:
/* Tear down only the vqs added in this call, newest first. */
for (--i; i >= cur_vqs; --i)
teardown_vq(ndev, &ndev->vqs[i]);
ndev->cur_num_vqs = cur_vqs;
return err;
}
static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
{
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
struct mlx5_control_vq *cvq = &mvdev->cvq;
struct virtio_net_ctrl_mq mq;
size_t read;
u16 newqps;
switch (cmd) {
case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
/* This mq feature check aligns with pre-existing userspace
* implementation.
*
* Without it, an untrusted driver could fake a multiqueue config
* request down to a non-mq device that may cause kernel to
* panic due to uninitialized resources for extra vqs. Even with
* a well behaving guest driver, it is not expected to allow
* changing the number of vqs on a non-mq device.
*/
if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
break ;
read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof (mq));
if (read != sizeof (mq))
break ;
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5 C=100 H=92 G=95
¤ Dauer der Verarbeitung: 0.12 Sekunden
(vorverarbeitet)
¤
*© Formatika GbR, Deutschland