Quellcodebibliothek Statistik Leitseite products/sources/formale Sprachen/C/Linux/drivers/infiniband/core/   (Open Source Betriebssystem Version 6.17.9©)  Datei vom 24.10.2025 mit Größe 129 kB image not shown  

Quelle  cm.c   Sprache: C

 
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 * Copyright (c) 2019, Mellanox Technologies inc.  All rights reserved.
 */


#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include <rdma/ib_sysfs.h>
#include "cm_msgs.h"
#include "core_priv.h"
#include "cm_trace.h"

/* Module metadata. */
MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

/* Length of each wait in cm_destroy_id() before a "timed out" report. */
#define CM_DESTROY_ID_WAIT_TIMEOUT 10000 /* msecs */
/* Marker placed in context[0] of a response MAD built for a direct retry. */
#define CM_DIRECT_RETRY_CTX ((void *) 1UL)
#define CM_MRA_SETTING 24 /* 4.096us * 2^24 = ~68.7 seconds */

/*
 * Human-readable text for each CM reject reason, indexed by the
 * IB_CM_REJ_* code. Codes without an initializer are left NULL;
 * ibcm_reject_msg() handles those slots.
 */
static const char * const ibcm_rej_reason_strs[] = {
 [IB_CM_REJ_NO_QP]   = "no QP",
 [IB_CM_REJ_NO_EEC]   = "no EEC",
 [IB_CM_REJ_NO_RESOURCES]  = "no resources",
 [IB_CM_REJ_TIMEOUT]   = "timeout",
 [IB_CM_REJ_UNSUPPORTED]   = "unsupported",
 [IB_CM_REJ_INVALID_COMM_ID]  = "invalid comm ID",
 [IB_CM_REJ_INVALID_COMM_INSTANCE] = "invalid comm instance",
 [IB_CM_REJ_INVALID_SERVICE_ID]  = "invalid service ID",
 [IB_CM_REJ_INVALID_TRANSPORT_TYPE] = "invalid transport type",
 [IB_CM_REJ_STALE_CONN]   = "stale conn",
 [IB_CM_REJ_RDC_NOT_EXIST]  = "RDC not exist",
 [IB_CM_REJ_INVALID_GID]   = "invalid GID",
 [IB_CM_REJ_INVALID_LID]   = "invalid LID",
 [IB_CM_REJ_INVALID_SL]   = "invalid SL",
 [IB_CM_REJ_INVALID_TRAFFIC_CLASS] = "invalid traffic class",
 [IB_CM_REJ_INVALID_HOP_LIMIT]  = "invalid hop limit",
 [IB_CM_REJ_INVALID_PACKET_RATE]  = "invalid packet rate",
 [IB_CM_REJ_INVALID_ALT_GID]  = "invalid alt GID",
 [IB_CM_REJ_INVALID_ALT_LID]  = "invalid alt LID",
 [IB_CM_REJ_INVALID_ALT_SL]  = "invalid alt SL",
 [IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS] = "invalid alt traffic class",
 [IB_CM_REJ_INVALID_ALT_HOP_LIMIT] = "invalid alt hop limit",
 [IB_CM_REJ_INVALID_ALT_PACKET_RATE] = "invalid alt packet rate",
 [IB_CM_REJ_PORT_CM_REDIRECT]  = "port CM redirect",
 [IB_CM_REJ_PORT_REDIRECT]  = "port redirect",
 [IB_CM_REJ_INVALID_MTU]   = "invalid MTU",
 [IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES] = "insufficient resp resources",
 [IB_CM_REJ_CONSUMER_DEFINED]  = "consumer defined",
 [IB_CM_REJ_INVALID_RNR_RETRY]  = "invalid RNR retry",
 [IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID] = "duplicate local comm ID",
 [IB_CM_REJ_INVALID_CLASS_VERSION] = "invalid class version",
 [IB_CM_REJ_INVALID_FLOW_LABEL]  = "invalid flow label",
 [IB_CM_REJ_INVALID_ALT_FLOW_LABEL] = "invalid alt flow label",
 [IB_CM_REJ_VENDOR_OPTION_NOT_SUPPORTED] =
  "vendor option is not supported",
};

/*
 * ibcm_reject_msg - Translate a CM reject reason code into readable text.
 * @reason: reject reason code, used as an index into ibcm_rej_reason_strs
 *
 * Returns the table entry for @reason, or "unrecognized reason" when the
 * code lies outside the table or its slot is empty.
 */
const char *__attribute_const__ ibcm_reject_msg(int reason)
{
 /* A negative reason becomes a huge size_t and fails the bounds check. */
 size_t slot = reason;
 const char *text = NULL;

 if (slot < ARRAY_SIZE(ibcm_rej_reason_strs))
  text = ibcm_rej_reason_strs[slot];

 return text ? text : "unrecognized reason";
}
EXPORT_SYMBOL(ibcm_reject_msg);

/*
 * Forward declarations for types and functions that are referenced in this
 * file before their definitions appear.
 */
struct cm_id_private;
struct cm_work;
static int cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);
static void cm_process_work(struct cm_id_private *cm_id_priv,
       struct cm_work *work);
static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
       struct ib_cm_sidr_rep_param *param);
static void cm_issue_dreq(struct cm_id_private *cm_id_priv);
static int cm_send_drep_locked(struct cm_id_private *cm_id_priv,
          void *private_data, u8 private_data_len);
static int cm_send_rej_locked(struct cm_id_private *cm_id_priv,
         enum ib_cm_rej_reason reason, void *ari,
         u8 ari_length, const void *private_data,
         u8 private_data_len);

/*
 * IB client descriptor for the CM, naming cm_add_one()/cm_remove_one() as
 * its add/remove callbacks. It is also the key passed to
 * ib_get_client_data() to look up the per-device cm_device.
 */
static struct ib_client cm_client = {
 .name   = "cm",
 .add    = cm_add_one,
 .remove = cm_remove_one
};

/* Global connection-manager state; there is a single instance, "cm". */
static struct ib_cm {
 /* Taken around updates to the rb-tree tables and timewait_list below */
 spinlock_t lock;
 /* cm_device entries; walked under device_lock */
 struct list_head device_list;
 rwlock_t device_lock;
 /* Listening cm_ids, ordered by (device, service_id) */
 struct rb_root listen_service_table;
 /* Next value handed out for IB_CM_ASSIGN_SERVICE_ID listens */
 u64 listen_service_id;
 /* struct rb_root peer_service_table; todo: fix peer to peer */
 /* timewait_info entries ordered by (remote_qpn, remote_ca_guid) */
 struct rb_root remote_qp_table;
 /* timewait_info entries ordered by (remote_id, remote_ca_guid) */
 struct rb_root remote_id_table;
 /* cm_ids ordered by (remote_id, sidr_slid) */
 struct rb_root remote_sidr_table;
 /* cm_id_private pointers indexed by cm_local_id(local_id) */
 struct xarray local_id_table;
 /* Cursor for cyclic index allocation in local_id_table */
 u32 local_id_next;
 /* XORed with the xarray index to form the local_id of a cm_id */
 __be32 random_id_operand;
 /* timewait_info entries added by cm_enter_timewait() */
 struct list_head timewait_list;
 /* Workqueue on which the delayed timewait work is queued */
 struct workqueue_struct *wq;
} cm;

/*
 * Counter indexes ordered by attribute ID. CM_ATTR_COUNT is the number of
 * counters and sizes the second dimension of cm_port.counters.
 */
enum {
 CM_REQ_COUNTER,
 CM_MRA_COUNTER,
 CM_REJ_COUNTER,
 CM_REP_COUNTER,
 CM_RTU_COUNTER,
 CM_DREQ_COUNTER,
 CM_DREP_COUNTER,
 CM_SIDR_REQ_COUNTER,
 CM_SIDR_REP_COUNTER,
 CM_LAP_COUNTER,
 CM_APR_COUNTER,
 CM_ATTR_COUNT,
 /* NOTE(review): presumably the attribute ID of the first counter - confirm */
 CM_ATTR_ID_OFFSET = 0x0010,
};

/*
 * Counter groups. CM_COUNTER_GROUPS is the number of groups and sizes the
 * first dimension of cm_port.counters.
 */
enum {
 CM_XMIT,
 CM_XMIT_RETRIES,
 CM_RECV,
 CM_RECV_DUPLICATES,
 CM_COUNTER_GROUPS
};

/*
 * A port attribute paired with a (group, index) pair.
 * NOTE(review): presumably group/index select an entry of
 * cm_port.counters - confirm against the sysfs show routine.
 */
struct cm_counter_attribute {
 struct ib_port_attribute attr;
 unsigned short group;
 unsigned short index;
};

/* Per-port CM state. */
struct cm_port {
 /* Owning device */
 struct cm_device *cm_dev;
 /* MAD agent used by default when building CM messages */
 struct ib_mad_agent *mad_agent;
 /* Alternate agent, chosen when cm_alloc_msg_agent() is asked for it */
 struct ib_mad_agent *rep_agent;
 u32 port_num;
 /* Counters indexed by [counter group][counter index] */
 atomic_long_t counters[CM_COUNTER_GROUPS][CM_ATTR_COUNT];
};

/* Per-device CM state; freed by cm_dev_release() when kref drops to zero. */
struct cm_device {
 struct kref kref;
 /* Link on cm.device_list */
 struct list_head list;
 /* Read-locked while a port's MAD agent is used to build a message */
 rwlock_t mad_agent_lock;
 struct ib_device *ib_device;
 u8 ack_delay;
 /* Checked under cm.lock before timewait work is queued */
 int going_down;
 /* Per-port state, indexed by port number minus one */
 struct cm_port *port[];
};

/* Address vector: the port and addressing used to reach a peer. */
struct cm_av {
 /* Set through cm_set_av_port(), which holds a cm_device reference */
 struct cm_port *port;
 struct rdma_ah_attr ah_attr;
 u16 dlid_datapath;
 u16 pkey_index;
 /* IBA timeout exponent; cm_init_av_by_path() stores packet_life_time + 1 */
 u8 timeout;
};

/* One unit of CM work: a received MAD or a timewait expiry. */
struct cm_work {
 struct delayed_work work;
 /* Link on cm_id_private.work_list while waiting to be processed */
 struct list_head list;
 struct cm_port *port;
 struct ib_mad_recv_wc *mad_recv_wc; /* Received MADs */
 __be32 local_id;   /* Established / timewait */
 __be32 remote_id;
 struct ib_cm_event cm_event;
 /* Trailing variable-length array of path records */
 struct sa_path_rec path[];
};

/*
 * Remote-endpoint bookkeeping for a connection. It doubles as the timewait
 * work item, so "work" must stay the first member (cm_enter_timewait()
 * enforces this with a BUILD_BUG_ON).
 */
struct cm_timewait_info {
 struct cm_work work;
 /* Link on cm.timewait_list */
 struct list_head list;
 /* Node in cm.remote_qp_table */
 struct rb_node remote_qp_node;
 /* Node in cm.remote_id_table */
 struct rb_node remote_id_node;
 __be64 remote_ca_guid;
 __be32 remote_qpn;
 /* Non-zero while remote_qp_node is linked into cm.remote_qp_table */
 u8 inserted_remote_qp;
 /* Non-zero while remote_id_node is linked into cm.remote_id_table */
 u8 inserted_remote_id;
};

/* CM-private state wrapped around the public struct ib_cm_id. */
struct cm_id_private {
 struct ib_cm_id id;

 /* Node in cm.listen_service_table */
 struct rb_node service_node;
 /* Node in cm.remote_sidr_table */
 struct rb_node sidr_id_node;
 u32 sidr_slid;
 spinlock_t lock; /* Do not acquire inside cm.lock */
 /* Completed by cm_deref_id() when refcount reaches zero */
 struct completion comp;
 refcount_t refcount;
 /*
  * Number of clients sharing this ib_cm_id. Only valid for listeners.
  * Protected by the cm.lock spinlock.
  */

 int listen_sharecount;
 /* Used by kfree_rcu() in cm_destroy_id() */
 struct rcu_head rcu;

 /* MAD currently owned by this ID; see cm_alloc_priv_msg_rep() */
 struct ib_mad_send_buf *msg;
 struct cm_timewait_info *timewait_info;
 /* todo: use alternate port on send failure */
 struct cm_av av;
 struct cm_av alt_av;

 /* Private-data buffer; freed in cm_set_private_data()/cm_destroy_id() */
 void *private_data;
 __be64 tid;
 __be32 local_qpn;
 __be32 remote_qpn;
 enum ib_qp_type qp_type;
 __be32 sq_psn;
 __be32 rq_psn;
 /* Copied into msg->timeout_ms for MADs owned by this ID */
 int timeout_ms;
 enum ib_mtu path_mtu;
 __be16 pkey;
 u8 private_data_len;
 /* Copied into msg->retries for MADs owned by this ID */
 u8 max_cm_retries;
 u8 responder_resources;
 u8 initiator_depth;
 u8 retry_count;
 u8 rnr_retry_count;
 u8 target_ack_delay;

 /* Work items queued while another thread is delivering events */
 struct list_head work_list;
 /* Starts at -1; cm_queue_work_unlock() uses it to serialize delivery */
 atomic_t work_count;

 struct rdma_ucm_ece ece;
};

static void cm_dev_release(struct kref *kref)
{
 struct cm_device *cm_dev = container_of(kref, struct cm_device, kref);
 u32 i;

 rdma_for_each_port(cm_dev->ib_device, i)
  kfree(cm_dev->port[i - 1]);

 kfree(cm_dev);
}

/* Drop one reference on @cm_dev; the last put runs cm_dev_release(). */
static void cm_device_put(struct cm_device *cm_dev)
{
 struct kref *ref = &cm_dev->kref;

 kref_put(ref, cm_dev_release);
}

/* Work callback installed on timewait work; defined later in this file. */
static void cm_work_handler(struct work_struct *work);

/*
 * Drop one reference on @cm_id_priv. Dropping the last reference signals
 * the completion that cm_destroy_id() waits on.
 */
static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
 if (!refcount_dec_and_test(&cm_id_priv->refcount))
  return;

 complete(&cm_id_priv->comp);
}

/*
 * Build a send MAD, with an address handle taken from cm_id_priv->av, on
 * one of the MAD agents of the ID's port.
 * @cm_id_priv: ID whose address vector is used; its lock must be held
 * @rep_agent: select the port's rep_agent instead of its mad_agent
 *
 * Returns the MAD with ->ah set, or an ERR_PTR: -EINVAL when the ID has no
 * port or the chosen agent is absent, otherwise the error from address
 * handle or MAD creation.
 */
static struct ib_mad_send_buf *
cm_alloc_msg_agent(struct cm_id_private *cm_id_priv, bool rep_agent)
{
 struct cm_port *port;
 struct ib_mad_agent *agent;
 struct ib_mad_send_buf *msg;
 struct ib_ah *ah;

 lockdep_assert_held(&cm_id_priv->lock);

 port = cm_id_priv->av.port;
 if (!port)
  return ERR_PTR(-EINVAL);

 /* The agent pointers are only examined under mad_agent_lock. */
 read_lock(&port->cm_dev->mad_agent_lock);

 if (rep_agent)
  agent = port->rep_agent;
 else
  agent = port->mad_agent;

 if (!agent) {
  msg = ERR_PTR(-EINVAL);
  goto unlock;
 }

 ah = rdma_create_ah(agent->qp->pd, &cm_id_priv->av.ah_attr, 0);
 if (IS_ERR(ah)) {
  msg = ERR_CAST(ah);
  goto unlock;
 }

 msg = ib_create_send_mad(agent, cm_id_priv->id.remote_cm_qpn,
     cm_id_priv->av.pkey_index,
     0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
     GFP_ATOMIC,
     IB_MGMT_BASE_VERSION);
 if (IS_ERR(msg))
  rdma_destroy_ah(ah, 0); /* nothing took ownership of ah */
 else
  msg->ah = ah;

unlock:
 read_unlock(&port->cm_dev->mad_agent_lock);
 return msg;
}

static struct ib_mad_send_buf *cm_alloc_msg(struct cm_id_private *cm_id_priv)
{
 return cm_alloc_msg_agent(cm_id_priv, false);
}

/* Release a send MAD and, if one is attached, its address handle. */
static void cm_free_msg(struct ib_mad_send_buf *msg)
{
 struct ib_ah *ah = msg->ah;

 if (ah)
  rdma_destroy_ah(ah, 0);

 ib_free_send_mad(msg);
}

/*
 * Allocate a MAD that is owned by @cm_id_priv.
 * @cm_id_priv: owning ID; its lock must be held
 * @state: CM state recorded in context[1] of the MAD
 * @rep_agent: passed through to cm_alloc_msg_agent()
 *
 * On success the MAD is stored in cm_id_priv->msg, holds a reference on
 * the ID, and takes its retry count and timeout from the ID. On failure
 * the ERR_PTR from cm_alloc_msg_agent() is returned unchanged.
 */
static struct ib_mad_send_buf *
cm_alloc_priv_msg_rep(struct cm_id_private *cm_id_priv, enum ib_cm_state state,
        bool rep_agent)
{
 struct ib_mad_send_buf *mad;

 lockdep_assert_held(&cm_id_priv->lock);

 mad = cm_alloc_msg_agent(cm_id_priv, rep_agent);
 if (IS_ERR(mad))
  return mad;

 /* Retry policy comes from the ID. */
 mad->timeout_ms = cm_id_priv->timeout_ms;
 mad->retries = cm_id_priv->max_cm_retries;

 /* Tie the MAD to the ID; the MAD keeps the ID referenced. */
 mad->context[0] = cm_id_priv;
 mad->context[1] = (void *) (unsigned long) state;
 refcount_inc(&cm_id_priv->refcount);
 cm_id_priv->msg = mad;

 return mad;
}

static struct ib_mad_send_buf *
cm_alloc_priv_msg(struct cm_id_private *cm_id_priv, enum ib_cm_state state)
{
 return cm_alloc_priv_msg_rep(cm_id_priv, state, false);
}

static void cm_free_priv_msg(struct ib_mad_send_buf *msg)
{
 struct cm_id_private *cm_id_priv = msg->context[0];

 lockdep_assert_held(&cm_id_priv->lock);

 if (!WARN_ON(cm_id_priv->msg != msg))
  cm_id_priv->msg = NULL;

 if (msg->ah)
  rdma_destroy_ah(msg->ah, 0);
 cm_deref_id(cm_id_priv);
 ib_free_send_mad(msg);
}

/*
 * Allocate a response MAD on @port's mad_agent without an address handle.
 * @port: port the request arrived on
 * @mad_recv_wc: received MAD; supplies the pkey index
 * @direct_retry: tag the MAD with CM_DIRECT_RETRY_CTX in context[0]
 *
 * Returns the MAD or the ERR_PTR from ib_create_send_mad().
 */
static struct ib_mad_send_buf *
cm_alloc_response_msg_no_ah(struct cm_port *port,
       struct ib_mad_recv_wc *mad_recv_wc,
       bool direct_retry)
{
 void *tag = direct_retry ? CM_DIRECT_RETRY_CTX : NULL;
 struct ib_mad_send_buf *resp;

 resp = ib_create_send_mad(port->mad_agent, 1,
      mad_recv_wc->wc->pkey_index,
      0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
      GFP_ATOMIC, IB_MGMT_BASE_VERSION);
 if (IS_ERR(resp))
  return resp;

 resp->context[0] = tag;
 return resp;
}

/*
 * Create an address handle from the work completion and GRH of a received
 * MAD and attach it to @msg.
 *
 * Returns 0 on success or the negative error from ib_create_ah_from_wc().
 */
static int cm_create_response_msg_ah(struct cm_port *port,
         struct ib_mad_recv_wc *mad_recv_wc,
         struct ib_mad_send_buf *msg)
{
 struct ib_pd *pd = port->mad_agent->qp->pd;
 struct ib_ah *reply_ah;

 reply_ah = ib_create_ah_from_wc(pd, mad_recv_wc->wc,
     mad_recv_wc->recv_buf.grh,
     port->port_num);
 if (!IS_ERR(reply_ah)) {
  msg->ah = reply_ah;
  return 0;
 }

 return PTR_ERR(reply_ah);
}

/*
 * Allocate a response MAD, with its address handle, for a received MAD.
 * @port: port the request arrived on
 * @mad_recv_wc: the received MAD being answered
 * @direct_retry: forwarded to cm_alloc_response_msg_no_ah()
 * @msg: receives the new MAD on success; left untouched on failure
 *
 * Returns 0 on success or a negative errno.
 */
static int cm_alloc_response_msg(struct cm_port *port,
     struct ib_mad_recv_wc *mad_recv_wc,
     bool direct_retry,
     struct ib_mad_send_buf **msg)
{
 struct ib_mad_send_buf *resp;
 int err;

 resp = cm_alloc_response_msg_no_ah(port, mad_recv_wc, direct_retry);
 if (IS_ERR(resp))
  return PTR_ERR(resp);

 err = cm_create_response_msg_ah(port, mad_recv_wc, resp);
 if (!err) {
  *msg = resp;
  return 0;
 }

 /* No address handle was attached, so only the MAD needs freeing. */
 ib_free_send_mad(resp);
 return err;
}

/*
 * Duplicate a private-data buffer with GFP_KERNEL.
 *
 * Returns NULL when there is nothing to copy (NULL pointer or zero
 * length), ERR_PTR(-ENOMEM) when the allocation fails, otherwise the copy.
 */
static void *cm_copy_private_data(const void *private_data, u8 private_data_len)
{
 void *copy;

 if (!private_data)
  return NULL;
 if (!private_data_len)
  return NULL;

 copy = kmemdup(private_data, private_data_len, GFP_KERNEL);
 return copy ? copy : ERR_PTR(-ENOMEM);
}

/*
 * Install a new private-data buffer on @cm_id_priv. The previous buffer is
 * freed when both the old pointer and the old length are non-zero.
 */
static void cm_set_private_data(struct cm_id_private *cm_id_priv,
     void *private_data, u8 private_data_len)
{
 void *old_data = cm_id_priv->private_data;
 u8 old_len = cm_id_priv->private_data_len;

 if (old_data && old_len)
  kfree(old_data);

 cm_id_priv->private_data_len = private_data_len;
 cm_id_priv->private_data = private_data;
}

/*
 * Point @av at @port and move the cm_device reference that the av holds.
 * @av: address vector to update
 * @port: new port, or NULL to detach the av from any port
 *
 * Nothing happens when the port is unchanged. Otherwise a reference is
 * taken on the new port's device and the one on the old port's device is
 * dropped.
 *
 * The new reference is taken before the old one is released. When both
 * ports belong to the same cm_device and the av held its last reference,
 * releasing first would run cm_dev_release() and the following kref_get()
 * would touch freed memory.
 */
static void cm_set_av_port(struct cm_av *av, struct cm_port *port)
{
 struct cm_port *old_port = av->port;

 if (old_port == port)
  return;

 if (port)
  kref_get(&port->cm_dev->kref);
 av->port = port;
 if (old_port)
  cm_device_put(old_port->cm_dev);
}

/*
 * Fill @av for LAP handling: bind it to @port, take the pkey index from
 * @wc and move the contents of @ah_attr into the av.
 */
static void cm_init_av_for_lap(struct cm_port *port, struct ib_wc *wc,
          struct rdma_ah_attr *ah_attr, struct cm_av *av)
{
 struct rdma_ah_attr *dst_attr = &av->ah_attr;

 cm_set_av_port(av, port);
 av->pkey_index = wc->pkey_index;
 rdma_move_ah_attr(dst_attr, ah_attr);
}

/*
 * Fill @av so that a response can be sent back to the sender of a received
 * MAD: bind it to @port, take the pkey index from @wc and derive the
 * address attributes from @wc and @grh.
 *
 * Returns the result of ib_init_ah_attr_from_wc().
 */
static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
       struct ib_grh *grh, struct cm_av *av)
{
 struct ib_device *ibdev;

 cm_set_av_port(av, port);
 av->pkey_index = wc->pkey_index;

 ibdev = port->cm_dev->ib_device;
 return ib_init_ah_attr_from_wc(ibdev, port->port_num, wc, grh,
           &av->ah_attr);
}

static struct cm_port *
get_cm_port_from_path(struct sa_path_rec *path, const struct ib_gid_attr *attr)
{
 struct cm_device *cm_dev;
 struct cm_port *port = NULL;
 unsigned long flags;

 if (attr) {
  read_lock_irqsave(&cm.device_lock, flags);
  list_for_each_entry(cm_dev, &cm.device_list, list) {
   if (cm_dev->ib_device == attr->device) {
    port = cm_dev->port[attr->port_num - 1];
    break;
   }
  }
  read_unlock_irqrestore(&cm.device_lock, flags);
 } else {
  /* SGID attribute can be NULL in following
 * conditions.
 * (a) Alternative path
 * (b) IB link layer without GRH
 * (c) LAP send messages
 */

  read_lock_irqsave(&cm.device_lock, flags);
  list_for_each_entry(cm_dev, &cm.device_list, list) {
   attr = rdma_find_gid(cm_dev->ib_device,
          &path->sgid,
          sa_conv_pathrec_to_gid_type(path),
          NULL);
   if (!IS_ERR(attr)) {
    port = cm_dev->port[attr->port_num - 1];
    break;
   }
  }
  read_unlock_irqrestore(&cm.device_lock, flags);
  if (port)
   rdma_put_gid_attr(attr);
 }
 return port;
}

static int cm_init_av_by_path(struct sa_path_rec *path,
         const struct ib_gid_attr *sgid_attr,
         struct cm_av *av)
{
 struct rdma_ah_attr new_ah_attr;
 struct cm_device *cm_dev;
 struct cm_port *port;
 int ret;

 port = get_cm_port_from_path(path, sgid_attr);
 if (!port)
  return -EINVAL;
 cm_dev = port->cm_dev;

 ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
      be16_to_cpu(path->pkey), &av->pkey_index);
 if (ret)
  return ret;

 cm_set_av_port(av, port);

 /*
 * av->ah_attr might be initialized based on wc or during
 * request processing time which might have reference to sgid_attr.
 * So initialize a new ah_attr on stack.
 * If initialization fails, old ah_attr is used for sending any
 * responses. If initialization is successful, than new ah_attr
 * is used by overwriting the old one. So that right ah_attr
 * can be used to return an error response.
 */

 ret = ib_init_ah_attr_from_path(cm_dev->ib_device, port->port_num, path,
     &new_ah_attr, sgid_attr);
 if (ret)
  return ret;

 av->timeout = path->packet_life_time + 1;
 rdma_move_ah_attr(&av->ah_attr, &new_ah_attr);
 return 0;
}

/*
 * Move an av created by cm_init_av_by_path(), so av.dgid is not moved.
 * @src ends up detached from its port.
 */
static void cm_move_av_from_path(struct cm_av *dest, struct cm_av *src)
{
 /* Hand the port over: dest takes it before src lets go of it. */
 cm_set_av_port(dest, src->port);
 cm_set_av_port(src, NULL);

 dest->timeout = src->timeout;
 dest->pkey_index = src->pkey_index;
 rdma_move_ah_attr(&dest->ah_attr, &src->ah_attr);
}

/* Tear down @av: destroy its address attributes, then detach its port. */
static void cm_destroy_av(struct cm_av *av)
{
 struct rdma_ah_attr *attr = &av->ah_attr;

 rdma_destroy_ah_attr(attr);
 cm_set_av_port(av, NULL);
}

/*
 * Convert a local communication ID into its index in cm.local_id_table by
 * XORing it with cm.random_id_operand.
 */
static u32 cm_local_id(__be32 local_id)
{
 __be32 index = local_id ^ cm.random_id_operand;

 return (__force u32) index;
}

/*
 * Look up a cm_id by local ID and take a reference on it.
 *
 * Returns the ID only when it exists, its remote_id equals @remote_id and
 * its refcount was still non-zero; otherwise NULL.
 */
static struct cm_id_private *cm_acquire_id(__be32 local_id, __be32 remote_id)
{
 struct cm_id_private *candidate;
 struct cm_id_private *acquired = NULL;

 rcu_read_lock();
 candidate = xa_load(&cm.local_id_table, cm_local_id(local_id));
 if (candidate && candidate->id.remote_id == remote_id &&
     refcount_inc_not_zero(&candidate->refcount))
  acquired = candidate;
 rcu_read_unlock();

 return acquired;
}

/*
 * Small comparison helpers that drop the endian annotation. The byte
 * order is irrelevant here: the RB trees only need some stable ordering.
 */

/* Non-zero when the raw value of @a is less than that of @b. */
static int be32_lt(__be32 a, __be32 b)
{
 u32 lhs = (__force u32) a;
 u32 rhs = (__force u32) b;

 return lhs < rhs;
}

/* Non-zero when the raw value of @a is greater than that of @b. */
static int be32_gt(__be32 a, __be32 b)
{
 u32 lhs = (__force u32) a;
 u32 rhs = (__force u32) b;

 return lhs > rhs;
}

/* Non-zero when the raw value of @a is less than that of @b. */
static int be64_lt(__be64 a, __be64 b)
{
 u64 lhs = (__force u64) a;
 u64 rhs = (__force u64) b;

 return lhs < rhs;
}

/* Non-zero when the raw value of @a is greater than that of @b. */
static int be64_gt(__be64 a, __be64 b)
{
 u64 lhs = (__force u64) a;
 u64 rhs = (__force u64) b;

 return lhs > rhs;
}

/*
 * Add a listener to cm.listen_service_table, which is keyed by
 * (device, service_id).
 * @cm_id_priv: the new listener
 * @shared_handler: handler an existing listener must use to be shareable
 *
 * Returns @cm_id_priv when it was inserted, the already present listener
 * (with an extra reference and a raised share count) when that one can be
 * shared, or NULL when the key is taken by a listener that cannot be
 * shared.
 */
static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv,
           ib_cm_handler shared_handler)
{
 struct rb_node **slot = &cm.listen_service_table.rb_node;
 struct rb_node *parent = NULL;
 struct cm_id_private *result = cm_id_priv;
 const __be64 service_id = cm_id_priv->id.service_id;
 unsigned long flags;

 spin_lock_irqsave(&cm.lock, flags);
 while (*slot) {
  struct cm_id_private *cur;

  parent = *slot;
  cur = rb_entry(parent, struct cm_id_private, service_node);

  if (cm_id_priv->id.device < cur->id.device) {
   slot = &parent->rb_left;
   continue;
  }
  if (cm_id_priv->id.device > cur->id.device) {
   slot = &parent->rb_right;
   continue;
  }
  if (be64_lt(service_id, cur->id.service_id)) {
   slot = &parent->rb_left;
   continue;
  }
  if (be64_gt(service_id, cur->id.service_id)) {
   slot = &parent->rb_right;
   continue;
  }

  /*
   * Same device and service ID. An ib_cm_id can only be shared
   * between users of the same handler, and not when it carries
   * a context.
   */
  if (cur->id.cm_handler != shared_handler ||
      cur->id.context ||
      WARN_ON(!cur->id.cm_handler)) {
   result = NULL;
   goto unlock;
  }
  refcount_inc(&cur->refcount);
  cur->listen_sharecount++;
  result = cur;
  goto unlock;
 }

 /* No entry with this key yet: link the new listener in. */
 cm_id_priv->listen_sharecount++;
 rb_link_node(&cm_id_priv->service_node, parent, slot);
 rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);

unlock:
 spin_unlock_irqrestore(&cm.lock, flags);
 return result;
}

/*
 * Look up the listener for (@device, @service_id) in
 * cm.listen_service_table.
 *
 * Returns the listener with an extra reference, or NULL. No lock is taken
 * here. NOTE(review): presumably the caller holds cm.lock - confirm.
 */
static struct cm_id_private *cm_find_listen(struct ib_device *device,
         __be64 service_id)
{
 struct rb_node *pos = cm.listen_service_table.rb_node;

 while (pos) {
  struct cm_id_private *cur;

  cur = rb_entry(pos, struct cm_id_private, service_node);

  if (device < cur->id.device) {
   pos = pos->rb_left;
  } else if (device > cur->id.device) {
   pos = pos->rb_right;
  } else if (be64_lt(service_id, cur->id.service_id)) {
   pos = pos->rb_left;
  } else if (be64_gt(service_id, cur->id.service_id)) {
   pos = pos->rb_right;
  } else {
   refcount_inc(&cur->refcount);
   return cur;
  }
 }

 return NULL;
}

static struct cm_timewait_info *
cm_insert_remote_id(struct cm_timewait_info *timewait_info)
{
 struct rb_node **link = &cm.remote_id_table.rb_node;
 struct rb_node *parent = NULL;
 struct cm_timewait_info *cur_timewait_info;
 __be64 remote_ca_guid = timewait_info->remote_ca_guid;
 __be32 remote_id = timewait_info->work.remote_id;

 while (*link) {
  parent = *link;
  cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
          remote_id_node);
  if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
   link = &(*link)->rb_left;
  else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
   link = &(*link)->rb_right;
  else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
   link = &(*link)->rb_left;
  else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
   link = &(*link)->rb_right;
  else
   return cur_timewait_info;
 }
 timewait_info->inserted_remote_id = 1;
 rb_link_node(&timewait_info->remote_id_node, parent, link);
 rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
 return NULL;
}

/*
 * Find the cm_id that owns the remote_id_table entry for
 * (@remote_id, @remote_ca_guid).
 *
 * Returns the ID with a reference taken through cm_acquire_id(), or NULL
 * when there is no such entry or the ID could not be acquired.
 *
 * The tree root is read only after cm.lock has been taken. Sampling it
 * beforehand would let another CPU erase, and possibly free, that node in
 * the window before the lock is held, and the walk would then start from
 * a stale pointer.
 */
static struct cm_id_private *cm_find_remote_id(__be64 remote_ca_guid,
            __be32 remote_id)
{
 struct cm_timewait_info *timewait_info;
 struct cm_id_private *res = NULL;
 struct rb_node *node;

 spin_lock_irq(&cm.lock);
 node = cm.remote_id_table.rb_node;
 while (node) {
  timewait_info = rb_entry(node, struct cm_timewait_info,
      remote_id_node);
  if (be32_lt(remote_id, timewait_info->work.remote_id))
   node = node->rb_left;
  else if (be32_gt(remote_id, timewait_info->work.remote_id))
   node = node->rb_right;
  else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
   node = node->rb_left;
  else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
   node = node->rb_right;
  else {
   res = cm_acquire_id(timewait_info->work.local_id,
          timewait_info->work.remote_id);
   break;
  }
 }
 spin_unlock_irq(&cm.lock);
 return res;
}

static struct cm_timewait_info *
cm_insert_remote_qpn(struct cm_timewait_info *timewait_info)
{
 struct rb_node **link = &cm.remote_qp_table.rb_node;
 struct rb_node *parent = NULL;
 struct cm_timewait_info *cur_timewait_info;
 __be64 remote_ca_guid = timewait_info->remote_ca_guid;
 __be32 remote_qpn = timewait_info->remote_qpn;

 while (*link) {
  parent = *link;
  cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
          remote_qp_node);
  if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
   link = &(*link)->rb_left;
  else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
   link = &(*link)->rb_right;
  else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
   link = &(*link)->rb_left;
  else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
   link = &(*link)->rb_right;
  else
   return cur_timewait_info;
 }
 timewait_info->inserted_remote_qp = 1;
 rb_link_node(&timewait_info->remote_qp_node, parent, link);
 rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
 return NULL;
}

/*
 * Insert @cm_id_priv into cm.remote_sidr_table, keyed by
 * (remote_id, sidr_slid).
 *
 * Returns NULL when the ID was inserted, or the ID already stored under
 * the same key. No lock is taken here. NOTE(review): presumably the
 * caller holds cm.lock - confirm.
 */
static struct cm_id_private *
cm_insert_remote_sidr(struct cm_id_private *cm_id_priv)
{
 struct rb_node **slot = &cm.remote_sidr_table.rb_node;
 struct rb_node *parent = NULL;
 const __be32 id = cm_id_priv->id.remote_id;

 while (*slot) {
  struct cm_id_private *cur;

  parent = *slot;
  cur = rb_entry(parent, struct cm_id_private, sidr_id_node);

  if (be32_lt(id, cur->id.remote_id)) {
   slot = &parent->rb_left;
   continue;
  }
  if (be32_gt(id, cur->id.remote_id)) {
   slot = &parent->rb_right;
   continue;
  }

  /*
   * Equal remote IDs: order by SLID. Here the existing entry is
   * the left operand, so a larger new SLID descends to the left.
   */
  if (cur->sidr_slid < cm_id_priv->sidr_slid)
   slot = &parent->rb_left;
  else if (cur->sidr_slid > cm_id_priv->sidr_slid)
   slot = &parent->rb_right;
  else
   return cur;
 }

 rb_link_node(&cm_id_priv->sidr_id_node, parent, slot);
 rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
 return NULL;
}

/*
 * Allocate and initialize a cm_id_private in the IDLE state.
 * @device: device the ID belongs to
 * @cm_handler: event callback stored in the ID
 * @context: caller context stored in the ID
 *
 * A slot is reserved in cm.local_id_table with a NULL entry, so lookups do
 * not find the ID until cm_finalize_id() stores it. The local_id is the
 * reserved index XORed with cm.random_id_operand.
 *
 * Returns the new ID or an ERR_PTR (-ENOMEM, or the xa_alloc_cyclic()
 * error).
 */
static struct cm_id_private *cm_alloc_id_priv(struct ib_device *device,
           ib_cm_handler cm_handler,
           void *context)
{
 struct cm_id_private *priv;
 u32 index;
 int err;

 priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 if (!priv)
  return ERR_PTR(-ENOMEM);

 /* Public part. */
 priv->id.device = device;
 priv->id.cm_handler = cm_handler;
 priv->id.context = context;
 priv->id.state = IB_CM_IDLE;
 priv->id.remote_cm_qpn = 1;

 /* Private bookkeeping. */
 spin_lock_init(&priv->lock);
 init_completion(&priv->comp);
 refcount_set(&priv->refcount, 1);
 atomic_set(&priv->work_count, -1);
 INIT_LIST_HEAD(&priv->work_list);
 RB_CLEAR_NODE(&priv->service_node);
 RB_CLEAR_NODE(&priv->sidr_id_node);

 err = xa_alloc_cyclic(&cm.local_id_table, &index, NULL, xa_limit_32b,
         &cm.local_id_next, GFP_KERNEL);
 if (err < 0) {
  kfree(priv);
  return ERR_PTR(err);
 }

 priv->id.local_id = (__force __be32)index ^ cm.random_id_operand;
 return priv;
}

/*
 * Make the ID visible to the MAD handlers and other threads that use the
 * xarray.
 */

static void cm_finalize_id(struct cm_id_private *cm_id_priv)
{
 xa_store(&cm.local_id_table, cm_local_id(cm_id_priv->id.local_id),
   cm_id_priv, GFP_ATOMIC);
}

/*
 * ib_create_cm_id - Allocate a communication identifier.
 * @device: device associated with the ID
 * @cm_handler: callback stored in the ID
 * @context: caller context stored in the ID
 *
 * The new ID is published in the local ID table before it is returned.
 * Returns the ID or the ERR_PTR from cm_alloc_id_priv().
 */
struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
     ib_cm_handler cm_handler,
     void *context)
{
 struct cm_id_private *priv = cm_alloc_id_priv(device, cm_handler,
            context);

 if (IS_ERR(priv))
  return ERR_CAST(priv);

 cm_finalize_id(priv);
 return &priv->id;
}
EXPORT_SYMBOL(ib_create_cm_id);

/*
 * Unlink and return the oldest entry of cm_id_priv->work_list, or NULL
 * when the list is empty.
 */
static struct cm_work *cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
 struct list_head *queue = &cm_id_priv->work_list;
 struct cm_work *head;

 if (list_empty(queue))
  return NULL;

 head = list_first_entry(queue, struct cm_work, list);
 list_del(&head->list);
 return head;
}

/* Free a work item along with the received MAD it carries, if any. */
static void cm_free_work(struct cm_work *work)
{
 struct ib_mad_recv_wc *recv = work->mad_recv_wc;

 if (recv)
  ib_free_recv_mad(recv);

 kfree(work);
}

/*
 * Hand @work to the user callback of @cm_id_priv, or queue it behind the
 * thread that is already delivering events. Called with cm_id_priv->lock
 * held; the lock is released before returning.
 *
 * The user callback cannot run under the spinlock, yet it must be single
 * threaded and see events in the order they occurred. work_count decides
 * who delivers: the caller that raises it to zero processes the work
 * itself, everyone else appends to work_list for the active thread to
 * pick up.
 */
static void cm_queue_work_unlock(struct cm_id_private *cm_id_priv,
     struct cm_work *work)
 __releases(&cm_id_priv->lock)
{
 if (atomic_inc_and_test(&cm_id_priv->work_count)) {
  /* Nobody else is delivering: do it here, outside the lock. */
  spin_unlock_irq(&cm_id_priv->lock);
  cm_process_work(cm_id_priv, work);
  return;
 }

 list_add_tail(&work->list, &cm_id_priv->work_list);
 /*
  * The incoming reference is always consumed by this routine. For
  * queued work the thread running cm_process_work() already holds a
  * reference, so the caller's one can go right away.
  */
 cm_deref_id(cm_id_priv);
 spin_unlock_irq(&cm_id_priv->lock);
}

/*
 * cm_convert_to_ms - Approximate an IBA timeout exponent in milliseconds.
 * @iba_time: exponent of an interval of 4.096us * 2^iba_time
 *
 * 4.096us * 2^8 is about 1ms, so the result is 2^(iba_time - 8) with a
 * floor of 1ms. The shift count is capped at 30 because shifting a signed
 * int by 31 or more bits is undefined behaviour in C, and the exponent
 * passed in (for instance av.timeout, a u8 derived from
 * packet_life_time + 1) is not guaranteed to stay below that limit.
 *
 * Returns the approximate number of milliseconds, between 1 and 2^30.
 */
static inline int cm_convert_to_ms(int iba_time)
{
 int shift = iba_time - 8;

 if (shift < 0)
  shift = 0;
 else if (shift > 30)
  shift = 30;

 return 1 << shift;
}

/*
 * Compute: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 *
 * The result is an exponent, so adding one doubles the timeout. To keep
 * timeouts small, take the larger of ack_delay and life_time + 1 and
 * round it up by one only when the smaller term is within 50% of it. The
 * result is limited to 31.
 */
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
 int round_trip = packet_life_time + 1;
 int delay = ca_ack_delay;
 int larger, smaller;
 int ack_timeout;

 if (round_trip >= delay) {
  larger = round_trip;
  smaller = delay;
 } else {
  larger = delay;
  smaller = round_trip;
 }

 ack_timeout = larger;
 if (smaller >= larger - 1)
  ack_timeout++;

 return min(31, ack_timeout);
}

/*
 * Unlink the timewait_info of @cm_id_priv from the remote ID and remote QP
 * trees, for whichever of the two it is currently inserted in, and clear
 * the matching inserted flags.
 */
static void cm_remove_remote(struct cm_id_private *cm_id_priv)
{
 struct cm_timewait_info *tw = cm_id_priv->timewait_info;

 if (tw->inserted_remote_id) {
  tw->inserted_remote_id = 0;
  rb_erase(&tw->remote_id_node, &cm.remote_id_table);
 }

 if (tw->inserted_remote_qp) {
  tw->inserted_remote_qp = 0;
  rb_erase(&tw->remote_qp_node, &cm.remote_qp_table);
 }
}

/*
 * Allocate a zeroed timewait_info for @local_id. Its embedded work item is
 * set up to run cm_work_handler() and to report IB_CM_TIMEWAIT_EXIT.
 *
 * Returns the new object or ERR_PTR(-ENOMEM).
 */
static struct cm_timewait_info *cm_create_timewait_info(__be32 local_id)
{
 struct cm_timewait_info *tw = kzalloc(sizeof(*tw), GFP_KERNEL);

 if (!tw)
  return ERR_PTR(-ENOMEM);

 INIT_DELAYED_WORK(&tw->work.work, cm_work_handler);
 tw->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
 tw->work.local_id = local_id;
 return tw;
}

/*
 * Move @cm_id_priv into the TIMEWAIT state. Must be called with
 * cm_id_priv->lock held.
 *
 * The timewait_info is unlinked from the remote ID/QP trees, put on
 * cm.timewait_list and, unless the device is going down, queued as delayed
 * work that fires after the av timeout. Ownership of the timewait_info
 * passes to that work, so the pointer in the ID is cleared. Nothing is
 * done when the device has no CM client data.
 */
static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
 int wait_time;
 unsigned long flags;
 struct cm_device *cm_dev;

 lockdep_assert_held(&cm_id_priv->lock);

 cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
 if (!cm_dev)
  return;

 spin_lock_irqsave(&cm.lock, flags);
 cm_remove_remote(cm_id_priv);
 list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
 spin_unlock_irqrestore(&cm.lock, flags);

 /*
  * The cm_id could be destroyed by the user before we exit timewait.
  * To protect against this, we search for the cm_id after exiting
  * timewait before notifying the user that we've exited timewait.
  */

 cm_id_priv->id.state = IB_CM_TIMEWAIT;
 wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);

 /* Check if the device started its remove_one */
 spin_lock_irqsave(&cm.lock, flags);
 if (!cm_dev->going_down)
  queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
       msecs_to_jiffies(wait_time));
 spin_unlock_irqrestore(&cm.lock, flags);

 /*
  * The timewait_info is converted into a work and gets freed during
  * cm_free_work() in cm_timewait_handler().
  */

 /* kfree() of the work pointer only works if it is the first member. */
 BUILD_BUG_ON(offsetof(struct cm_timewait_info, work) != 0);
 cm_id_priv->timewait_info = NULL;
}

/*
 * Put @cm_id_priv back into the IDLE state and dispose of its
 * timewait_info, if it has one. Must be called with cm_id_priv->lock held.
 */
static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
 unsigned long flags;

 lockdep_assert_held(&cm_id_priv->lock);

 cm_id_priv->id.state = IB_CM_IDLE;
 if (!cm_id_priv->timewait_info)
  return;

 /* The remote trees are only modified under cm.lock. */
 spin_lock_irqsave(&cm.lock, flags);
 cm_remove_remote(cm_id_priv);
 spin_unlock_irqrestore(&cm.lock, flags);

 kfree(cm_id_priv->timewait_info);
 cm_id_priv->timewait_info = NULL;
}

/*
 * Report, rate limited, that cm_destroy_id() is still waiting for the
 * references on @cm_id to go away. The message shows the state at the
 * start of the destroy, the current state and the current refcount.
 */
static noinline void cm_destroy_id_wait_timeout(struct ib_cm_id *cm_id,
      enum ib_cm_state old_state)
{
 struct cm_id_private *priv =
  container_of(cm_id, struct cm_id_private, id);

 pr_err_ratelimited("%s: cm_id=%p timed out. state %d -> %d, refcnt=%d\n",
      __func__, cm_id, old_state, cm_id->state,
      refcount_read(&priv->refcount));
}

/*
 * Tear down a cm_id.
 * @cm_id: the ID to destroy
 * @err: reason for the teardown; -ENOMEM in the REQ_RCVD state resets the
 *   ID to idle without sending a REJ
 *
 * The ID is first driven to IB_CM_IDLE from whatever state it is in, which
 * may involve cancelling the outstanding MAD and sending a REJ, DREQ, DREP
 * or SIDR reject. States that lead to another non-idle state jump back to
 * "retest". A shared listener only drops its share and returns early.
 *
 * Afterwards the ID is removed from the remote tables and the local ID
 * table, the caller's reference is dropped, and the function waits until
 * all other references are gone before freeing the ID.
 */
static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
 struct cm_id_private *cm_id_priv;
 enum ib_cm_state old_state;
 struct cm_work *work;
 int ret;

 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 spin_lock_irq(&cm_id_priv->lock);
 /* Remembered only for the timeout report below. */
 old_state = cm_id->state;
retest:
 switch (cm_id->state) {
 case IB_CM_LISTEN:
  spin_lock(&cm.lock);
  if (--cm_id_priv->listen_sharecount > 0) {
   /* The id is still shared. */
   WARN_ON(refcount_read(&cm_id_priv->refcount) == 1);
   spin_unlock(&cm.lock);
   spin_unlock_irq(&cm_id_priv->lock);
   cm_deref_id(cm_id_priv);
   return;
  }
  cm_id->state = IB_CM_IDLE;
  rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
  RB_CLEAR_NODE(&cm_id_priv->service_node);
  spin_unlock(&cm.lock);
  break;
 case IB_CM_SIDR_REQ_SENT:
  cm_id->state = IB_CM_IDLE;
  ib_cancel_mad(cm_id_priv->msg);
  break;
 case IB_CM_SIDR_REQ_RCVD:
  cm_send_sidr_rep_locked(cm_id_priv,
     &(struct ib_cm_sidr_rep_param){
      .status = IB_SIDR_REJECT });
  /* cm_send_sidr_rep_locked will not move to IDLE if it fails */
  cm_id->state = IB_CM_IDLE;
  break;
 case IB_CM_REQ_SENT:
 case IB_CM_MRA_REQ_RCVD:
  ib_cancel_mad(cm_id_priv->msg);
  /* The REJ carries the local node GUID as additional info. */
  cm_send_rej_locked(cm_id_priv, IB_CM_REJ_TIMEOUT,
       &cm_id_priv->id.device->node_guid,
       sizeof(cm_id_priv->id.device->node_guid),
       NULL, 0);
  break;
 case IB_CM_REQ_RCVD:
  if (err == -ENOMEM) {
   /* Do not reject to allow future retries. */
   cm_reset_to_idle(cm_id_priv);
  } else {
   cm_send_rej_locked(cm_id_priv,
        IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
        NULL, 0);
  }
  break;
 case IB_CM_REP_SENT:
 case IB_CM_MRA_REP_RCVD:
  ib_cancel_mad(cm_id_priv->msg);
  cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
       0, NULL, 0);
  goto retest;
 case IB_CM_MRA_REQ_SENT:
 case IB_CM_REP_RCVD:
 case IB_CM_MRA_REP_SENT:
  cm_send_rej_locked(cm_id_priv, IB_CM_REJ_CONSUMER_DEFINED, NULL,
       0, NULL, 0);
  break;
 case IB_CM_ESTABLISHED:
  /* XRC target QPs go straight to idle, without a DREQ. */
  if (cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
   cm_id->state = IB_CM_IDLE;
   break;
  }
  cm_issue_dreq(cm_id_priv);
  cm_enter_timewait(cm_id_priv);
  goto retest;
 case IB_CM_DREQ_SENT:
  ib_cancel_mad(cm_id_priv->msg);
  cm_enter_timewait(cm_id_priv);
  goto retest;
 case IB_CM_DREQ_RCVD:
  cm_send_drep_locked(cm_id_priv, NULL, 0);
  WARN_ON(cm_id->state != IB_CM_TIMEWAIT);
  goto retest;
 case IB_CM_TIMEWAIT:
  /*
   * The cm_acquire_id in cm_timewait_handler will stop working
   * once we do xa_erase below, so just move to idle here for
   * consistency.
   */

  cm_id->state = IB_CM_IDLE;
  break;
 case IB_CM_IDLE:
  break;
 }
 WARN_ON(cm_id->state != IB_CM_IDLE);

 spin_lock(&cm.lock);
 /* Required for cleanup paths related cm_req_handler() */
 if (cm_id_priv->timewait_info) {
  cm_remove_remote(cm_id_priv);
  kfree(cm_id_priv->timewait_info);
  cm_id_priv->timewait_info = NULL;
 }

 WARN_ON(cm_id_priv->listen_sharecount);
 WARN_ON(!RB_EMPTY_NODE(&cm_id_priv->service_node));
 if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
  rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
 spin_unlock(&cm.lock);
 spin_unlock_irq(&cm_id_priv->lock);

 /* From here on cm_acquire_id() can no longer find this ID. */
 xa_erase(&cm.local_id_table, cm_local_id(cm_id->local_id));
 /* Drop the caller's reference, then wait for the remaining ones. */
 cm_deref_id(cm_id_priv);
 do {
  ret = wait_for_completion_timeout(&cm_id_priv->comp,
        msecs_to_jiffies(
        CM_DESTROY_ID_WAIT_TIMEOUT));
  if (!ret) /* timeout happened */
   cm_destroy_id_wait_timeout(cm_id, old_state);
 } while (!ret);

 /* Discard work that was queued but never delivered. */
 while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
  cm_free_work(work);

 cm_destroy_av(&cm_id_priv->av);
 cm_destroy_av(&cm_id_priv->alt_av);
 kfree(cm_id_priv->private_data);
 /* Freed via RCU; cm_acquire_id() looks IDs up under rcu_read_lock(). */
 kfree_rcu(cm_id_priv, rcu);
}

/**
 * ib_destroy_cm_id - Destroy a connection identifier.
 * @cm_id: Connection identifier to destroy.
 *
 * Exported wrapper around cm_destroy_id() that passes an error value of 0.
 */
void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
 cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

/*
 * Record the service ID that @cm_id_priv will listen on.
 *
 * IB_CM_ASSIGN_SERVICE_ID requests a CM-chosen ID: the current value of
 * cm.listen_service_id is used and the counter is then incremented.  Any
 * other ID whose IB_SERVICE_ID_AGN_MASK bits equal IB_CM_ASSIGN_SERVICE_ID
 * is refused.  Every remaining ID is stored unchanged.
 *
 * Returns 0 on success or -EINVAL for a refused ID.
 */
static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id)
{
 if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
  cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++);
  return 0;
 }

 /* Not the exact "assign" value, yet inside the range it masks. */
 if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID)
  return -EINVAL;

 cm_id_priv->id.service_id = service_id;
 return 0;
}

/**
 * ib_cm_listen - Initiates listening on the specified service ID for
 *   connection and service ID resolution requests.
 * @cm_id: Connection identifier associated with the listen request.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 *
 * Return: 0 on success; -EINVAL if @cm_id is not in IB_CM_IDLE or
 * cm_init_listen() rejects @service_id; -EBUSY if cm_insert_listen()
 * does not return an entry.
 */
int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id)
{
 struct cm_id_private *cm_id_priv =
  container_of(cm_id, struct cm_id_private, id);
 unsigned long flags;
 int ret;

 spin_lock_irqsave(&cm_id_priv->lock, flags);
 if (cm_id_priv->id.state != IB_CM_IDLE) {
  ret = -EINVAL;
 } else {
  ret = cm_init_listen(cm_id_priv, service_id);
  if (!ret) {
   /* Only a successful insert moves the ID to LISTEN. */
   if (cm_insert_listen(cm_id_priv, NULL))
    cm_id_priv->id.state = IB_CM_LISTEN;
   else
    ret = -EBUSY;
  }
 }
 spin_unlock_irqrestore(&cm_id_priv->lock, flags);

 return ret;
}
EXPORT_SYMBOL(ib_cm_listen);

/**
 * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
 *  the given service ID.
 * @device: Device associated with the cm_id.  All related communication will
 * be associated with the specified device.
 * @cm_handler: Callback invoked to notify the user of CM events.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 *
 * If there's an existing ID listening on that same device and service ID,
 * return it.
 *
 * Callers should call ib_destroy_cm_id when done with the listener ID.
 *
 * Return: the listening ib_cm_id (newly created, or the existing one
 * returned by cm_insert_listen()), or an ERR_PTR() value on failure.
 */
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
         ib_cm_handler cm_handler,
         __be64 service_id)
{
 struct cm_id_private *new_priv;
 struct cm_id_private *found_priv;
 int ret;

 /* Create an ID in advance, since the creation may sleep */
 new_priv = cm_alloc_id_priv(device, cm_handler, NULL);
 if (IS_ERR(new_priv))
  return ERR_CAST(new_priv);

 ret = cm_init_listen(new_priv, service_id);
 if (ret) {
  ib_destroy_cm_id(&new_priv->id);
  return ERR_PTR(ret);
 }

 spin_lock_irq(&new_priv->lock);
 found_priv = cm_insert_listen(new_priv, cm_handler);
 if (found_priv == new_priv) {
  new_priv->id.state = IB_CM_LISTEN;
  spin_unlock_irq(&new_priv->lock);

  /*
   * A listen ID does not need to be in the xarray since it does
   * not receive mads, is not placed in the remote_id or
   * remote_qpn rbtree, and does not enter timewait.
   */
  return &new_priv->id;
 }
 spin_unlock_irq(&new_priv->lock);

 /* The freshly allocated ID was not the one inserted; discard it. */
 ib_destroy_cm_id(&new_priv->id);
 if (found_priv)
  return &found_priv->id;
 return ERR_PTR(-EINVAL);
}
EXPORT_SYMBOL(ib_cm_insert_listen);

static __be64 cm_form_tid(struct cm_id_private *cm_id_priv)
{
 u64 hi_tid = 0, low_tid;

 lockdep_assert_held(&cm_id_priv->lock);

 low_tid = (u64)cm_id_priv->id.local_id;
 if (!cm_id_priv->av.port)
  return cpu_to_be64(low_tid);

 read_lock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
 if (cm_id_priv->av.port->mad_agent)
  hi_tid = ((u64)cm_id_priv->av.port->mad_agent->hi_tid) << 32;
 read_unlock(&cm_id_priv->av.port->cm_dev->mad_agent_lock);
 return cpu_to_be64(hi_tid | low_tid);
}

/*
 * Fill in the MAD header fields common to CM messages sent from this file:
 * the base version, the CM management class and class version, the SEND
 * method, and the caller-supplied attribute ID and transaction ID.
 */
static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
         __be16 attr_id, __be64 tid)
{
 /* Per-message values supplied by the caller. */
 hdr->tid = tid;
 hdr->attr_id = attr_id;

 /* Values that are the same for every message formatted here. */
 hdr->method = IB_MGMT_METHOD_SEND;
 hdr->class_version = IB_CM_CLASS_VERSION;
 hdr->mgmt_class = IB_MGMT_CLASS_CM;
 hdr->base_version = IB_MGMT_BASE_VERSION;
}

/*
 * Same as cm_format_mad_hdr(), and additionally stores @attr_mod (given in
 * CPU byte order) into the header's attr_mod field as big-endian.
 */
static void cm_format_mad_ece_hdr(struct ib_mad_hdr *hdr, __be16 attr_id,
      __be64 tid, u32 attr_mod)
{
 cm_format_mad_hdr(hdr, attr_id, tid);
 hdr->attr_mod = cpu_to_be32(attr_mod);
}

/*
 * Fill in the REQ message @req_msg from the caller's request parameters.
 *
 * @req_msg: REQ MAD being built.
 * @cm_id_priv: ID sending the request; supplies the local comm ID, the
 *   device node GUID, the transaction ID and the port's ack delay.
 * @param: Caller-supplied request parameters.  primary_path is
 *   dereferenced unconditionally; alternate_path and private_data are
 *   optional.
 */
static void cm_format_req(struct cm_req_msg *req_msg,
     struct cm_id_private *cm_id_priv,
     struct ib_cm_req_param *param)
{
 struct sa_path_rec *pri_path = param->primary_path;
 struct sa_path_rec *alt_path = param->alternate_path;
 bool pri_ext = false;
 __be16 lid;

 /* Only OPA path records are checked for extended LIDs. */
 if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA)
  pri_ext = opa_is_extended_lid(pri_path->opa.dlid,
           pri_path->opa.slid);

 cm_format_mad_ece_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
         cm_form_tid(cm_id_priv), param->ece.attr_mod);

 IBA_SET(CM_REQ_LOCAL_COMM_ID, req_msg,
  be32_to_cpu(cm_id_priv->id.local_id));
 IBA_SET(CM_REQ_SERVICE_ID, req_msg, be64_to_cpu(param->service_id));
 IBA_SET(CM_REQ_LOCAL_CA_GUID, req_msg,
  be64_to_cpu(cm_id_priv->id.device->node_guid));
 IBA_SET(CM_REQ_LOCAL_QPN, req_msg, param->qp_num);
 IBA_SET(CM_REQ_INITIATOR_DEPTH, req_msg, param->initiator_depth);
 IBA_SET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg,
  param->remote_cm_response_timeout);
 cm_req_set_qp_type(req_msg, param->qp_type);
 IBA_SET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg, param->flow_control);
 IBA_SET(CM_REQ_STARTING_PSN, req_msg, param->starting_psn);
 IBA_SET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg,
  param->local_cm_response_timeout);
 IBA_SET(CM_REQ_PARTITION_KEY, req_msg,
  be16_to_cpu(param->primary_path->pkey));
 IBA_SET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg,
  param->primary_path->mtu);
 IBA_SET(CM_REQ_MAX_CM_RETRIES, req_msg, param->max_cm_retries);

 /* These four fields are left untouched for XRC initiator QPs. */
 if (param->qp_type != IB_QPT_XRC_INI) {
  IBA_SET(CM_REQ_RESPONDER_RESOURCES, req_msg,
   param->responder_resources);
  IBA_SET(CM_REQ_RETRY_COUNT, req_msg, param->retry_count);
  IBA_SET(CM_REQ_RNR_RETRY_COUNT, req_msg,
   param->rnr_retry_count);
  IBA_SET(CM_REQ_SRQ, req_msg, param->srq);
 }

 *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg) =
  pri_path->sgid;
 *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg) =
  pri_path->dgid;
 /*
  * With extended OPA LIDs the interface_id of both GIDs is overwritten
  * with an ID built from the 32-bit LIDs, and the LID fields below are
  * written as 0.
  */
 if (pri_ext) {
  IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg)
   ->global.interface_id =
   OPA_MAKE_ID(be32_to_cpu(pri_path->opa.slid));
  IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg)
   ->global.interface_id =
   OPA_MAKE_ID(be32_to_cpu(pri_path->opa.dlid));
 }
 /* hop_limit <= 1 is the same test used for the SUBNET_LOCAL bit below. */
 if (pri_path->hop_limit <= 1) {
  IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
   be16_to_cpu(pri_ext ? 0 :
           htons(ntohl(sa_path_get_slid(
            pri_path)))));
  IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
   be16_to_cpu(pri_ext ? 0 :
           htons(ntohl(sa_path_get_dlid(
            pri_path)))));
 } else {

  /*
   * Local LID: taken from the inbound path's dlid when one was
   * supplied, otherwise the permissive LID.
   */
  if (param->primary_path_inbound) {
   lid = param->primary_path_inbound->ib.dlid;
   IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
    be16_to_cpu(lid));
  } else
   IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
    be16_to_cpu(IB_LID_PERMISSIVE));

  /* Work-around until there's a way to obtain remote LID info */
  IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
   be16_to_cpu(IB_LID_PERMISSIVE));
 }
 IBA_SET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg,
  be32_to_cpu(pri_path->flow_label));
 IBA_SET(CM_REQ_PRIMARY_PACKET_RATE, req_msg, pri_path->rate);
 IBA_SET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg, pri_path->traffic_class);
 IBA_SET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg, pri_path->hop_limit);
 IBA_SET(CM_REQ_PRIMARY_SL, req_msg, pri_path->sl);
 IBA_SET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg,
  (pri_path->hop_limit <= 1));
 IBA_SET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg,
  cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
          pri_path->packet_life_time));

 /* The alternate path mirrors the primary one, minus the inbound-path case. */
 if (alt_path) {
  bool alt_ext = false;

  if (alt_path->rec_type == SA_PATH_REC_TYPE_OPA)
   alt_ext = opa_is_extended_lid(alt_path->opa.dlid,
            alt_path->opa.slid);

  *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg) =
   alt_path->sgid;
  *IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg) =
   alt_path->dgid;
  if (alt_ext) {
   IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
     req_msg)
    ->global.interface_id =
    OPA_MAKE_ID(be32_to_cpu(alt_path->opa.slid));
   IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_REMOTE_PORT_GID,
     req_msg)
    ->global.interface_id =
    OPA_MAKE_ID(be32_to_cpu(alt_path->opa.dlid));
  }
  if (alt_path->hop_limit <= 1) {
   IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
    be16_to_cpu(
     alt_ext ? 0 :
        htons(ntohl(sa_path_get_slid(
         alt_path)))));
   IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
    be16_to_cpu(
     alt_ext ? 0 :
        htons(ntohl(sa_path_get_dlid(
         alt_path)))));
  } else {
   IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
    be16_to_cpu(IB_LID_PERMISSIVE));
   IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
    be16_to_cpu(IB_LID_PERMISSIVE));
  }
  IBA_SET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg,
   be32_to_cpu(alt_path->flow_label));
  IBA_SET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg, alt_path->rate);
  IBA_SET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg,
   alt_path->traffic_class);
  IBA_SET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg,
   alt_path->hop_limit);
  IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, alt_path->sl);
  IBA_SET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg,
   (alt_path->hop_limit <= 1));
  IBA_SET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg,
   cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
           alt_path->packet_life_time));
 }
 IBA_SET(CM_REQ_VENDOR_ID, req_msg, param->ece.vendor_id);

 /* Private data is copied only when both a pointer and a length are given. */
 if (param->private_data && param->private_data_len)
  IBA_SET_MEM(CM_REQ_PRIVATE_DATA, req_msg, param->private_data,
       param->private_data_len);
}

static int cm_validate_req_param(struct ib_cm_req_param *param)
{
 if (!param->primary_path)
  return -EINVAL;

 if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
     param->qp_type != IB_QPT_XRC_INI)
  return -EINVAL;

 if (param->private_data &&
     param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
  return -EINVAL;

 if (param->alternate_path &&
     (param->alternate_path->pkey != param->primary_path->pkey ||
      param->alternate_path->mtu != param->primary_path->mtu))
  return -EINVAL;

 return 0;
}

/**
 * ib_send_cm_req - Send a connection request (REQ) on an idle cm_id.
 * @cm_id: Connection identifier; must be in IB_CM_IDLE and must not already
 *   have timewait_info attached.
 * @param: Request parameters, checked with cm_validate_req_param().
 *
 * Return: 0 once the REQ MAD has been posted and the state has moved to
 * IB_CM_REQ_SENT; -EINVAL if @cm_id is not usable; otherwise the error
 * returned by parameter validation, timewait/address-vector/message
 * allocation or ib_post_send_mad().
 */
int ib_send_cm_req(struct ib_cm_id *cm_id,
     struct ib_cm_req_param *param)
{
 struct cm_av av = {}, alt_av = {};
 struct cm_id_private *cm_id_priv;
 struct ib_mad_send_buf *msg;
 struct cm_req_msg *req_msg;
 unsigned long flags;
 int ret;

 ret = cm_validate_req_param(param);
 if (ret)
  return ret;

 /* Verify that we're not in timewait. */
 cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 spin_lock_irqsave(&cm_id_priv->lock, flags);
 if (cm_id->state != IB_CM_IDLE || WARN_ON(cm_id_priv->timewait_info)) {
  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
  return -EINVAL;
 }
 spin_unlock_irqrestore(&cm_id_priv->lock, flags);

 /* The lock is not held from here until the address vectors are built. */
 cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
           id.local_id);
 if (IS_ERR(cm_id_priv->timewait_info)) {
  ret = PTR_ERR(cm_id_priv->timewait_info);
  cm_id_priv->timewait_info = NULL;
  return ret;
 }

 /*
  * NOTE(review): on the two error returns below, timewait_info stays
  * attached to cm_id_priv (cm_destroy_id() frees it), so a second call
  * on the same ID would hit the WARN_ON() check above - confirm that
  * callers always destroy the ID after a failure here.
  */
 ret = cm_init_av_by_path(param->primary_path,
     param->ppath_sgid_attr, &av);
 if (ret)
  return ret;
 if (param->alternate_path) {
  ret = cm_init_av_by_path(param->alternate_path, NULL,
      &alt_av);
  if (ret) {
   cm_destroy_av(&av);
   return ret;
  }
 }
 cm_id->service_id = param->service_id;
 /* Twice the converted packet life time plus the remote CM response timeout. */
 cm_id_priv->timeout_ms = cm_convert_to_ms(
        param->primary_path->packet_life_time) * 2 +
     cm_convert_to_ms(
        param->remote_cm_response_timeout);
 cm_id_priv->max_cm_retries = param->max_cm_retries;
 cm_id_priv->initiator_depth = param->initiator_depth;
 cm_id_priv->responder_resources = param->responder_resources;
 cm_id_priv->retry_count = param->retry_count;
 cm_id_priv->path_mtu = param->primary_path->mtu;
 cm_id_priv->pkey = param->primary_path->pkey;
 cm_id_priv->qp_type = param->qp_type;

 spin_lock_irqsave(&cm_id_priv->lock, flags);

 /* Install the address vectors that were built above into the ID. */
 cm_move_av_from_path(&cm_id_priv->av, &av);
 if (param->primary_path_outbound)
  cm_id_priv->av.dlid_datapath =
   be16_to_cpu(param->primary_path_outbound->ib.dlid);

 if (param->alternate_path)
  cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av);

 msg = cm_alloc_priv_msg(cm_id_priv, IB_CM_REQ_SENT);
 if (IS_ERR(msg)) {
  ret = PTR_ERR(msg);
  goto out_unlock;
 }

 req_msg = (struct cm_req_msg *)msg->mad;
 cm_format_req(req_msg, cm_id_priv, param);
 /* Remember values from the formatted REQ on the ID. */
 cm_id_priv->tid = req_msg->hdr.tid;

 cm_id_priv->local_qpn = cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
 cm_id_priv->rq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));

 trace_icm_send_req(&cm_id_priv->id);
 ret = ib_post_send_mad(msg, NULL);
 if (ret)
  goto out_free;
 /* The state was IDLE when checked earlier; it must not have changed. */
 BUG_ON(cm_id->state != IB_CM_IDLE);
 cm_id->state = IB_CM_REQ_SENT;
 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 return 0;
out_free:
 cm_free_priv_msg(msg);
out_unlock:
 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);

/*
 * Send a REJ in direct response to a received MAD, without involving a
 * cm_id.  The transaction ID is copied from the received message and its
 * local/remote comm IDs are swapped into the reply.
 *
 * @port: Port passed to cm_alloc_response_msg() for the reply.
 * @mad_recv_wc: Received MAD being rejected.
 * @reason: Reject reason code.
 * @msg_rejected: Which message type is being rejected.
 * @ari: Optional additional reject information (may be NULL).
 * @ari_length: Length of @ari in bytes.
 *
 * Returns 0 on success, or the error from cm_alloc_response_msg() or
 * ib_post_send_mad().
 */
static int cm_issue_rej(struct cm_port *port,
   struct ib_mad_recv_wc *mad_recv_wc,
   enum ib_cm_rej_reason reason,
   enum cm_msg_response msg_rejected,
   void *ari, u8 ari_length)
{
 struct cm_rej_msg *in_msg, *out_msg;
 struct ib_mad_send_buf *send_buf = NULL;
 int status;

 status = cm_alloc_response_msg(port, mad_recv_wc, false, &send_buf);
 if (status)
  return status;

 /* We just need common CM header information.  Cast to any message. */
 in_msg = (struct cm_rej_msg *)mad_recv_wc->recv_buf.mad;
 out_msg = (struct cm_rej_msg *)send_buf->mad;

 cm_format_mad_hdr(&out_msg->hdr, CM_REJ_ATTR_ID, in_msg->hdr.tid);
 IBA_SET(CM_REJ_REMOTE_COMM_ID, out_msg,
  IBA_GET(CM_REJ_LOCAL_COMM_ID, in_msg));
 IBA_SET(CM_REJ_LOCAL_COMM_ID, out_msg,
  IBA_GET(CM_REJ_REMOTE_COMM_ID, in_msg));
 IBA_SET(CM_REJ_MESSAGE_REJECTED, out_msg, msg_rejected);
 IBA_SET(CM_REJ_REASON, out_msg, reason);

 if (ari && ari_length) {
  IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, out_msg, ari_length);
  IBA_SET_MEM(CM_REJ_ARI, out_msg, ari, ari_length);
 }

 trace_icm_issue_rej(IBA_GET(CM_REJ_LOCAL_COMM_ID, in_msg),
       IBA_GET(CM_REJ_REMOTE_COMM_ID, in_msg));

 status = ib_post_send_mad(send_buf, NULL);
 if (status)
  cm_free_msg(send_buf);

 return status;
}

static bool cm_req_has_alt_path(struct cm_req_msg *req_msg)
{
 return ((cpu_to_be16(
   IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg))) ||
  (ib_is_opa_gid(IBA_GET_MEM_PTR(CM_REQ_ALTERNATE_LOCAL_PORT_GID,
            req_msg))));
}

/*
 * Choose the record type for @path: OPA when @gid is an OPA GID and the
 * port reports OPA address-handle capability, IB otherwise.
 */
static void cm_path_set_rec_type(struct ib_device *ib_device, u32 port_num,
     struct sa_path_rec *path, union ib_gid *gid)
{
 bool use_opa = ib_is_opa_gid(gid) &&
         rdma_cap_opa_ah(ib_device, port_num);

 path->rec_type = use_opa ? SA_PATH_REC_TYPE_OPA : SA_PATH_REC_TYPE_IB;
}

static void cm_format_path_lid_from_req(struct cm_req_msg *req_msg,
     struct sa_path_rec *primary_path,
     struct sa_path_rec *alt_path,
     struct ib_wc *wc)
{
 u32 lid;

 if (primary_path->rec_type != SA_PATH_REC_TYPE_OPA) {
  sa_path_set_dlid(primary_path, wc->slid);
  sa_path_set_slid(primary_path,
     IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
      req_msg));
 } else {
  lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg));
  sa_path_set_dlid(primary_path, lid);

  lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg));
  sa_path_set_slid(primary_path, lid);
 }

 if (!cm_req_has_alt_path(req_msg))
  return;

 if (alt_path->rec_type != SA_PATH_REC_TYPE_OPA) {
  sa_path_set_dlid(alt_path,
     IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
      req_msg));
  sa_path_set_slid(alt_path,
     IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
      req_msg));
 } else {
  lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg));
  sa_path_set_dlid(alt_path, lid);

  lid = opa_get_lid_from_gid(IBA_GET_MEM_PTR(
   CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg));
  sa_path_set_slid(alt_path, lid);
 }
}

static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
         struct sa_path_rec *primary_path,
         struct sa_path_rec *alt_path,
         struct ib_wc *wc)
{
 primary_path->dgid =
  *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID, req_msg);
 primary_path->sgid =
  *IBA_GET_MEM_PTR(CM_REQ_PRIMARY_REMOTE_PORT_GID, req_msg);
 primary_path->flow_label =
  cpu_to_be32(IBA_GET(CM_REQ_PRIMARY_FLOW_LABEL, req_msg));
 primary_path->hop_limit = IBA_GET(CM_REQ_PRIMARY_HOP_LIMIT, req_msg);
 primary_path->traffic_class =
  IBA_GET(CM_REQ_PRIMARY_TRAFFIC_CLASS, req_msg);
 primary_path->reversible = 1;
 primary_path->pkey =
  cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
 primary_path->sl = IBA_GET(CM_REQ_PRIMARY_SL, req_msg);
 primary_path->mtu_selector = IB_SA_EQ;
 primary_path->mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
 primary_path->rate_selector = IB_SA_EQ;
 primary_path->rate = IBA_GET(CM_REQ_PRIMARY_PACKET_RATE, req_msg);
 primary_path->packet_life_time_selector = IB_SA_EQ;
 primary_path->packet_life_time =
  IBA_GET(CM_REQ_PRIMARY_LOCAL_ACK_TIMEOUT, req_msg);
 primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
 primary_path->service_id =
  cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
 if (sa_path_is_roce(primary_path))
  primary_path->roce.route_resolved = false;

 if (cm_req_has_alt_path(req_msg)) {
  alt_path->dgid = *IBA_GET_MEM_PTR(
   CM_REQ_ALTERNATE_LOCAL_PORT_GID, req_msg);
  alt_path->sgid = *IBA_GET_MEM_PTR(
   CM_REQ_ALTERNATE_REMOTE_PORT_GID, req_msg);
  alt_path->flow_label = cpu_to_be32(
   IBA_GET(CM_REQ_ALTERNATE_FLOW_LABEL, req_msg));
  alt_path->hop_limit =
   IBA_GET(CM_REQ_ALTERNATE_HOP_LIMIT, req_msg);
  alt_path->traffic_class =
   IBA_GET(CM_REQ_ALTERNATE_TRAFFIC_CLASS, req_msg);
  alt_path->reversible = 1;
  alt_path->pkey =
   cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
  alt_path->sl = IBA_GET(CM_REQ_ALTERNATE_SL, req_msg);
  alt_path->mtu_selector = IB_SA_EQ;
  alt_path->mtu =
   IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
  alt_path->rate_selector = IB_SA_EQ;
  alt_path->rate = IBA_GET(CM_REQ_ALTERNATE_PACKET_RATE, req_msg);
  alt_path->packet_life_time_selector = IB_SA_EQ;
  alt_path->packet_life_time =
   IBA_GET(CM_REQ_ALTERNATE_LOCAL_ACK_TIMEOUT, req_msg);
  alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
  alt_path->service_id =
   cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));

  if (sa_path_is_roce(alt_path))
   alt_path->roce.route_resolved = false;
 }
 cm_format_path_lid_from_req(req_msg, primary_path, alt_path, wc);
}

/*
 * Look up the pkey for the pkey index reported in the work completion of
 * the received MAD, using the port's cached pkey table.
 *
 * Returns the pkey, or 0 (after a rate-limited warning) if the lookup fails.
 */
static u16 cm_get_bth_pkey(struct cm_work *work)
{
 struct ib_device *ib_dev = work->port->cm_dev->ib_device;
 u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
 u32 port_num = work->port->port_num;
 u16 pkey;
 int ret;

 ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
 if (!ret)
  return pkey;

 dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %u, pkey index %u). %d\n",
        port_num, pkey_index, ret);
 return 0;
}

/**
 * cm_opa_to_ib_sgid - Convert OPA SGID to IB SGID
 * @work: Work completion
 * @path: Path record
 *
 * ULPs (such as IPoIB) do not understand OPA GIDs and will
 * reject them as the local_gid will not match the sgid. Therefore,
 * change the pathrec's SGID to an IB SGID.
 *
 * The replacement is the GID at index 0 of the receiving port.  If that
 * query fails a warning is logged and @path is left unchanged.
 */
static void cm_opa_to_ib_sgid(struct cm_work *work,
         struct sa_path_rec *path)
{
 struct ib_device *dev = work->port->cm_dev->ib_device;
 u32 port_num = work->port->port_num;
 union ib_gid sgid;

 /* Nothing to convert unless this is an OPA port with an OPA SGID. */
 if (!rdma_cap_opa_ah(dev, port_num) || !ib_is_opa_gid(&path->sgid))
  return;

 if (rdma_query_gid(dev, port_num, 0, &sgid)) {
  dev_warn(&dev->dev,
    "Error updating sgid in CM request\n");
  return;
 }

 path->sgid = sgid;
}

static void cm_format_req_event(struct cm_work *work,
    struct cm_id_private *cm_id_priv,
    struct ib_cm_id *listen_id)
{
 struct cm_req_msg *req_msg;
 struct ib_cm_req_event_param *param;

 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 param = &work->cm_event.param.req_rcvd;
 param->listen_id = listen_id;
 param->bth_pkey = cm_get_bth_pkey(work);
 param->port = cm_id_priv->av.port->port_num;
 param->primary_path = &work->path[0];
 cm_opa_to_ib_sgid(work, param->primary_path);
 if (cm_req_has_alt_path(req_msg)) {
  param->alternate_path = &work->path[1];
  cm_opa_to_ib_sgid(work, param->alternate_path);
 } else {
  param->alternate_path = NULL;
 }
 param->remote_ca_guid =
  cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
 param->remote_qkey = IBA_GET(CM_REQ_LOCAL_Q_KEY, req_msg);
 param->remote_qpn = IBA_GET(CM_REQ_LOCAL_QPN, req_msg);
 param->qp_type = cm_req_get_qp_type(req_msg);
 param->starting_psn = IBA_GET(CM_REQ_STARTING_PSN, req_msg);
 param->responder_resources = IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
 param->initiator_depth = IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
 param->local_cm_response_timeout =
  IBA_GET(CM_REQ_REMOTE_CM_RESPONSE_TIMEOUT, req_msg);
 param->flow_control = IBA_GET(CM_REQ_END_TO_END_FLOW_CONTROL, req_msg);
 param->remote_cm_response_timeout =
  IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg);
 param->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
 param->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
 param->srq = IBA_GET(CM_REQ_SRQ, req_msg);
 param->ppath_sgid_attr = cm_id_priv->av.ah_attr.grh.sgid_attr;
 param->ece.vendor_id = IBA_GET(CM_REQ_VENDOR_ID, req_msg);
 param->ece.attr_mod = be32_to_cpu(req_msg->hdr.attr_mod);

 work->cm_event.private_data =
  IBA_GET_MEM_PTR(CM_REQ_PRIVATE_DATA, req_msg);
}

/*
 * Deliver @work's event to the ID's cm_handler, then keep delivering any
 * further work queued on @cm_id_priv.
 *
 * Each work item is freed after its handler call.  A non-zero handler
 * return stops the loop and causes the ID to be destroyed, with the return
 * value passed to cm_destroy_id() as the error.
 */
static void cm_process_work(struct cm_id_private *cm_id_priv,
       struct cm_work *work)
{
 int ret;

 /* We will typically only have the current event to report. */
 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
 cm_free_work(work);

 /*
  * Continue only while the handler succeeded and decrementing
  * work_count does not make it negative.
  */
 while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
  spin_lock_irq(&cm_id_priv->lock);
  work = cm_dequeue_work(cm_id_priv);
  spin_unlock_irq(&cm_id_priv->lock);
  /* Nothing was queued: return without reaching cm_deref_id() below. */
  if (!work)
   return;

  ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
      &work->cm_event);
  cm_free_work(work);
 }
 cm_deref_id(cm_id_priv);
 if (ret)
  cm_destroy_id(&cm_id_priv->id, ret);
}

/*
 * Format an MRA message for @cm_id_priv.
 *
 * @mra_msg: MRA MAD being built.
 * @cm_id_priv: ID supplying the transaction ID and local/remote comm IDs.
 * @msg_mraed: Which message type the MRA acknowledges.
 * @private_data: Optional private data (may be NULL).
 * @private_data_len: Length of @private_data in bytes.
 *
 * The service timeout is always set to CM_MRA_SETTING.
 */
static void cm_format_mra(struct cm_mra_msg *mra_msg,
     struct cm_id_private *cm_id_priv,
     enum cm_msg_response msg_mraed,
     const void *private_data, u8 private_data_len)
{
 cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);

 IBA_SET(CM_MRA_LOCAL_COMM_ID, mra_msg,
  be32_to_cpu(cm_id_priv->id.local_id));
 IBA_SET(CM_MRA_REMOTE_COMM_ID, mra_msg,
  be32_to_cpu(cm_id_priv->id.remote_id));
 IBA_SET(CM_MRA_MESSAGE_MRAED, mra_msg, msg_mraed);
 IBA_SET(CM_MRA_SERVICE_TIMEOUT, mra_msg, CM_MRA_SETTING);

 /* Private data is optional; skip it unless both parts are given. */
 if (!private_data || !private_data_len)
  return;

 IBA_SET_MEM(CM_MRA_PRIVATE_DATA, mra_msg, private_data,
      private_data_len);
}

/*
 * Format a REJ message on behalf of @cm_id_priv.
 *
 * @state selects the "message rejected" code and the local comm ID:
 *   IB_CM_REQ_RCVD                     - REQ rejected, local comm ID 0
 *   IB_CM_MRA_REQ_SENT                 - REQ rejected
 *   IB_CM_REP_RCVD, IB_CM_MRA_REP_SENT - REP rejected
 *   anything else                      - "other"
 * In all but the first case the local comm ID is the ID's own local_id.
 *
 * @ari/@ari_length and @private_data/@private_data_len are optional and
 * are copied only when both the pointer and the length are non-zero.
 *
 * Must be called with cm_id_priv->lock held.
 */
static void cm_format_rej(struct cm_rej_msg *rej_msg,
     struct cm_id_private *cm_id_priv,
     enum ib_cm_rej_reason reason, void *ari,
     u8 ari_length, const void *private_data,
     u8 private_data_len, enum ib_cm_state state)
{
 u32 local_comm_id = be32_to_cpu(cm_id_priv->id.local_id);
 enum cm_msg_response rejected;

 lockdep_assert_held(&cm_id_priv->lock);

 switch (state) {
 case IB_CM_REQ_RCVD:
  local_comm_id = 0;
  rejected = CM_MSG_RESPONSE_REQ;
  break;
 case IB_CM_MRA_REQ_SENT:
  rejected = CM_MSG_RESPONSE_REQ;
  break;
 case IB_CM_REP_RCVD:
 case IB_CM_MRA_REP_SENT:
  rejected = CM_MSG_RESPONSE_REP;
  break;
 default:
  rejected = CM_MSG_RESPONSE_OTHER;
  break;
 }

 cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
 IBA_SET(CM_REJ_REMOTE_COMM_ID, rej_msg,
  be32_to_cpu(cm_id_priv->id.remote_id));
 IBA_SET(CM_REJ_LOCAL_COMM_ID, rej_msg, local_comm_id);
 IBA_SET(CM_REJ_MESSAGE_REJECTED, rej_msg, rejected);
 IBA_SET(CM_REJ_REASON, rej_msg, reason);

 if (ari && ari_length) {
  IBA_SET(CM_REJ_REJECTED_INFO_LENGTH, rej_msg, ari_length);
  IBA_SET_MEM(CM_REJ_ARI, rej_msg, ari, ari_length);
 }

 if (private_data && private_data_len)
  IBA_SET_MEM(CM_REJ_PRIVATE_DATA, rej_msg, private_data,
       private_data_len);
}

/*
 * Respond to a received REQ that duplicates the one associated with
 * @cm_id_priv.
 *
 * The duplicate counter is always incremented.  Depending on the ID's
 * current state the function then:
 *   IB_CM_REQ_RCVD     - does nothing further;
 *   IB_CM_MRA_REQ_SENT - sends an MRA carrying the ID's stored private data;
 *   IB_CM_TIMEWAIT     - sends a REJ with IB_CM_REJ_STALE_CONN;
 *   any other state    - frees the allocated response without sending.
 */
static void cm_dup_req_handler(struct cm_work *work,
          struct cm_id_private *cm_id_priv)
{
 struct ib_mad_send_buf *msg = NULL;
 int ret;

 atomic_long_inc(
  &work->port->counters[CM_RECV_DUPLICATES][CM_REQ_COUNTER]);

 /* Quick state check to discard duplicate REQs. */
 spin_lock_irq(&cm_id_priv->lock);
 if (cm_id_priv->id.state == IB_CM_REQ_RCVD) {
  spin_unlock_irq(&cm_id_priv->lock);
  return;
 }
 spin_unlock_irq(&cm_id_priv->lock);

 /* The response is allocated with the lock dropped. */
 ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, true, &msg);
 if (ret)
  return;

 /* Re-read the state under the lock; it decides what, if anything, is sent. */
 spin_lock_irq(&cm_id_priv->lock);
 switch (cm_id_priv->id.state) {
 case IB_CM_MRA_REQ_SENT:
  cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
         CM_MSG_RESPONSE_REQ,
         cm_id_priv->private_data,
         cm_id_priv->private_data_len);
  break;
 case IB_CM_TIMEWAIT:
  cm_format_rej((struct cm_rej_msg *)msg->mad, cm_id_priv,
         IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0,
         IB_CM_TIMEWAIT);
  break;
 default:
  goto unlock;
 }
 spin_unlock_irq(&cm_id_priv->lock);

 trace_icm_send_dup_req(&cm_id_priv->id);
 ret = ib_post_send_mad(msg, NULL);
 if (ret)
  goto free;
 return;

 /* Error/no-op exits: release the lock if still held, then free the response. */
unlock: spin_unlock_irq(&cm_id_priv->lock);
free: cm_free_msg(msg);
}

/*
 * Classify a received REQ and find the listener that should handle it.
 *
 * @work: Work item for the received REQ.
 * @cm_id_priv: Newly created ID for this REQ; its timewait_info is inserted
 *   into the remote ID and remote QPN lookups.
 *
 * Returns the matching listening ID, or NULL when the REQ is not to be
 * processed further:
 *   - cm_insert_remote_id() returned an existing entry: treated as a
 *     duplicate and passed to cm_dup_req_handler() if its ID can be acquired;
 *   - cm_insert_remote_qpn() returned an existing entry: treated as a stale
 *     connection; a REJ is issued and a DREQ is sent on the existing ID if
 *     it can be acquired;
 *   - no listener for the service ID: a REJ with
 *     IB_CM_REJ_INVALID_SERVICE_ID is issued.
 *
 * cm.lock is taken here and released on every return path.
 */
static struct cm_id_private *cm_match_req(struct cm_work *work,
       struct cm_id_private *cm_id_priv)
{
 struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
 struct cm_timewait_info *timewait_info;
 struct cm_req_msg *req_msg;

 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;

 /* Check for possible duplicate REQ. */
 spin_lock_irq(&cm.lock);
 timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
 if (timewait_info) {
  cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
        timewait_info->work.remote_id);
  spin_unlock_irq(&cm.lock);
  if (cur_cm_id_priv) {
   cm_dup_req_handler(work, cur_cm_id_priv);
   cm_deref_id(cur_cm_id_priv);
  }
  return NULL;
 }

 /* Check for stale connections. */
 timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
 if (timewait_info) {
  /* Undo the insertions made for the new ID before dropping the lock. */
  cm_remove_remote(cm_id_priv);
  cur_cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
        timewait_info->work.remote_id);

  spin_unlock_irq(&cm.lock);
  cm_issue_rej(work->port, work->mad_recv_wc,
        IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
        NULL, 0);
  if (cur_cm_id_priv) {
   ib_send_cm_dreq(&cur_cm_id_priv->id, NULL, 0);
   cm_deref_id(cur_cm_id_priv);
  }
  return NULL;
 }

 /* Find matching listen request. */
 listen_cm_id_priv = cm_find_listen(
  cm_id_priv->id.device,
  cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)));
 if (!listen_cm_id_priv) {
  cm_remove_remote(cm_id_priv);
  spin_unlock_irq(&cm.lock);
  cm_issue_rej(work->port, work->mad_recv_wc,
        IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
        NULL, 0);
  return NULL;
 }
 spin_unlock_irq(&cm.lock);
 return listen_cm_id_priv;
}

/*
 * Work-around for inter-subnet connections.  If the LIDs are permissive,
 * we need to override the LID/SL data in the REQ with the LID information
 * in the work completion.
 */

static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
{
 if (!IBA_GET(CM_REQ_PRIMARY_SUBNET_LOCAL, req_msg)) {
  if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID,
     req_msg)) == IB_LID_PERMISSIVE) {
   IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg,
    be16_to_cpu(ib_lid_be16(wc->slid)));
   IBA_SET(CM_REQ_PRIMARY_SL, req_msg, wc->sl);
  }

  if (cpu_to_be16(IBA_GET(CM_REQ_PRIMARY_REMOTE_PORT_LID,
     req_msg)) == IB_LID_PERMISSIVE)
   IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg,
    wc->dlid_path_bits);
 }

 if (!IBA_GET(CM_REQ_ALTERNATE_SUBNET_LOCAL, req_msg)) {
  if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_LOCAL_PORT_LID,
     req_msg)) == IB_LID_PERMISSIVE) {
   IBA_SET(CM_REQ_ALTERNATE_LOCAL_PORT_LID, req_msg,
    be16_to_cpu(ib_lid_be16(wc->slid)));
   IBA_SET(CM_REQ_ALTERNATE_SL, req_msg, wc->sl);
  }

  if (cpu_to_be16(IBA_GET(CM_REQ_ALTERNATE_REMOTE_PORT_LID,
     req_msg)) == IB_LID_PERMISSIVE)
   IBA_SET(CM_REQ_ALTERNATE_REMOTE_PORT_LID, req_msg,
    wc->dlid_path_bits);
 }
}

static int cm_req_handler(struct cm_work *work)
{
 struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
 struct cm_req_msg *req_msg;
 const struct ib_global_route *grh;
 const struct ib_gid_attr *gid_attr;
 int ret;

 req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;

 cm_id_priv =
  cm_alloc_id_priv(work->port->cm_dev->ib_device, NULL, NULL);
 if (IS_ERR(cm_id_priv))
  return PTR_ERR(cm_id_priv);

 cm_id_priv->id.remote_id =
  cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg));
 cm_id_priv->id.service_id =
  cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg));
 cm_id_priv->tid = req_msg->hdr.tid;
 cm_id_priv->timeout_ms = cm_convert_to_ms(
  IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg));
 cm_id_priv->max_cm_retries = IBA_GET(CM_REQ_MAX_CM_RETRIES, req_msg);
 cm_id_priv->remote_qpn =
  cpu_to_be32(IBA_GET(CM_REQ_LOCAL_QPN, req_msg));
 cm_id_priv->initiator_depth =
  IBA_GET(CM_REQ_RESPONDER_RESOURCES, req_msg);
 cm_id_priv->responder_resources =
  IBA_GET(CM_REQ_INITIATOR_DEPTH, req_msg);
 cm_id_priv->path_mtu = IBA_GET(CM_REQ_PATH_PACKET_PAYLOAD_MTU, req_msg);
 cm_id_priv->pkey = cpu_to_be16(IBA_GET(CM_REQ_PARTITION_KEY, req_msg));
 cm_id_priv->sq_psn = cpu_to_be32(IBA_GET(CM_REQ_STARTING_PSN, req_msg));
 cm_id_priv->retry_count = IBA_GET(CM_REQ_RETRY_COUNT, req_msg);
 cm_id_priv->rnr_retry_count = IBA_GET(CM_REQ_RNR_RETRY_COUNT, req_msg);
 cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);

 ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
          work->mad_recv_wc->recv_buf.grh,
          &cm_id_priv->av);
 if (ret)
  goto destroy;
 cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
           id.local_id);
 if (IS_ERR(cm_id_priv->timewait_info)) {
  ret = PTR_ERR(cm_id_priv->timewait_info);
  cm_id_priv->timewait_info = NULL;
  goto destroy;
 }
 cm_id_priv->timewait_info->work.remote_id = cm_id_priv->id.remote_id;
 cm_id_priv->timewait_info->remote_ca_guid =
  cpu_to_be64(IBA_GET(CM_REQ_LOCAL_CA_GUID, req_msg));
 cm_id_priv->timewait_info->remote_qpn = cm_id_priv->remote_qpn;

 /*
 * Note that the ID pointer is not in the xarray at this point,
 * so this set is only visible to the local thread.
 */

 cm_id_priv->id.state = IB_CM_REQ_RCVD;

 listen_cm_id_priv = cm_match_req(work, cm_id_priv);
 if (!listen_cm_id_priv) {
  trace_icm_no_listener_err(&cm_id_priv->id);
  cm_id_priv->id.state = IB_CM_IDLE;
  ret = -EINVAL;
  goto destroy;
 }

 memset(&work->path[0], 0, sizeof(work->path[0]));
 if (cm_req_has_alt_path(req_msg))
  memset(&work->path[1], 0, sizeof(work->path[1]));
 grh = rdma_ah_read_grh(&cm_id_priv->av.ah_attr);
 gid_attr = grh->sgid_attr;

 if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE) {
  work->path[0].rec_type =
   sa_conv_gid_to_pathrec_type(gid_attr->gid_type);
 } else {
  cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
  cm_path_set_rec_type(
   work->port->cm_dev->ib_device, work->port->port_num,
   &work->path[0],
   IBA_GET_MEM_PTR(CM_REQ_PRIMARY_LOCAL_PORT_GID,
     req_msg));
 }
 if (cm_req_has_alt_path(req_msg))
  work->path[1].rec_type = work->path[0].rec_type;
 cm_format_paths_from_req(req_msg, &work->path[0],
     &work->path[1], work->mad_recv_wc->wc);
 if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_ROCE)
  sa_path_set_dmac(&work->path[0],
     cm_id_priv->av.ah_attr.roce.dmac);
 work->path[0].hop_limit = grh->hop_limit;

 /* This destroy call is needed to pair with cm_init_av_for_response */
 cm_destroy_av(&cm_id_priv->av);
 ret = cm_init_av_by_path(&work->path[0], gid_attr, &cm_id_priv->av);
 if (ret) {
  int err;

  err = rdma_query_gid(work->port->cm_dev->ib_device,
         work->port->port_num, 0,
         &work->path[0].sgid);
  if (err)
   ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
           NULL, 0, NULL, 0);
  else
   ib_send_cm_rej(&cm_id_priv->id, IB_CM_REJ_INVALID_GID,
           &work->path[0].sgid,
           sizeof(work->path[0].sgid),
--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5
C=98 H=90 G=94

¤ Dauer der Verarbeitung: 0.16 Sekunden  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.