/*
 * Copyright (c) 2006 Mellanox Technologies. All rights reserved
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
module_param_named(max_nonsrq_conn_qp, ipoib_max_conn_qp, int, 0444);
MODULE_PARM_DESC(max_nonsrq_conn_qp,
		 "Max number of connected-mode QPs per interface "
		 "(applied only if shared receive queue is not available)");
	for (i = 0; i < ipoib_recvq_size; ++i)
		if (rx_ring[i].skb) {
			ipoib_cm_dma_unmap_rx(priv, IPOIB_CM_RX_SG - 1,
					      rx_ring[i].mapping);
			dev_kfree_skb_any(rx_ring[i].skb);
		}
	/* We only reserved 1 extra slot in CQ for drain WRs, so
	 * make sure we have at most 1 outstanding WR. */
	if (list_empty(&priv->cm.rx_flush_list) ||
	    !list_empty(&priv->cm.rx_drain_list))
		return;
	/*
	 * QPs on the flush list are in the error state.  This way, a
	 * "flush error" WC will be immediately generated for each WR
	 * we post.
	 */
p = list_entry(priv->cm.rx_flush_list.next, typeof(*p), list);
	ipoib_cm_rx_drain_wr.wr_id = IPOIB_CM_RX_DRAIN_WRID;
	if (ib_post_send(p->qp, &ipoib_cm_rx_drain_wr, NULL))
ipoib_warn(priv, "failed to post drain wr\n");
	qp_attr.qp_state = IB_QPS_INIT;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for INIT: %d\n", ret);
		return ret;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to INIT: %d\n", ret);
		return ret;
	}
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTR: %d\n", ret);
		return ret;
	}
	qp_attr.rq_psn = psn;
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTR: %d\n", ret);
		return ret;
	}
	/*
	 * Current Mellanox HCA firmware won't generate completions
	 * with error for drain WRs unless the QP has been moved to
	 * RTS first.  This work-around leaves a window where a QP has
	 * moved to error asynchronously, but this will eventually get
	 * fixed in firmware, so let's not error out if modify QP
	 * fails.
	 */
	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(cm_id, &qp_attr, &qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to init QP attr for RTS: %d\n", ret);
		return 0;
	}
	ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
	if (ret) {
		ipoib_warn(priv, "failed to modify QP to RTS: %d\n", ret);
		return 0;
	}
	p->qp = ipoib_cm_create_rx_qp(dev, p);
	if (IS_ERR(p->qp)) {
		ret = PTR_ERR(p->qp);
		goto err_qp;
	}

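	/* The starting PSN for the receive side is a random 24-bit value. */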
	psn = get_random_u32() & 0xffffff;
	ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
	if (ret)
		goto err_modify;

	if (!ipoib_cm_has_srq(dev)) {
		ret = ipoib_cm_nonsrq_init_rx(dev, cm_id, p);
		if (ret)
			goto err_modify;
	}
spin_lock_irq(&priv->lock);
	queue_delayed_work(priv->wq,
			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
	/* Add this entry to passive ids list head, but do not re-add it
	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
	p->jiffies = jiffies;
	if (p->state == IPOIB_CM_RX_LIVE)
		list_move(&p->list, &priv->cm.passive_ids);
spin_unlock_irq(&priv->lock);
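	/* Send the REP; if that fails, move the new QP to the error state so
	 * it can be torn down through the usual error path. */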
	ret = ipoib_cm_send_rep(dev, cm_id, p->qp, &event->param.req_rcvd, psn);
	if (ret) {
		ipoib_warn(priv, "failed to send REP: %d\n", ret);
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
	}
	return 0;
	switch (event->event) {
	case IB_CM_REQ_RECEIVED:
		return ipoib_cm_req_handler(cm_id, event);
	case IB_CM_DREQ_RECEIVED:
		ib_send_cm_drep(cm_id, NULL, 0);
		fallthrough;
	case IB_CM_REJ_RECEIVED:
		p = cm_id->context;
		priv = ipoib_priv(p->dev);
		if (ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE))
			ipoib_warn(priv, "unable to move qp to error state\n");
		fallthrough;
	default:
		return 0;
	}
}

/* Adjust length of skb with fragments to match received data */
static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space,
			  unsigned int length, struct sk_buff *toskb)
{
	int i, num_frags;
	unsigned int size;

	/* put header into skb */
	size = min(length, hdr_space);
	skb->tail += size;
	skb->len += size;
	length -= size;
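	/* Spread the remaining length over the fragment pages; pages that are
	 * not needed for this packet are handed to toskb so they can be
	 * reused for a replacement receive skb. */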
	num_frags = skb_shinfo(skb)->nr_frags;
	for (i = 0; i < num_frags; i++) {
		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];

		if (length == 0) {
			/* don't need this page */
			skb_fill_page_desc(toskb, i, skb_frag_page(frag),
					   0, PAGE_SIZE);
			--skb_shinfo(skb)->nr_frags;
		} else {
			size = min_t(unsigned int, length, PAGE_SIZE);
	if (unlikely(!(wr_id & IPOIB_CM_RX_UPDATE_MASK))) {
		if (p && time_after_eq(jiffies, p->jiffies + IPOIB_CM_RX_UPDATE_TIME)) {
			spin_lock_irqsave(&priv->lock, flags);
			p->jiffies = jiffies;
			/* Move this entry to list head, but do not re-add it
			 * if it has been moved out of list. */
			if (p->state == IPOIB_CM_RX_LIVE)
				list_move(&p->list, &priv->cm.passive_ids);
			spin_unlock_irqrestore(&priv->lock, flags);
		}
	}
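	/* Small packets are copied into a freshly allocated skb so that the
	 * large, multi-page receive buffer can stay in the ring. */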
	if (wc->byte_len < IPOIB_CM_COPYBREAK) {
		int dlen = wc->byte_len;
	newskb = ipoib_cm_alloc_rx_skb(dev, rx_ring, wr_id, frags,
				       mapping, GFP_ATOMIC);
	if (unlikely(!newskb)) {
		/*
		 * If we can't allocate a new RX buffer, dump
		 * this packet and reuse the old buffer.
		 */
		ipoib_dbg(priv, "failed to allocate receive buffer %d\n", wr_id);
		++dev->stats.rx_dropped;
		goto repost;
	}
if (unlikely(skb->len > tx->mtu)) {
ipoib_warn(priv, "packet len %d (> %d) too long to send, dropping\n",
skb->len, tx->mtu);
++dev->stats.tx_dropped;
++dev->stats.tx_errors;
		ipoib_cm_skb_too_long(dev, skb, tx->mtu - IPOIB_ENCAP_LEN);
		return;
	}

	if (skb_shinfo(skb)->nr_frags > usable_sge) {
		if (skb_linearize(skb) < 0) {
			ipoib_warn(priv, "skb could not be linearized\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return;
		}
		/* Does skb_linearize return ok without reducing nr_frags? */
		if (skb_shinfo(skb)->nr_frags > usable_sge) {
			ipoib_warn(priv, "too many frags after skb linearize\n");
			++dev->stats.tx_dropped;
			++dev->stats.tx_errors;
			dev_kfree_skb_any(skb);
			return;
		}
	}
ipoib_dbg_data(priv, "sending packet: head 0x%x length %d connection 0x%x\n",
tx->tx_head, skb->len, tx->qp->qp_num);
	/*
	 * We put the skb into the tx_ring _before_ we call post_send()
	 * because it's entirely possible that the completion handler will
	 * run before we execute anything after the post_send().  That
	 * means we have to make sure everything is properly recorded and
	 * our state is consistent before we call post_send().
	 */
tx_req = &tx->tx_ring[tx->tx_head & (ipoib_sendq_size - 1)];
tx_req->skb = skb;
if (unlikely(ipoib_dma_map_tx(priv->ca, tx_req))) {
++dev->stats.tx_errors;
		dev_kfree_skb_any(skb);
		return;
}
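	/* If this send fills the shared TX ring, stop the net queue; the send
	 * completion handler wakes it once slots free up again. */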
if ((priv->global_tx_head - priv->global_tx_tail) ==
ipoib_sendq_size - 1) {
ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
tx->qp->qp_num);
netif_stop_queue(dev);
}
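	/* Drop the skb's socket and routing references now; the buffer may sit
	 * in the hardware ring long after this function returns. */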
skb_orphan(skb);
skb_dst_drop(skb);
if (netif_queue_stopped(dev)) {
rc = ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP |
				      IB_CQ_REPORT_MISSED_EVENTS);
		if (unlikely(rc < 0))
			ipoib_warn(priv, "IPoIB/CM:request notify on send CQ failed\n");
		else if (rc)
			napi_schedule(&priv->send_napi);
}
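	/* Detach the neighbour from this connection and defer the actual QP
	 * teardown to the connection reaper workqueue. */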
if (neigh) {
neigh->cm = NULL;
ipoib_neigh_free(neigh);
tx->neigh = NULL;
}
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
queue_work(priv->wq, &priv->cm.reap_task);
}
clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
spin_unlock_irqrestore(&priv->lock, flags);
}
netif_tx_unlock(dev);
}
int ipoib_cm_dev_open(struct net_device *dev)
{
	struct ipoib_dev_priv *priv = ipoib_priv(dev);
	int ret;

	if (!IPOIB_CM_SUPPORTED(dev->dev_addr))
		return 0;

	priv->cm.id = ib_create_cm_id(priv->ca, ipoib_cm_rx_handler, dev);
	if (IS_ERR(priv->cm.id)) {
		pr_warn("%s: failed to create CM ID\n", priv->ca->name);
		ret = PTR_ERR(priv->cm.id);
		goto err_cm;
	}

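	/* Listen on the IPoIB connected-mode service ID, which carries the
	 * local UD QP number in its low bits. */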
	ret = ib_cm_listen(priv->cm.id,
			   cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num));
	if (ret) {
		pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name,
			IPOIB_CM_IETF_ID | priv->qp->qp_num);
		goto err_listen;
	}
ipoib_dbg(priv, "Destroy active connection 0x%x head 0x%x tail 0x%x\n",
p->qp ? p->qp->qp_num : 0, p->tx_head, p->tx_tail);
if (p->id)
ib_destroy_cm_id(p->id);
	if (p->tx_ring) {
		/* Wait for all sends to complete */
		begin = jiffies;
		while ((int) p->tx_tail - (int) p->tx_head < 0) {
			if (time_after(jiffies, begin + 5 * HZ)) {
				ipoib_warn(priv, "timing out; %d sends not completed\n",
					   p->tx_head - p->tx_tail);
				goto timeout;
			}
while (!list_empty(&priv->cm.start_list)) {
p = list_entry(priv->cm.start_list.next, typeof(*p), list);
list_del_init(&p->list);
neigh = p->neigh;
		qpn = IPOIB_QPN(neigh->daddr);
		/*
		 * As long as the search is done with these two locks held,
		 * path existence indicates its validity.
		 */
		path = __path_find(dev, neigh->daddr + QPN_AND_OPTIONS_OFFSET);
		if (!path) {
			pr_info("%s: ignoring invalid path %pI6\n",
				__func__,
				neigh->daddr + QPN_AND_OPTIONS_OFFSET);
			goto free_neigh;
		}
memcpy(&pathrec, &path->pathrec, sizeof(pathrec));
	spin_lock_irq(&priv->lock);
	while (!list_empty(&priv->cm.passive_ids)) {
		/* List is sorted by LRU, start from tail,
		 * stop when we see a recently used entry */
		p = list_entry(priv->cm.passive_ids.prev, typeof(*p), list);
		if (time_before_eq(jiffies, p->jiffies + IPOIB_CM_RX_TIMEOUT))
			break;
		list_move(&p->list, &priv->cm.rx_error_list);
		p->state = IPOIB_CM_RX_ERROR;
		spin_unlock_irq(&priv->lock);
		ret = ib_modify_qp(p->qp, &ipoib_cm_err_attr, IB_QP_STATE);
		if (ret)
			ipoib_warn(priv, "unable to move qp to error state: %d\n", ret);
		spin_lock_irq(&priv->lock);
	}
if (!list_empty(&priv->cm.passive_ids))
queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
spin_unlock_irq(&priv->lock);
}
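	/* Changing the mode requires the RTNL lock; trylock and restart the
	 * syscall on contention rather than blocking here. */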
	if (!rtnl_trylock())
		return restart_syscall();

	if (dev->reg_state != NETREG_REGISTERED) {
		rtnl_unlock();
		return -EPERM;
	}

	ret = ipoib_set_mode(dev, buf);

	/* ipoib_set_mode() is assumed to return with the RTNL lock still held,
	 * unless it returned -EBUSY, in which case it has already dropped the
	 * lock and we must not unlock it again.
	 */
	if (ret != -EBUSY)
		rtnl_unlock();

	return (!ret || ret == -EBUSY) ? count : ret;
}
static DEVICE_ATTR_RW(mode);
int ipoib_cm_add_mode_attr(struct net_device *dev)
{
	return device_create_file(&dev->dev, &dev_attr_mode);
}
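	/* When a shared receive queue is available, pre-allocate a full-sized
	 * receive buffer for every slot and post it to the SRQ up front. */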
	if (ipoib_cm_has_srq(dev)) {
		for (i = 0; i < ipoib_recvq_size; ++i) {
			if (!ipoib_cm_alloc_rx_skb(dev, priv->cm.srq_ring, i,
						   priv->cm.num_frags - 1,
						   priv->cm.srq_ring[i].mapping,
						   GFP_KERNEL)) {
				ipoib_warn(priv, "failed to allocate receive buffer %d\n", i);
				ipoib_cm_dev_cleanup(dev);
				return -ENOMEM;
			}

			if (ipoib_cm_post_receive_srq(dev, i)) {
				ipoib_warn(priv, "ipoib_cm_post_receive_srq failed for buf %d\n", i);
				ipoib_cm_dev_cleanup(dev);
				return -EIO;
			}
		}
	}