/* * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
 */

#include <linux/module.h>
#include <linux/list.h>
#include <linux/workqueue.h>
#include <linux/skbuff.h>
#include <linux/timer.h>
#include <linux/notifier.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/if_vlan.h>
/* * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
*/ staticstruct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsignedint tid)
{ struct c4iw_ep *ep; unsignedlong flags;
xa_lock_irqsave(&dev->hwtids, flags);
ep = xa_load(&dev->hwtids, tid); if (ep)
c4iw_get_ep(&ep->com);
xa_unlock_irqrestore(&dev->hwtids, flags); return ep;
}
/* * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
*/ staticstruct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev, unsignedint stid)
{ struct c4iw_listen_ep *ep; unsignedlong flags;
xa_lock_irqsave(&dev->stids, flags);
ep = xa_load(&dev->stids, stid); if (ep)
c4iw_get_ep(&ep->com);
xa_unlock_irqrestore(&dev->stids, flags); return ep;
}
/*
 * NOTE(review): orphaned fragment — the opening of the enclosing function
 * (presumably _c4iw_free_ep(); confirm against the original driver source)
 * is missing from this import, so this span does not compile as-is.
 */
/* * If we have a hwtid, then remove it from the idr table * so lookups will no longer find this endpoint. Otherwise * we have a race where one thread finds the ep ptr just * before the other thread is freeing the ep memory.
 */ if (ep->hwtid != -1)
remove_ep_tid(ep);
c4iw_put_ep(&ep->com);
}
/*
 * Translate a CPL completion status into a negative errno value.
 *
 * Returns 0 for CPL_ERR_NONE, a specific -errno for the recognized failure
 * statuses, and -EIO for anything else.
 */
static int status2errno(int status)
{
	switch (status) {
	case CPL_ERR_NONE:
		return 0;
	case CPL_ERR_CONN_RESET:
		return -ECONNRESET;
	case CPL_ERR_ARP_MISS:
		return -EHOSTUNREACH;
	case CPL_ERR_CONN_TIMEDOUT:
		return -ETIMEDOUT;
	case CPL_ERR_TCAM_FULL:
		return -ENOMEM;
	case CPL_ERR_CONN_EXIST:
		return -EADDRINUSE;
	default:
		return -EIO;
	}
}
/*
 * Fake up a special CPL opcode and call sched() so process_work() will call
 * _put_ep_safe() in a safe context to free the ep resources.  This is needed
 * because ARP error handlers are called in an ATOMIC context, and
 * _c4iw_free_ep() needs to block.
 */
static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
				  int cpl)
{
	struct cpl_act_establish *rpl = cplhdr(skb);

	/* Set our special ARP_FAILURE opcode */
	rpl->ot.opcode = cpl;

	/*
	 * Save ep in the skb->cb area, after where sched() will save the dev
	 * ptr.
	 */
	*((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
	sched(ep->com.dev, skb);
}
/* Handle an ARP failure for an accept */ staticvoid pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
{ struct c4iw_ep *ep = handle;
pr_err("ARP failure during accept - tid %u - dropping connection\n",
ep->hwtid);
/*
 * NOTE(review): body truncated by the import — the cleanup that should
 * follow the pr_err() (and the function's closing brace) is missing here;
 * restore from the original driver source before building.
 */
/* * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant * and send it along.
*/ staticvoid abort_arp_failure(void *handle, struct sk_buff *skb)
{ int ret; struct c4iw_ep *ep = handle; struct c4iw_rdev *rdev = &ep->com.dev->rdev; struct cpl_abort_req *req = cplhdr(skb);
/* * keep a ref on the ep so the tcb is not unlocked before this * cpl completes. The ref is released in read_tcb_rpl().
*/
c4iw_get_ep(&ep->com); if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
c4iw_put_ep(&ep->com);
}
/* * Specify the largest window that will fit in opt0. The * remainder will be specified in the rx_data_ack.
*/
win = ep->rcv_win >> 10; if (win > RCV_BUFSIZ_M)
win = RCV_BUFSIZ_M;
if (ep->plen)
memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params),
ep->mpa_pkt + sizeof(*mpa), ep->plen);
} else if (ep->plen)
memcpy(mpa->private_data,
ep->mpa_pkt + sizeof(*mpa), ep->plen);
/* * Reference the mpa skb. This ensures the data area * will remain in memory until the hw acks the tx. * Function fw4_ack() will deref it.
*/
skb_get(skb);
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
ep->mpa_skb = skb;
ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); if (ret) return ret;
start_ep_timer(ep);
__state_set(&ep->com, MPA_REQ_SENT);
ep->mpa_attr.initiator = 1;
ep->snd_seq += mpalen; return ret;
}
if (ep->plen)
memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params), pdata, plen);
} else if (plen)
memcpy(mpa->private_data, pdata, plen);
/* * Reference the mpa skb again. This ensures the data area * will remain in memory until the hw acks the tx. * Function fw4_ack() will deref it.
*/
skb_get(skb);
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
ep->mpa_skb = skb;
ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
if (ep->plen)
memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params), pdata, plen);
} else if (plen)
memcpy(mpa->private_data, pdata, plen);
/* * Reference the mpa skb. This ensures the data area * will remain in memory until the hw acks the tx. * Function fw4_ack() will deref it.
*/
skb_get(skb);
t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
ep->mpa_skb = skb;
__state_set(&ep->com, MPA_REP_SENT);
ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
/* * If we couldn't specify the entire rcv window at connection setup * due to the limit in the number of bits in the RCV_BUFSIZ field, * then add the overage in to the credits returned.
*/ if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
/* * process_mpa_reply - process streaming mode MPA reply * * Returns: * * 0 upon success indicating a connect request was delivered to the ULP * or the mpa request is incomplete but valid so far. * * 1 if a failure requires the caller to close the connection. * * 2 if a failure requires the caller to abort the connection.
*/ staticint process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{ struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params;
u16 plen;
u16 resp_ird, resp_ord;
u8 rtr_mismatch = 0, insuff_ird = 0; struct c4iw_qp_attributes attrs; enum c4iw_qp_attr_mask mask; int err; int disconnect = 0;
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
/* * If we get more than the supported amount of private data * then we must fail this connection.
*/ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
err = -EINVAL; goto err_stop_timer;
}
/* * copy the new data into our accumulation buffer.
*/
skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
skb->len);
ep->mpa_pkt_len += skb->len;
/* * if we don't even have the mpa message, then bail.
*/ if (ep->mpa_pkt_len < sizeof(*mpa)) return 0;
mpa = (struct mpa_message *) ep->mpa_pkt;
/* Validate MPA header. */ if (mpa->revision > mpa_rev) {
pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
__func__, mpa_rev, mpa->revision);
err = -EPROTO; goto err_stop_timer;
} if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
err = -EPROTO; goto err_stop_timer;
}
plen = ntohs(mpa->private_data_size);
/* * Fail if there's too much private data.
*/ if (plen > MPA_MAX_PRIVATE_DATA) {
err = -EPROTO; goto err_stop_timer;
}
/* * If plen does not account for pkt size
*/ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
err = -EPROTO; goto err_stop_timer;
}
ep->plen = (u8) plen;
/* * If we don't have all the pdata yet, then bail. * We'll continue process when more data arrives.
*/ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) return 0;
/* * Stop mpa timer. If it expired, then * we ignore the MPA reply. process_timeout() * will abort the connection.
*/ if (stop_ep_timer(ep)) return 0;
/* * If we get here we have accumulated the entire mpa * start reply message including private data. And * the MPA header is valid.
*/
__state_set(&ep->com, FPDU_MODE);
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa->revision;
ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
if (mpa->revision == 2) {
ep->mpa_attr.enhanced_rdma_conn =
mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; if (ep->mpa_attr.enhanced_rdma_conn) {
mpa_v2_params = (struct mpa_v2_conn_params *)
(ep->mpa_pkt + sizeof(*mpa));
resp_ird = ntohs(mpa_v2_params->ird) &
MPA_V2_IRD_ORD_MASK;
resp_ord = ntohs(mpa_v2_params->ord) &
MPA_V2_IRD_ORD_MASK;
pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
resp_ird, resp_ord, ep->ird, ep->ord);
/* * This is a double-check. Ideally, below checks are * not required since ird/ord stuff has been taken * care of in c4iw_accept_cr
*/ if (ep->ird < resp_ord) { if (RELAXED_IRD_NEGOTIATION && resp_ord <=
ep->com.dev->rdev.lldi.max_ordird_qp)
ep->ird = resp_ord; else
insuff_ird = 1;
} elseif (ep->ird > resp_ord) {
ep->ird = resp_ord;
} if (ep->ord > resp_ird) { if (RELAXED_IRD_NEGOTIATION)
ep->ord = resp_ird; else
insuff_ird = 1;
} if (insuff_ird) {
err = -ENOMEM;
ep->ird = resp_ord;
ep->ord = resp_ird;
}
/* * If responder's RTR does not match with that of initiator, assign * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not * generated when moving QP to RTS state. * A TERM message will be sent after QP has moved to RTS state
*/ if ((ep->mpa_attr.version == 2) && peer2peer &&
(ep->mpa_attr.p2p_type != p2p_type)) {
ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
rtr_mismatch = 1;
}
/* bind QP and TID with INIT_WR */
err = c4iw_modify_qp(ep->com.qp->rhp,
ep->com.qp, mask, &attrs, 1); if (err) goto err;
/* * If responder's RTR requirement did not match with what initiator * supports, generate TERM message
*/ if (rtr_mismatch) {
pr_err("%s: RTR mismatch, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_NOMATCH_RTR;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM;
disconnect = 1; goto out;
}
/* * Generate TERM if initiator IRD is not sufficient for responder * provided ORD. Currently, we do the same behaviour even when * responder provided IRD is also not sufficient as regards to * initiator ORD.
*/ if (insuff_ird) {
pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_INSUFF_IRD;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM;
disconnect = 1; goto out;
} goto out;
err_stop_timer:
stop_ep_timer(ep);
err:
disconnect = 2;
out:
connect_reply_upcall(ep, err); return disconnect;
}
/* * process_mpa_request - process streaming mode MPA request * * Returns: * * 0 upon success indicating a connect request was delivered to the ULP * or the mpa request is incomplete but valid so far. * * 1 if a failure requires the caller to close the connection. * * 2 if a failure requires the caller to abort the connection.
*/ staticint process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
{ struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params;
u16 plen;
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
/* * If we get more than the supported amount of private data * then we must fail this connection.
*/ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) goto err_stop_timer;
pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
/* * Copy the new data into our accumulation buffer.
*/
skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
skb->len);
ep->mpa_pkt_len += skb->len;
/* * If we don't even have the mpa message, then bail. * We'll continue process when more data arrives.
*/ if (ep->mpa_pkt_len < sizeof(*mpa)) return 0;
/* * If this TCB had a srq buffer cached, then we must complete * it. For user mode, that means saving the srqidx in the * user/kernel status page for this qp. For kernel mode, just * synthesize the CQE now.
*/ if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) { if (ep->com.qp->ibqp.uobject)
t4_set_wq_in_error(&ep->com.qp->wq, srqidx); else
c4iw_flush_srqidx(ep->com.qp, srqidx);
}
}
/* * Specify the largest window that will fit in opt0. The * remainder will be specified in the rx_data_ack.
*/
win = ep->rcv_win >> 10; if (win > RCV_BUFSIZ_M)
win = RCV_BUFSIZ_M;
/*
 * Some of the error codes above implicitly indicate that there is no TID
 * allocated with the result of an ACT_OPEN.  We use this predicate to make
 * that explicit.
 *
 * Returns nonzero if @status implies a TID was allocated for the ACT_OPEN
 * (and therefore must be released), zero otherwise.
 */
static inline int act_open_has_tid(int status)
{
	return (status != CPL_ERR_TCAM_PARITY &&
		status != CPL_ERR_TCAM_MISS &&
		status != CPL_ERR_TCAM_FULL &&
		status != CPL_ERR_CONN_EXIST_SYNRECV &&
		status != CPL_ERR_CONN_EXIST);
}
staticchar *neg_adv_str(unsignedint status)
{ switch (status) { case CPL_ERR_RTX_NEG_ADVICE: return"Retransmit timeout"; case CPL_ERR_PERSIST_NEG_ADVICE: return"Persist timeout"; case CPL_ERR_KEEPALV_NEG_ADVICE: return"Keepalive timeout"; default: return"Unknown";
}
}
/* When MPA revision is different on nodes, the node with MPA_rev=2 * tries to reconnect with MPA_rev 1 for the same EP through * c4iw_reconnect(), where the same EP is assigned with new tid for * further connection establishment. As we are using the same EP pointer * for reconnect, few skbs are used during the previous c4iw_connect(), * which leaves the EP with inadequate skbs for further * c4iw_reconnect(), Further causing a crash due to an empty * skb_list() during peer_abort(). Allocate skbs which is already used.
*/
size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list)); if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
err = -ENOMEM; goto fail1;
}
/* * Allocate an active TID to initiate a TCP connection.
*/
ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); if (ep->atid == -1) {
pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM; goto fail2;
}
err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL); if (err) goto fail2a;
/* send connect request to rnic */
err = send_connect(ep); if (!err) goto out;
cxgb4_l2t_release(ep->l2t);
fail4:
dst_release(ep->dst);
fail3:
xa_erase_irq(&ep->com.dev->atids, ep->atid);
fail2a:
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail2: /* * remember to send notification to upper layer. * We are in here so the upper layer is not aware that this is * re-connect attempt and so, upper layer is still waiting for * response of 1st connect request.
*/
connect_reply_upcall(ep, -ECONNRESET);
fail1:
c4iw_put_ep(&ep->com);
out: return err;
}
/*
 * NOTE(review): the following German text is website boilerplate picked up
 * during extraction ("the information on this web page was compiled to the
 * best of our knowledge; neither completeness, correctness nor quality is
 * guaranteed; the syntax colouring and the measurement are still
 * experimental") — it is not part of the driver.  Wrapped in a comment so
 * the file remains a valid C translation unit:
 *
 * Die Informationen auf dieser Webseite wurden
 * nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereit gestellten Informationen zugesichert.
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */