/* * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ #include <linux/module.h> #include <linux/list.h> #include <linux/workqueue.h> #include <linux/skbuff.h> #include <linux/timer.h> #include <linux/notifier.h> #include <linux/inetdevice.h> #include <linux/ip.h> #include <linux/tcp.h> #include <linux/if_vlan.h>
/* * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
*/ staticstruct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsignedint tid)
{ struct c4iw_ep *ep; unsignedlong flags;
xa_lock_irqsave(&dev->hwtids, flags);
ep = xa_load(&dev->hwtids, tid); if (ep)
c4iw_get_ep(&ep->com);
xa_unlock_irqrestore(&dev->hwtids, flags); return ep;
}
/* * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
*/ staticstruct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev, unsignedint stid)
{ struct c4iw_listen_ep *ep; unsignedlong flags;
xa_lock_irqsave(&dev->stids, flags);
ep = xa_load(&dev->stids, stid); if (ep)
c4iw_get_ep(&ep->com);
xa_unlock_irqrestore(&dev->stids, flags); return ep;
}
/* * If we have a hwtid, then remove it from the idr table * so lookups will no longer find this endpoint. Otherwise * we have a race where one thread finds the ep ptr just * before the other thread is freeing the ep memory.
*/ if (ep->hwtid != -1)
remove_ep_tid(ep);
c4iw_put_ep(&ep->com);
}
/*
 * Translate a CPL error status into a negative errno value.
 *
 * Returns 0 for CPL_ERR_NONE, a specific negative errno for the statuses
 * listed below, and -EIO for any status not listed.
 */
static int status2errno(int status)
{
	switch (status) {
	case CPL_ERR_NONE:
		return 0;
	case CPL_ERR_CONN_RESET:
		return -ECONNRESET;
	case CPL_ERR_ARP_MISS:
		return -EHOSTUNREACH;
	case CPL_ERR_CONN_TIMEDOUT:
		return -ETIMEDOUT;
	case CPL_ERR_TCAM_FULL:
		return -ENOMEM;
	case CPL_ERR_CONN_EXIST:
		return -EADDRINUSE;
	default:
		return -EIO;
	}
}
/*
 * Fake up a special CPL opcode and call sched() so process_work() will call
 * _put_ep_safe() in a safe context to free the ep resources.  This is needed
 * because ARP error handlers are called in an ATOMIC context, and
 * _c4iw_free_ep() needs to block.
 *
 * @ep:  endpoint to hand to the deferred handler
 * @skb: skb whose CPL header is overwritten and which is then queued
 * @cpl: fake opcode to store in the CPL header
 */
static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
				  int cpl)
{
	struct cpl_act_establish *rpl = cplhdr(skb);

	/* Set our special ARP_FAILURE opcode */
	rpl->ot.opcode = cpl;

	/*
	 * Save ep in the skb->cb area, after where sched() will save the dev
	 * ptr: sched() writes dev at offset sizeof(void *), so ep goes in the
	 * next pointer-sized slot.
	 */
	*((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;

	sched(ep->com.dev, skb);
}
/* Handle an ARP failure for an accept */ staticvoid pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
{ struct c4iw_ep *ep = handle;
pr_err("ARP failure during accept - tid %u - dropping connection\n",
ep->hwtid);
/* * Handle an ARP failure for a CPL_ABORT_REQ. Change it into a no RST variant * and send it along.
*/ staticvoid abort_arp_failure(void *handle, struct sk_buff *skb)
{ int ret; struct c4iw_ep *ep = handle; struct c4iw_rdev *rdev = &ep->com.dev->rdev; struct cpl_abort_req *req = cplhdr(skb);
/* * keep a ref on the ep so the tcb is not unlocked before this * cpl completes. The ref is released in read_tcb_rpl().
*/
c4iw_get_ep(&ep->com); if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
c4iw_put_ep(&ep->com);
}
/* * Specify the largest window that will fit in opt0. The * remainder will be specified in the rx_data_ack.
*/
win = ep->rcv_win >> 10; if (win > RCV_BUFSIZ_M)
win = RCV_BUFSIZ_M;
if (ep->plen)
memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params),
ep->mpa_pkt + sizeof(*mpa), ep->plen);
} else if (ep->plen)
memcpy(mpa->private_data,
ep->mpa_pkt + sizeof(*mpa), ep->plen);
/* * Reference the mpa skb. This ensures the data area * will remain in memory until the hw acks the tx. * Function fw4_ack() will deref it.
*/
skb_get(skb);
t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
ep->mpa_skb = skb;
ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); if (ret) return ret;
start_ep_timer(ep);
__state_set(&ep->com, MPA_REQ_SENT);
ep->mpa_attr.initiator = 1;
ep->snd_seq += mpalen; return ret;
}
if (ep->plen)
memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params), pdata, plen);
} else if (plen)
memcpy(mpa->private_data, pdata, plen);
/* * Reference the mpa skb again. This ensures the data area * will remain in memory until the hw acks the tx. * Function fw4_ack() will deref it.
*/
skb_get(skb);
set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
ep->mpa_skb = skb;
ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
if (ep->plen)
memcpy(mpa->private_data + sizeof(struct mpa_v2_conn_params), pdata, plen);
} else if (plen)
memcpy(mpa->private_data, pdata, plen);
/* * Reference the mpa skb. This ensures the data area * will remain in memory until the hw acks the tx. * Function fw4_ack() will deref it.
*/
skb_get(skb);
t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
ep->mpa_skb = skb;
__state_set(&ep->com, MPA_REP_SENT);
ep->snd_seq += mpalen; return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
}
/* * If we couldn't specify the entire rcv window at connection setup * due to the limit in the number of bits in the RCV_BUFSIZ field, * then add the overage in to the credits returned.
*/ if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
/* * process_mpa_reply - process streaming mode MPA reply * * Returns: * * 0 upon success indicating a connect request was delivered to the ULP * or the mpa request is incomplete but valid so far. * * 1 if a failure requires the caller to close the connection. * * 2 if a failure requires the caller to abort the connection.
*/ staticint process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
{ struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params;
u16 plen;
u16 resp_ird, resp_ord;
u8 rtr_mismatch = 0, insuff_ird = 0; struct c4iw_qp_attributes attrs; enum c4iw_qp_attr_mask mask; int err; int disconnect = 0;
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
/* * If we get more than the supported amount of private data * then we must fail this connection.
*/ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
err = -EINVAL; goto err_stop_timer;
}
/* * copy the new data into our accumulation buffer.
*/
skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
skb->len);
ep->mpa_pkt_len += skb->len;
/* * if we don't even have the mpa message, then bail.
*/ if (ep->mpa_pkt_len < sizeof(*mpa)) return 0;
mpa = (struct mpa_message *) ep->mpa_pkt;
/* Validate MPA header. */ if (mpa->revision > mpa_rev) {
pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
__func__, mpa_rev, mpa->revision);
err = -EPROTO; goto err_stop_timer;
} if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
err = -EPROTO; goto err_stop_timer;
}
plen = ntohs(mpa->private_data_size);
/* * Fail if there's too much private data.
*/ if (plen > MPA_MAX_PRIVATE_DATA) {
err = -EPROTO; goto err_stop_timer;
}
/* * If plen does not account for pkt size
*/ if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
err = -EPROTO; goto err_stop_timer;
}
ep->plen = (u8) plen;
/* * If we don't have all the pdata yet, then bail. * We'll continue process when more data arrives.
*/ if (ep->mpa_pkt_len < (sizeof(*mpa) + plen)) return 0;
/* * Stop mpa timer. If it expired, then * we ignore the MPA reply. process_timeout() * will abort the connection.
*/ if (stop_ep_timer(ep)) return 0;
/* * If we get here we have accumulated the entire mpa * start reply message including private data. And * the MPA header is valid.
*/
__state_set(&ep->com, FPDU_MODE);
ep->mpa_attr.crc_enabled = (mpa->flags & MPA_CRC) | crc_enabled ? 1 : 0;
ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
ep->mpa_attr.version = mpa->revision;
ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
if (mpa->revision == 2) {
ep->mpa_attr.enhanced_rdma_conn =
mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0; if (ep->mpa_attr.enhanced_rdma_conn) {
mpa_v2_params = (struct mpa_v2_conn_params *)
(ep->mpa_pkt + sizeof(*mpa));
resp_ird = ntohs(mpa_v2_params->ird) &
MPA_V2_IRD_ORD_MASK;
resp_ord = ntohs(mpa_v2_params->ord) &
MPA_V2_IRD_ORD_MASK;
pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
resp_ird, resp_ord, ep->ird, ep->ord);
/* * This is a double-check. Ideally, below checks are * not required since ird/ord stuff has been taken * care of in c4iw_accept_cr
*/ if (ep->ird < resp_ord) { if (RELAXED_IRD_NEGOTIATION && resp_ord <=
ep->com.dev->rdev.lldi.max_ordird_qp)
ep->ird = resp_ord; else
insuff_ird = 1;
} elseif (ep->ird > resp_ord) {
ep->ird = resp_ord;
} if (ep->ord > resp_ird) { if (RELAXED_IRD_NEGOTIATION)
ep->ord = resp_ird; else
insuff_ird = 1;
} if (insuff_ird) {
err = -ENOMEM;
ep->ird = resp_ord;
ep->ord = resp_ird;
}
/* * If responder's RTR does not match with that of initiator, assign * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not * generated when moving QP to RTS state. * A TERM message will be sent after QP has moved to RTS state
*/ if ((ep->mpa_attr.version == 2) && peer2peer &&
(ep->mpa_attr.p2p_type != p2p_type)) {
ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
rtr_mismatch = 1;
}
/* bind QP and TID with INIT_WR */
err = c4iw_modify_qp(ep->com.qp->rhp,
ep->com.qp, mask, &attrs, 1); if (err) goto err;
/* * If responder's RTR requirement did not match with what initiator * supports, generate TERM message
*/ if (rtr_mismatch) {
pr_err("%s: RTR mismatch, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_NOMATCH_RTR;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM;
disconnect = 1; goto out;
}
/* * Generate TERM if initiator IRD is not sufficient for responder * provided ORD. Currently, we do the same behaviour even when * responder provided IRD is also not sufficient as regards to * initiator ORD.
*/ if (insuff_ird) {
pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
attrs.layer_etype = LAYER_MPA | DDP_LLP;
attrs.ecode = MPA_INSUFF_IRD;
attrs.next_state = C4IW_QP_STATE_TERMINATE;
attrs.send_term = 1;
err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
err = -ENOMEM;
disconnect = 1; goto out;
} goto out;
err_stop_timer:
stop_ep_timer(ep);
err:
disconnect = 2;
out:
connect_reply_upcall(ep, err); return disconnect;
}
/* * process_mpa_request - process streaming mode MPA request * * Returns: * * 0 upon success indicating a connect request was delivered to the ULP * or the mpa request is incomplete but valid so far. * * 1 if a failure requires the caller to close the connection. * * 2 if a failure requires the caller to abort the connection.
*/ staticint process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
{ struct mpa_message *mpa; struct mpa_v2_conn_params *mpa_v2_params;
u16 plen;
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
/* * If we get more than the supported amount of private data * then we must fail this connection.
*/ if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) goto err_stop_timer;
pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
/* * Copy the new data into our accumulation buffer.
*/
skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
skb->len);
ep->mpa_pkt_len += skb->len;
/* * If we don't even have the mpa message, then bail. * We'll continue process when more data arrives.
*/ if (ep->mpa_pkt_len < sizeof(*mpa)) return 0;
/* * If this TCB had a srq buffer cached, then we must complete * it. For user mode, that means saving the srqidx in the * user/kernel status page for this qp. For kernel mode, just * synthesize the CQE now.
*/ if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) { if (ep->com.qp->ibqp.uobject)
t4_set_wq_in_error(&ep->com.qp->wq, srqidx); else
c4iw_flush_srqidx(ep->com.qp, srqidx);
}
}
/* * Specify the largest window that will fit in opt0. The * remainder will be specified in the rx_data_ack.
*/
win = ep->rcv_win >> 10; if (win > RCV_BUFSIZ_M)
win = RCV_BUFSIZ_M;
/*
 * Some of the error codes above implicitly indicate that there is no TID
 * allocated with the result of an ACT_OPEN.  We use this predicate to make
 * that explicit.
 *
 * Returns non-zero unless @status is one of the TCAM parity/miss/full or
 * connection-exists statuses listed below.
 */
static inline int act_open_has_tid(int status)
{
	return (status != CPL_ERR_TCAM_PARITY &&
		status != CPL_ERR_TCAM_MISS &&
		status != CPL_ERR_TCAM_FULL &&
		status != CPL_ERR_CONN_EXIST_SYNRECV &&
		status != CPL_ERR_CONN_EXIST);
}
staticchar *neg_adv_str(unsignedint status)
{ switch (status) { case CPL_ERR_RTX_NEG_ADVICE: return"Retransmit timeout"; case CPL_ERR_PERSIST_NEG_ADVICE: return"Persist timeout"; case CPL_ERR_KEEPALV_NEG_ADVICE: return"Keepalive timeout"; default: return"Unknown";
}
}
/* When MPA revision is different on nodes, the node with MPA_rev=2 * tries to reconnect with MPA_rev 1 for the same EP through * c4iw_reconnect(), where the same EP is assigned with new tid for * further connection establishment. As we are using the same EP pointer * for reconnect, few skbs are used during the previous c4iw_connect(), * which leaves the EP with inadequate skbs for further * c4iw_reconnect(), Further causing a crash due to an empty * skb_list() during peer_abort(). Allocate skbs which is already used.
*/
size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list)); if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
err = -ENOMEM; goto fail1;
}
/* * Allocate an active TID to initiate a TCP connection.
*/
ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep); if (ep->atid == -1) {
pr_err("%s - cannot alloc atid\n", __func__);
err = -ENOMEM; goto fail2;
}
err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL); if (err) goto fail2a;
/* send connect request to rnic */
err = send_connect(ep); if (!err) goto out;
cxgb4_l2t_release(ep->l2t);
fail4:
dst_release(ep->dst);
fail3:
xa_erase_irq(&ep->com.dev->atids, ep->atid);
fail2a:
cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
fail2: /* * remember to send notification to upper layer. * We are in here so the upper layer is not aware that this is * re-connect attempt and so, upper layer is still waiting for * response of 1st connect request.
*/
connect_reply_upcall(ep, -ECONNRESET);
fail1:
c4iw_put_ep(&ep->com);
out: return err;
}
dst_confirm(ep->dst);
mutex_lock(&ep->com.mutex);
ep->com.state = MPA_REQ_WAIT;
start_ep_timer(ep);
set_bit(PASS_ESTAB, &ep->com.history);
ret = send_flowc(ep);
mutex_unlock(&ep->com.mutex); if (ret)
c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
return 0;
}
staticint peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
{ struct cpl_peer_close *hdr = cplhdr(skb); struct c4iw_ep *ep; struct c4iw_qp_attributes attrs; int disconnect = 1; int release = 0; unsignedint tid = GET_TID(hdr); int ret;
ep = get_ep_from_tid(dev, tid); if (!ep) return 0;
pr_debug("ep %p tid %u\n", ep, ep->hwtid);
dst_confirm(ep->dst);
set_bit(PEER_CLOSE, &ep->com.history);
mutex_lock(&ep->com.mutex); switch (ep->com.state) { case MPA_REQ_WAIT:
__state_set(&ep->com, CLOSING); break; case MPA_REQ_SENT:
__state_set(&ep->com, CLOSING);
connect_reply_upcall(ep, -ECONNRESET); break; case MPA_REQ_RCVD:
/* * We're gonna mark this puppy DEAD, but keep * the reference on it until the ULP accepts or * rejects the CR. Also wake up anyone waiting * in rdma connection migration (see c4iw_accept_cr()).
*/
__state_set(&ep->com, CLOSING);
pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); break; case MPA_REP_SENT:
__state_set(&ep->com, CLOSING);
pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET); break; case FPDU_MODE:
start_ep_timer(ep);
__state_set(&ep->com, CLOSING);
attrs.next_state = C4IW_QP_STATE_CLOSING;
ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1); if (ret != -ECONNRESET) {
peer_close_upcall(ep);
disconnect = 1;
} break; case ABORTING:
disconnect = 0; break; case CLOSING:
__state_set(&ep->com, MORIBUND);
disconnect = 0; break; case MORIBUND:
(void)stop_ep_timer(ep); if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = C4IW_QP_STATE_IDLE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
close_complete_upcall(ep, 0);
__state_set(&ep->com, DEAD);
release = 1;
disconnect = 0; break; case DEAD:
disconnect = 0; break; default:
WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
}
mutex_unlock(&ep->com.mutex); if (disconnect)
c4iw_ep_disconnect(ep, 0, GFP_KERNEL); if (release)
release_ep_resources(ep);
c4iw_put_ep(&ep->com); return 0;
}
u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
ep = get_ep_from_tid(dev, tid); if (!ep) return 0;
status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
if (cxgb_is_neg_adv(status)) {
pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
ep->hwtid, status, neg_adv_str(status));
ep->stats.abort_neg_adv++;
mutex_lock(&dev->rdev.stats.lock);
dev->rdev.stats.neg_adv++;
mutex_unlock(&dev->rdev.stats.lock); goto deref_ep;
}
pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
ep->com.state);
set_bit(PEER_ABORT, &ep->com.history);
/* * Wake up any threads in rdma_init() or rdma_fini(). * However, this is not needed if com state is just * MPA_REQ_SENT
*/ if (ep->com.state != MPA_REQ_SENT)
c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
mutex_lock(&ep->com.mutex); switch (ep->com.state) { case CONNECTING:
c4iw_put_ep(&ep->parent_ep->com); break; case MPA_REQ_WAIT:
(void)stop_ep_timer(ep); break; case MPA_REQ_SENT:
(void)stop_ep_timer(ep); if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
(mpa_rev == 2 && ep->tried_with_mpa_v1))
connect_reply_upcall(ep, -ECONNRESET); else { /* * we just don't send notification upwards because we * want to retry with mpa_v1 without upper layers even * knowing it. * * do some housekeeping so as to re-initiate the * connection
*/
pr_info("%s: mpa_rev=%d. Retrying with mpav1\n",
__func__, mpa_rev);
ep->retry_with_mpa_v1 = 1;
} break; case MPA_REP_SENT: break; case MPA_REQ_RCVD: break; case MORIBUND: case CLOSING:
stop_ep_timer(ep);
fallthrough; case FPDU_MODE: if (ep->com.qp && ep->com.qp->srq) {
srqidx = ABORT_RSS_SRQIDX_G(
be32_to_cpu(req->srqidx_status)); if (srqidx) {
complete_cached_srq_buffers(ep, srqidx);
} else { /* Hold ep ref until finish_peer_abort() */
c4iw_get_ep(&ep->com);
__state_set(&ep->com, ABORTING);
set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
read_tcb(ep); break;
}
}
if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = C4IW_QP_STATE_ERROR;
ret = c4iw_modify_qp(ep->com.qp->rhp,
ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1); if (ret)
pr_err("%s - qp <- error failed!\n", __func__);
}
peer_abort_upcall(ep); break; case ABORTING: break; case DEAD:
pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
mutex_unlock(&ep->com.mutex); goto deref_ep; default:
WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); break;
}
dst_confirm(ep->dst); if (ep->com.state != ABORTING) {
__state_set(&ep->com, DEAD); /* we don't release if we want to retry with mpa_v1 */ if (!ep->retry_with_mpa_v1)
release = 1;
}
mutex_unlock(&ep->com.mutex);
if (ep) { if (ep->com.qp) {
pr_warn("TERM received tid %u qpid %u\n", tid,
ep->com.qp->wq.sq.qid);
attrs.next_state = C4IW_QP_STATE_TERMINATE;
c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
}
/* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3, * when entering the TERM state the RNIC MUST initiate a CLOSE.
*/
c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
} else
pr_warn("TERM received tid %u no ep/qp\n", tid);
return 0;
}
/*
 * Upcall from the adapter indicating data has been transmitted.
 * For us its just the single MPA request or reply.  We can now free
 * the skb holding the mpa message.
 *
 * @dev: device the CPL_FW4_ACK arrived on
 * @skb: skb carrying the cpl_fw4_ack message
 *
 * An ack carrying zero credits is only logged.  Otherwise, if the ep still
 * holds an MPA skb, it is freed under ep->com.mutex and the ep timer is
 * stopped when STOP_MPA_TIMER is set in the ep flags.
 *
 * Always returns 0; an ack for an unknown TID is silently ignored.
 */
static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
{
	struct c4iw_ep *ep;
	struct cpl_fw4_ack *hdr = cplhdr(skb);
	u8 credits = hdr->credits;
	unsigned int tid = GET_TID(hdr);

	ep = get_ep_from_tid(dev, tid);
	if (!ep)
		return 0;

	pr_debug("ep %p tid %u credits %u\n",
		 ep, ep->hwtid, credits);
	if (credits == 0) {
		pr_debug("0 credit ack ep %p tid %u state %u\n",
			 ep, ep->hwtid, state_read(&ep->com));
		goto out;
	}

	dst_confirm(ep->dst);
	if (ep->mpa_skb) {
		pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
			 ep, ep->hwtid, state_read(&ep->com),
			 ep->mpa_attr.initiator ? 1 : 0);
		mutex_lock(&ep->com.mutex);
		kfree_skb(ep->mpa_skb);
		ep->mpa_skb = NULL;
		if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
			stop_ep_timer(ep);
		mutex_unlock(&ep->com.mutex);
	}
out:
	/* Drop the reference taken by get_ep_from_tid(). */
	c4iw_put_ep(&ep->com);
	return 0;
}
int c4iw_reject_cr(struct iw_cm_id *cm_id, constvoid *pdata, u8 pdata_len)
{ int abort; struct c4iw_ep *ep = to_ep(cm_id);
int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
{ int ret = 0; int close = 0; int fatal = 0; struct c4iw_rdev *rdev;
mutex_lock(&ep->com.mutex);
pr_debug("ep %p state %s, abrupt %d\n", ep,
states[ep->com.state], abrupt);
/* * Ref the ep here in case we have fatal errors causing the * ep to be released and freed.
*/
c4iw_get_ep(&ep->com);
rdev = &ep->com.dev->rdev; if (c4iw_fatal_error(rdev)) {
fatal = 1;
close_complete_upcall(ep, -EIO);
ep->com.state = DEAD;
} switch (ep->com.state) { case MPA_REQ_WAIT: case MPA_REQ_SENT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: case CONNECTING:
close = 1; if (abrupt)
ep->com.state = ABORTING; else {
ep->com.state = CLOSING;
/* * if we close before we see the fw4_ack() then we fix * up the timer state since we're reusing it.
*/ if (ep->mpa_skb &&
test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
clear_bit(STOP_MPA_TIMER, &ep->com.flags);
stop_ep_timer(ep);
}
start_ep_timer(ep);
}
set_bit(CLOSE_SENT, &ep->com.flags); break; case CLOSING: if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
close = 1; if (abrupt) {
(void)stop_ep_timer(ep);
ep->com.state = ABORTING;
} else
ep->com.state = MORIBUND;
} break; case MORIBUND: case ABORTING: case DEAD:
pr_debug("ignoring disconnect ep %p state %u\n",
ep, ep->com.state); break; default:
WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state); break;
}
if (close) { if (abrupt) {
set_bit(EP_DISC_ABORT, &ep->com.history);
ret = send_abort(ep);
} else {
set_bit(EP_DISC_CLOSE, &ep->com.history);
ret = send_halfclose(ep);
} if (ret) {
set_bit(EP_DISC_FAIL, &ep->com.history); if (!abrupt) {
stop_ep_timer(ep);
close_complete_upcall(ep, -EIO);
} if (ep->com.qp) { struct c4iw_qp_attributes attrs;
ep = get_ep_from_tid(dev, tid); if (!ep) return 0; /* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to * determine if there's a rx PDU feedback event pending. * * If that bit is set, it means we'll need to re-read the TCB's * rq_start value. The final value is the one present in a TCB * with the TF_RX_PDU_OUT bit cleared.
*/
c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
c4iw_put_ep(&ep->com); /* from read_tcb() */
/* If TF_RX_PDU_OUT bit is set, re-read the TCB */ if (rx_pdu_out) { if (++ep->rx_pdu_out_cnt >= 2) {
WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n"); goto cleanup;
}
read_tcb(ep); return 0;
}
/* * We need to parse the TCP options from SYN packet. * to generate cpl_pass_accept_req.
*/
memset(&tmp_opt, 0, sizeof(tmp_opt));
tcp_clear_options(&tmp_opt);
tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
/* * We store the qid in opt2 which will be used by the firmware * to send us the wr response.
*/
req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));
/* * We initialize the MSS index in TCB to 0xF. * So that when driver sends cpl_pass_accept_rpl * TCB picks up the correct value. If this was 0 * TP will ignore any value > 0 for MSS index.
*/
req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
req->cookie = (uintptr_t)skb;
/* * Handler for CPL_RX_PKT message. Need to handle cpl_rx_pkt * messages when a filter is being used instead of server to * redirect a syn packet. When packets hit filter they are redirected * to the offload queue and driver tries to establish the connection * using firmware work request.
*/ staticint rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
{ int stid; unsignedint filter; struct ethhdr *eh = NULL; struct vlan_ethhdr *vlan_eh = NULL; struct iphdr *iph; struct tcphdr *tcph; struct rss_header *rss = (void *)skb->data; struct cpl_rx_pkt *cpl = (void *)skb->data; struct cpl_pass_accept_req *req = (void *)(rss + 1); struct l2t_entry *e; struct dst_entry *dst; struct c4iw_ep *lep = NULL;
u16 window; struct port_info *pi; struct net_device *pdev;
u16 rss_qid, eth_hdr_len; int step; struct neighbour *neigh;
/* Drop all non-SYN packets */ if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F))) goto reject;
/* * Drop all packets which did not hit the filter. * Unlikely to happen.
*/ if (!(rss->filter_hit && rss->filter_tid)) goto reject;
/* * Calculate the server tid from filter hit index from cpl_rx_pkt.
*/
stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
/* Calcuate filter portion for LE region. */
filter = (__force unsignedint) cpu_to_be32(cxgb4_select_ntuple(
dev->rdev.lldi.ports[0],
e));
/* * Synthesize the cpl_pass_accept_req. We have everything except the * TID. Once firmware sends a reply with TID we update the TID field * in cpl and pass it through the regular cpl_pass_accept_req path.
*/
build_cpl_pass_accept_req(skb, stid, iph->tos);
send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
tcph->source, ntohl(tcph->seq), filter, window,
rss_qid, pi->port_id);
cxgb4_l2t_release(e);
free_dst:
dst_release(dst);
reject: if (lep)
c4iw_put_ep(&lep->com); return 0;
}
/*
 * Dispatch table, indexed by CPL opcode (real or fake), of the handlers
 * that do the actual work.  These are called from a work queue.
 */
static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
	/* Active open */
	[CPL_ACT_OPEN_RPL] = act_open_rpl,
	[CPL_ACT_ESTABLISH] = act_establish,

	/* Passive open / listen */
	[CPL_PASS_OPEN_RPL] = pass_open_rpl,
	[CPL_PASS_ACCEPT_REQ] = pass_accept_req,
	[CPL_PASS_ESTABLISH] = pass_establish,
	[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
	[CPL_RX_PKT] = rx_pkt,

	/* Data path and firmware messages */
	[CPL_RX_DATA] = rx_data,
	[CPL_FW4_ACK] = fw4_ack,
	[CPL_FW6_MSG] = deferred_fw6_msg,
	[CPL_GET_TCB_RPL] = read_tcb_rpl,

	/* Close, abort and terminate */
	[CPL_PEER_CLOSE] = peer_close,
	[CPL_CLOSE_CON_RPL] = close_con_rpl,
	[CPL_ABORT_REQ_RSS] = peer_abort,
	[CPL_ABORT_RPL_RSS] = abort_rpl,
	[CPL_ABORT_RPL] = abort_rpl,
	[CPL_RDMA_TERMINATE] = terminate,

	/* Fake opcodes queued by the driver itself */
	[FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
	[FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe,
};
mutex_lock(&ep->com.mutex);
pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state);
set_bit(TIMEDOUT, &ep->com.history); switch (ep->com.state) { case MPA_REQ_SENT:
connect_reply_upcall(ep, -ETIMEDOUT); break; case MPA_REQ_WAIT: case MPA_REQ_RCVD: case MPA_REP_SENT: case FPDU_MODE: break; case CLOSING: case MORIBUND: if (ep->com.cm_id && ep->com.qp) {
attrs.next_state = C4IW_QP_STATE_ERROR;
c4iw_modify_qp(ep->com.qp->rhp,
ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
&attrs, 1);
}
close_complete_upcall(ep, -ETIMEDOUT); break; case ABORTING: case DEAD:
/* * These states are expected if the ep timed out at the same * time as another thread was calling stop_ep_timer(). * So we silently do nothing for these states.
*/
abort = 0; break; default:
WARN(1, "%s unexpected state ep %p tid %u state %u\n",
__func__, ep, ep->hwtid, ep->com.state);
abort = 0;
}
mutex_unlock(&ep->com.mutex); if (abort)
c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
c4iw_put_ep(&ep->com);
}
spin_lock(&timeout_lock); if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) { /* * Only insert if it is not already on the list.
*/ if (!ep->entry.next) {
list_add_tail(&ep->entry, &timeout_list);
kickit = 1;
}
}
spin_unlock(&timeout_lock); if (kickit)
queue_work(workq, &skb_work);
}
/*
 * All the CM events are handled on a work queue to have a safe context.
 *
 * @dev: device the skb belongs to; stashed in the skb for the worker
 * @skb: message to defer
 *
 * Always returns 0.
 */
static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
{
	/*
	 * Save dev in the skb->cb area, in the second pointer-sized slot
	 * (offset sizeof(void *)).
	 */
	*((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;

	/* Queue the skb and schedule the worker thread. */
	skb_queue_tail(&rxq, skb);
	queue_work(workq, &skb_work);

	return 0;
}
ep = get_ep_from_tid(dev, tid); /* This EP will be dereferenced in peer_abort() */ if (!ep) {
pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
kfree_skb(skb); return 0;
} if (cxgb_is_neg_adv(req->status)) {
pr_debug("Negative advice on abort- tid %u status %d (%s)\n",
ep->hwtid, req->status,
neg_adv_str(req->status)); goto out;
}
pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state);
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.65Angebot
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-29)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.