/* * Copyright (c) 2012 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/
/* refcount is the reference count for the following: 1. Each queued request 2. Each invocation of the worker thread 3. Membership of the port at the SA
*/
atomic_t refcount;
/* delayed work to clean pending SM request */ struct delayed_work timeout_work; struct list_head cleanup_list;
};
/* A queued join/leave request, as received from a slave (VF) function. */
struct mcast_req {
	int func;			/* slave that issued the request; indexes group->func[] */
	struct ib_sa_mad sa_mad;	/* copy of the slave's SA MAD */
	struct list_head group_list;	/* entry in group->pending_list */
	struct list_head func_list;	/* entry in group->func[func].pending */
	struct mcast_group *group;	/* owning group */
	int clean;			/* NOTE(review): cleanup marker — semantics not visible in this chunk */
};
/*
 * Decrement a refcount that is expected to stay above zero at this point;
 * only warn (do not free anything) if it unexpectedly reaches zero.
 * NOTE: the expansion references a 'group' variable from the caller's
 * scope (passed to mcg_warn_group) — callers must have one in scope.
 */
#define safe_atomic_dec(ref) \
	do {\
		if (atomic_dec_and_test(ref)) \
			mcg_warn_group(group, "did not expect to reach zero\n"); \
	} while (0)
/*
 * Map a multicast group state to a printable name for warning and
 * debug messages. Unknown values yield "Invalid State".
 */
static const char *get_state_string(enum mcast_group_state state)
{
	switch (state) {
	case MCAST_IDLE:
		return "MCAST_IDLE";
	case MCAST_JOIN_SENT:
		return "MCAST_JOIN_SENT";
	case MCAST_LEAVE_SENT:
		return "MCAST_LEAVE_SENT";
	case MCAST_RESP_READY:
		return "MCAST_RESP_READY";
	default:
		return "Invalid State";
	}
}
staticstruct mcast_group *mcast_find(struct mlx4_ib_demux_ctx *ctx, union ib_gid *mgid)
{ struct rb_node *node = ctx->mcg_table.rb_node; struct mcast_group *group; int ret;
while (node) {
group = rb_entry(node, struct mcast_group, node);
ret = memcmp(mgid->raw, group->rec.mgid.raw, sizeof *mgid); if (!ret) return group;
/* we rely on a mad request as arrived from a VF */
memcpy(&mad, sa_mad, sizeof mad);
/* fix port GID to be the real one (slave 0) */
sa_mad_data->port_gid.global.interface_id = group->demux->guid_cache[0];
/* assign our own TID */
mad.mad_hdr.tid = mlx4_ib_get_new_demux_tid(group->demux);
group->last_req_tid = mad.mad_hdr.tid; /* keep it for later validation */
ret = send_mad_to_wire(group->demux, (struct ib_mad *)&mad); /* set timeout handler */ if (!ret) { /* calls mlx4_ib_mcg_timeout_handler */
queue_delayed_work(group->demux->mcg_wq, &group->timeout_work,
secs_to_jiffies(MAD_TIMEOUT_SEC));
}
static u16 cmp_rec(struct ib_sa_mcmember_data *src, struct ib_sa_mcmember_data *dst, ib_sa_comp_mask comp_mask)
{ /* src is group record, dst is request record */ /* MGID must already match */ /* Port_GID we always replace to our Port_GID, so it is a match */
/*
 * Drop one reference on @group; return 1 if this was the last reference
 * and the group was destroyed, 0 otherwise.
 * Outside the timeout handler, pending timeout work is cancelled first;
 * if the cancel loses the race with an already-running handler, the
 * reference is restored and destruction is left to that handler.
 */
static int release_group(struct mcast_group *group, int from_timeout_handler)
{
	struct mlx4_ib_demux_ctx *ctx = group->demux;
	int nzgroup;

	/* lock order: table lock, then group lock */
	mutex_lock(&ctx->mcg_table_lock);
	mutex_lock(&group->lock);
	if (atomic_dec_and_test(&group->refcount)) {
		if (!from_timeout_handler) {
			if (group->state != MCAST_IDLE &&
			    !cancel_delayed_work(&group->timeout_work)) {
				/* timeout handler is already running and will
				 * drop the reference itself — restore ours and
				 * let it perform the release */
				atomic_inc(&group->refcount);
				mutex_unlock(&group->lock);
				mutex_unlock(&ctx->mcg_table_lock);
				return 0;
			}
		}

		/* a zero-MGID ("mgid0") group skips sysfs and rb-tree removal;
		 * the non-zero comparison result means this is a regular group */
		nzgroup = memcmp(&group->rec.mgid, &mgid0, sizeof mgid0);
		if (nzgroup)
			del_sysfs_port_mcg_attr(ctx->dev, ctx->port, &group->dentry.attr);
		if (!list_empty(&group->pending_list))
			mcg_warn_group(group, "releasing a group with non empty pending list\n");
		if (nzgroup)
			rb_erase(&group->node, &ctx->mcg_table);
		list_del_init(&group->mgid0_list);
		mutex_unlock(&group->lock);
		mutex_unlock(&ctx->mcg_table_lock);
		kfree(group);
		return 1;
	} else {
		mutex_unlock(&group->lock);
		mutex_unlock(&ctx->mcg_table_lock);
	}
	return 0;
}
/*
 * Apply @inc (positive on join, negative on leave) to the per-join-state
 * member counters. Bits 0..2 of @join_state each select one entry of
 * group->members[].
 */
static void adjust_membership(struct mcast_group *group, u8 join_state, int inc)
{
	int bit;

	for (bit = 0; bit < 3; bit++) {
		if (join_state & (1 << bit))
			group->members[bit] += inc;
	}
}
if (join_mask == (group_join_state & join_mask)) { /* port's membership need not change */
status = cmp_rec(&group->rec, sa_data, req->sa_mad.sa_hdr.comp_mask); if (!status)
join_group(group, req->func, join_mask);
staticvoid mlx4_ib_mcg_work_handler(struct work_struct *work)
{ struct mcast_group *group; struct mcast_req *req = NULL; struct ib_sa_mcmember_data *sa_data;
u8 req_join_state; int rc = 1; /* release_count - this is for the scheduled work */
u16 status;
u8 method;
group = container_of(work, typeof(*group), work);
mutex_lock(&group->lock);
/* First, let's see if a response from SM is waiting regarding this group. * If so, we need to update the group's REC. If this is a bad response, we * may need to send a bad response to a VF waiting for it. If VF is waiting
* and this is a good response, the VF will be answered later in this func. */ if (group->state == MCAST_RESP_READY) { /* cancels mlx4_ib_mcg_timeout_handler */
cancel_delayed_work(&group->timeout_work);
status = be16_to_cpu(group->response_sa_mad.mad_hdr.status);
method = group->response_sa_mad.mad_hdr.method; if (group->last_req_tid != group->response_sa_mad.mad_hdr.tid) {
mcg_warn_group(group, "Got MAD response to existing MGID but wrong TID, dropping. Resp TID=%llx, group TID=%llx\n",
be64_to_cpu(group->response_sa_mad.mad_hdr.tid),
be64_to_cpu(group->last_req_tid));
group->state = group->prev_state; goto process_requests;
} if (status) { if (!list_empty(&group->pending_list))
req = list_first_entry(&group->pending_list, struct mcast_req, group_list); if (method == IB_MGMT_METHOD_GET_RESP) { if (req) {
send_reply_to_slave(req->func, group, &req->sa_mad, status);
--group->func[req->func].num_pend_reqs;
list_del(&req->group_list);
list_del(&req->func_list);
kfree(req);
++rc;
} else
mcg_warn_group(group, "no request for failed join\n");
} elseif (method == IB_SA_METHOD_DELETE_RESP && group->demux->flushing)
++rc;
} else {
u8 resp_join_state;
u8 cur_join_state;
process_requests: /* We should now go over pending join/leave requests, as long as we are idle. */ while (!list_empty(&group->pending_list) && group->state == MCAST_IDLE) {
req = list_first_entry(&group->pending_list, struct mcast_req,
group_list);
sa_data = (struct ib_sa_mcmember_data *)req->sa_mad.data;
req_join_state = sa_data->scope_join_state & 0xf;
/* For a leave request, we will immediately answer the VF, and * update our internal counters. The actual leave will be sent
* to SM later, if at all needed. We dequeue the request now. */ if (req->sa_mad.mad_hdr.method == IB_SA_METHOD_DELETE)
rc += handle_leave_req(group, req_join_state, req); else
rc += handle_join_req(group, req_join_state, req);
}
staticstruct mcast_group *acquire_group(struct mlx4_ib_demux_ctx *ctx, union ib_gid *mgid, int create)
{ struct mcast_group *group, *cur_group; int is_mgid0; int i;
is_mgid0 = !memcmp(&mgid0, mgid, sizeof mgid0); if (!is_mgid0) {
group = mcast_find(ctx, mgid); if (group) goto found;
}
if (!create) return ERR_PTR(-ENOENT);
group = kzalloc(sizeof(*group), GFP_KERNEL); if (!group) return ERR_PTR(-ENOMEM);
atomic_inc(&group->refcount); /* for the request */
atomic_inc(&group->refcount); /* for scheduling the work */
list_add_tail(&req->group_list, &group->pending_list);
list_add_tail(&req->func_list, &group->func[req->func].pending); /* calls mlx4_ib_mcg_work_handler */ if (!queue_work(group->demux->mcg_wq, &group->work))
safe_atomic_dec(&group->refcount);
}
switch (mad->mad_hdr.method) { case IB_MGMT_METHOD_GET_RESP: case IB_SA_METHOD_DELETE_RESP:
mutex_lock(&ctx->mcg_table_lock);
group = acquire_group(ctx, &rec->mgid, 0);
mutex_unlock(&ctx->mcg_table_lock); if (IS_ERR(group)) { if (mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP) {
__be64 tid = mad->mad_hdr.tid;
*(u8 *)(&tid) = (u8)slave; /* in group we kept the modified TID */
group = search_relocate_mgid0_group(ctx, tid, &rec->mgid);
} else
group = NULL;
}
if (!group) return 1;
mutex_lock(&group->lock);
group->response_sa_mad = *mad;
group->prev_state = group->state;
group->state = MCAST_RESP_READY; /* calls mlx4_ib_mcg_work_handler */
atomic_inc(&group->refcount); if (!queue_work(ctx->mcg_wq, &group->work))
safe_atomic_dec(&group->refcount);
mutex_unlock(&group->lock);
release_group(group, 0); return 1; /* consumed */ case IB_MGMT_METHOD_SET: case IB_SA_METHOD_GET_TABLE: case IB_SA_METHOD_GET_TABLE_RESP: case IB_SA_METHOD_DELETE: return 0; /* not consumed, pass-through to guest over tunnel */ default:
mcg_warn("In demux, port %d: unexpected MCMember method: 0x%x, dropping\n",
port, mad->mad_hdr.method); return 1; /* consumed */
}
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.