/* * Copyright (c) 2012 Mellanox Technologies. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE.
*/ /***********************************************************/ /*This file support the handling of the Alias GUID feature. */ /***********************************************************/ #include <rdma/ib_mad.h> #include <rdma/ib_smi.h> #include <rdma/ib_cache.h> #include <rdma/ib_sa.h> #include <rdma/ib_pack.h> #include <linux/mlx4/cmd.h> #include <linux/init.h> #include <linux/errno.h> #include <rdma/ib_user_verbs.h> #include <linux/delay.h> #include"mlx4_ib.h"
/* The driver keeps a cache of the current state of all GUIDs, as they are in the HW. Whenever an SMP MAD GUIDInfo record is received, its data is cached.
*/
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { /* The location of the specific index starts from bit number 4
* until bit num 11 */ if (test_bit(i + 4, (unsignedlong *)&guid_indexes)) {
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; if (slave_id >= dev->dev->num_slaves) {
pr_debug("The last slave: %d\n", slave_id); return;
}
/* cache the guid: */
memcpy(&dev->sriov.demux[port_index].guid_cache[slave_id],
&p_data[i * GUID_REC_SIZE],
GUID_REC_SIZE);
} else
pr_debug("Guid number: %d in block: %d" " was not updated\n", i, block_num);
}
}
static __be64 get_cached_alias_guid(struct mlx4_ib_dev *dev, int port, int index)
{ if (index >= NUM_ALIAS_GUID_PER_PORT) {
pr_err("%s: ERROR: asked for index:%d\n", __func__, index); return (__force __be64) -1;
} return *(__be64 *)&dev->sriov.demux[port - 1].guid_cache[index];
}
/*
 * mlx4_ib_slave_alias_guid_event
 * @dev:        device handed on to mlx4_ib_init_alias_guid_work()
 * @slave:      slave number; split below into record_num (slave / 8) and
 *              index (slave % 8)
 * @port:       port number; converted to port_index by subtracting one
 * @slave_init: not referenced by the statements visible here
 *
 * When do_work is non-zero, mlx4_ib_init_alias_guid_work() is called for
 * port_index.
 *
 * NOTE(review): in this view do_work is initialised to 0 and nothing between
 * the declarations and the final check assigns it; curr_guid, required_guid,
 * record_num, index and flags are declared but never used. The statements
 * that would use them presumably are missing from this excerpt - confirm
 * against the full file before relying on this description.
 */
void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave, int port, int slave_init)
{
__be64 curr_guid, required_guid; int record_num = slave / 8; int index = slave % 8; int port_index = port - 1; unsignedlong flags; int do_work = 0;
/* Kick the alias GUID work for this port only when work was flagged. */
if (do_work)
mlx4_ib_init_alias_guid_work(dev, port_index);
}
/*
 * Whenever a GUID is set or unset (GUID table change), create an event and
 * notify the relevant slave (the master should be notified as well).
 * If the GUID value differs from the one in the cache, the slave is not
 * updated; in that case it waits for the smp_snoop or the port management
 * event to call this function and update the slave.
 * block_num - the index of the block (16 blocks available)
 * port_num - 1 or 2
*/ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, int block_num, u32 port_num,
u8 *p_data)
{ int i;
u64 guid_indexes; int slave_id, slave_port; enum slave_port_state new_state; enum slave_port_state prev_state;
__be64 tmp_cur_ag, form_cache_ag; enum slave_port_gen_event gen_event; struct mlx4_sriov_alias_guid_info_rec_det *rec; unsignedlong flags;
__be64 required_value;
/*calculate the slaves and notify them*/ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { /* the location of the specific index runs from bits 4..11 */ if (!(test_bit(i + 4, (unsignedlong *)&guid_indexes))) continue;
slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; if (slave_id >= dev->dev->persist->num_vfs + 1) return;
slave_port = mlx4_phys_to_slave_port(dev->dev, slave_id, port_num); if (slave_port < 0) /* this port isn't available for the VF */ continue;
tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE];
form_cache_ag = get_cached_alias_guid(dev, port_num,
(NUM_ALIAS_GUID_IN_REC * block_num) + i); /* * Check if guid is not the same as in the cache, * If it is different, wait for the snoop_smp or the port mgmt * change event to update the slave on its port state change
*/ if (tmp_cur_ag != form_cache_ag) continue;
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags); for (i = 0 ; i < NUM_ALIAS_GUID_IN_REC; i++) {
__be64 sm_response, required_val;
if (!(cb_ctx->guid_indexes &
mlx4_ib_get_aguid_comp_mask_from_ix(i))) continue;
sm_response = *(__be64 *)&guid_rec->guid_info_list
[i * GUID_REC_SIZE];
required_val = *(__be64 *)&rec->all_recs[i * GUID_REC_SIZE]; if (cb_ctx->method == MLX4_GUID_INFO_RECORD_DELETE) { if (required_val ==
cpu_to_be64(MLX4_GUID_FOR_DELETE_VAL)) goto next_entry;
/* A new value was set till we got the response */
pr_debug("need to set new value %llx, record num %d, block_num:%d\n",
be64_to_cpu(required_val),
i, guid_rec->block_num); goto entry_declined;
}
/* check if the SM didn't assign one of the records. * if it didn't, re-ask for.
*/ if (sm_response == MLX4_NOT_SET_GUID) { if (rec->guids_retry_schedule[i] == 0)
mlx4_ib_warn(&dev->ib_dev, "%s:Record num %d in block_num: %d was declined by SM\n",
__func__, i,
guid_rec->block_num); goto entry_declined;
} else { /* properly assigned record. */ /* We save the GUID we just got from the SM in the * admin_guid in order to be persistent, and in the
* request from the sm the process will ask for the same GUID */ if (required_val &&
sm_response != required_val) { /* Warn only on first retry */ if (rec->guids_retry_schedule[i] == 0)
mlx4_ib_warn(&dev->ib_dev, "%s: Failed to set" " admin guid after SysAdmin " "configuration. " "Record num %d in block_num:%d " "was declined by SM, " "new val(0x%llx) was kept, SM returned (0x%llx)\n",
__func__, i,
guid_rec->block_num,
be64_to_cpu(required_val),
be64_to_cpu(sm_response)); goto entry_declined;
} else {
*(__be64 *)&rec->all_recs[i * GUID_REC_SIZE] =
sm_response; if (required_val == 0)
mlx4_set_admin_guid(dev->dev,
sm_response,
(guid_rec->block_num
* NUM_ALIAS_GUID_IN_REC) + i,
cb_ctx->port); goto next_entry;
}
}
entry_declined:
declined_guid_indexes |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
rec->guids_retry_schedule[i] =
(rec->guids_retry_schedule[i] == 0) ? 1 :
min((unsignedint)60,
rec->guids_retry_schedule[i] * 2); /* using the minimum value among all entries in that record */
resched_delay_sec = (resched_delay_sec == 0) ?
rec->guids_retry_schedule[i] :
min(resched_delay_sec,
rec->guids_retry_schedule[i]); continue;
next_entry:
rec->guids_retry_schedule[i] = 0;
}
applied_guid_indexes = cb_ctx->guid_indexes & ~declined_guid_indexes; if (declined_guid_indexes ||
rec->guid_indexes & ~(applied_guid_indexes)) {
pr_debug("record=%d wasn't fully set, guid_indexes=0x%llx applied_indexes=0x%llx, declined_indexes=0x%llx\n",
guid_rec->block_num,
be64_to_cpu((__force __be64)rec->guid_indexes),
be64_to_cpu((__force __be64)applied_guid_indexes),
be64_to_cpu((__force __be64)declined_guid_indexes));
rec->time_to_run = ktime_get_boottime_ns() +
resched_delay_sec * NSEC_PER_SEC;
} else {
rec->status = MLX4_GUID_INFO_STATUS_SET;
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags); /* The func is call here to close the cases when the sm doesn't send smp, so in the sa response the driver notifies the slave.
*/
mlx4_ib_notify_slaves_on_guid_change(dev, guid_rec->block_num,
cb_ctx->port,
guid_rec->guid_info_list);
out:
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); if (!dev->sriov.is_going_down) {
get_low_record_time_index(dev, port_index, &resched_delay_sec);
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port_index].wq,
&dev->sriov.alias_guid.ports_guid[port_index].
alias_guid_work,
msecs_to_jiffies(resched_delay_sec * 1000));
} if (cb_ctx->sa_query) {
list_del(&cb_ctx->list);
kfree(cb_ctx);
} else
complete(&cb_ctx->done);
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
/*
 * invalidate_guid_record
 * @dev:   device holding sriov.alias_guid.ports_guid[]
 * @port:  port number; every access below uses ports_guid[port - 1]
 * @index: record index inside all_rec_per_port[] of that port
 *
 * Builds a comp_mask out of the entries of record @index, ORs it into the
 * record's guid_indexes and, if guid_indexes is then non-zero, sets the
 * record status to MLX4_GUID_INFO_STATUS_IDLE.
 *
 * NOTE(review): the statements after the status update (port init check,
 * loop over all records, work re-queue, spin_unlock_irqrestore calls) use
 * flags/flags1, which are not declared in this function, have no matching
 * spin_lock_irqsave here, and call invalidate_guid_record() for every record
 * index. They look like the tail of a different function whose header is
 * missing from this excerpt - confirm against the full file.
 */
staticvoid invalidate_guid_record(struct mlx4_ib_dev *dev, u8 port, int index)
{ int i;
u64 cur_admin_val;
ib_sa_comp_mask comp_mask = 0;
/* Calculate the comp_mask for this record, one entry at a time. */ for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) {
cur_admin_val =
*(u64 *)&dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].all_recs[GUID_REC_SIZE * i]; /* Check the admin value: an entry is not put up for assignment if
 * it is marked for delete (MLX4_GUID_FOR_DELETE_VAL; the original
 * comment gives this as ~00LL) or if it is the first GUID of the
 * first record (described by the original comment as the hw guid).
 * The original comment also mentions records not owned by the
 * sysadmin when the SM does not need to assign GUIDs; the condition
 * below only tests the two cases listed above.
*/ if (MLX4_GUID_FOR_DELETE_VAL == cur_admin_val ||
(!index && !i)) continue;
comp_mask |= mlx4_ib_get_aguid_comp_mask_from_ix(i);
}
/* Merge the new mask into the record; a non-empty mask marks it IDLE. */
dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].guid_indexes |= comp_mask; if (dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].guid_indexes)
dev->sriov.alias_guid.ports_guid[port - 1].
all_rec_per_port[index].status = MLX4_GUID_INFO_STATUS_IDLE;
/* NOTE(review): from here on the code appears to belong to another function (see header note). */
/* Run the port init once if it is still pending, then clear the pending flag. */
if (dev->sriov.alias_guid.ports_guid[port - 1].state_flags &
GUID_STATE_NEED_PORT_INIT) {
mlx4_ib_guid_port_init(dev, port);
dev->sriov.alias_guid.ports_guid[port - 1].state_flags &=
(~GUID_STATE_NEED_PORT_INIT);
} for (i = 0; i < NUM_ALIAS_GUID_REC_IN_PORT; i++)
invalidate_guid_record(dev, port, i);
if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down) { /* Make sure no work waits in the queue. If the work is already
 * queued (not on the timer) the cancel will fail; that is not a
 * problem because we only want the work to get started.
*/
cancel_delayed_work(&dev->sriov.alias_guid.
ports_guid[port - 1].alias_guid_work);
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port - 1].wq,
&dev->sriov.alias_guid.ports_guid[port - 1].alias_guid_work,
0);
}
/* Release ag_work_lock first, then going_down_lock. */
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
staticvoid set_required_record(struct mlx4_ib_dev *dev, u8 port, struct mlx4_next_alias_guid_work *next_rec, int record_index)
{ int i; int lowset_time_entry = -1; int lowest_time = 0;
ib_sa_comp_mask delete_guid_indexes = 0;
ib_sa_comp_mask set_guid_indexes = 0; struct mlx4_sriov_alias_guid_info_rec_det *rec =
&dev->sriov.alias_guid.ports_guid[port].
all_rec_per_port[record_index];
for (i = 0; i < NUM_ALIAS_GUID_IN_REC; i++) { if (!(rec->guid_indexes &
mlx4_ib_get_aguid_comp_mask_from_ix(i))) continue;
/* return index of record that should be updated based on lowest * rescheduled time
*/ staticint get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port, int *resched_delay_sec)
{ int record_index = -1;
u64 low_record_time = 0; struct mlx4_sriov_alias_guid_info_rec_det rec; int j;
/* The function returns the next record that was
* not configured (or failed to be configured) */ staticint get_next_record_to_update(struct mlx4_ib_dev *dev, u8 port, struct mlx4_next_alias_guid_work *rec)
{ unsignedlong flags; int record_index; int ret = 0;
rec = kzalloc(sizeof *rec, GFP_KERNEL); if (!rec) return;
pr_debug("starting [port: %d]...\n", sriov_alias_port->port + 1);
ret = get_next_record_to_update(dev, sriov_alias_port->port, rec); if (ret) {
pr_debug("No more records to update.\n"); goto out;
}
void mlx4_ib_init_alias_guid_work(struct mlx4_ib_dev *dev, int port)
{ unsignedlong flags, flags1;
if (!mlx4_is_master(dev->dev)) return;
spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
spin_lock_irqsave(&dev->sriov.alias_guid.ag_work_lock, flags1); if (!dev->sriov.is_going_down) { /* If there is pending one should cancel then run, otherwise * won't run till previous one is ended as same work * struct is used.
*/
cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[port].
alias_guid_work);
queue_delayed_work(dev->sriov.alias_guid.ports_guid[port].wq,
&dev->sriov.alias_guid.ports_guid[port].alias_guid_work, 0);
}
spin_unlock_irqrestore(&dev->sriov.alias_guid.ag_work_lock, flags1);
spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
}
for (i = 1; i <= dev->num_ports; ++i) { if (dev->ib_dev.ops.query_gid(&dev->ib_dev, i, 0, &gid)) {
ret = -EFAULT; goto err_unregister;
}
}
for (i = 0 ; i < dev->num_ports; i++) {
memset(&dev->sriov.alias_guid.ports_guid[i], 0, sizeof (struct mlx4_sriov_alias_guid_port_rec_det));
dev->sriov.alias_guid.ports_guid[i].state_flags |=
GUID_STATE_NEED_PORT_INIT; for (j = 0; j < NUM_ALIAS_GUID_REC_IN_PORT; j++) { /* mark each val as it was deleted */
memset(dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].all_recs, 0xFF, sizeof(dev->sriov.alias_guid.ports_guid[i].
all_rec_per_port[j].all_recs));
}
INIT_LIST_HEAD(&dev->sriov.alias_guid.ports_guid[i].cb_list); /*prepare the records, set them to be allocated by sm*/ if (mlx4_ib_sm_guid_assign) for (j = 1; j < NUM_ALIAS_GUID_PER_PORT; j++)
mlx4_set_admin_guid(dev->dev, 0, j, i + 1); for (j = 0 ; j < NUM_ALIAS_GUID_REC_IN_PORT; j++)
invalidate_guid_record(dev, i + 1, j);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.