/*
 * functions to manipulate the transport id in msg block
 *
 * Each directive must sit on its own line: a #define body runs to the end
 * of the line, so fusing several directives together would make the first
 * macro swallow the rest.
 */
#define BCM_VK_MSG_Q_SHIFT	 4
#define BCM_VK_MSG_Q_MASK	 0xF
#define BCM_VK_MSG_ID_MASK	 0xFFF

/* upper bound, in ms, for the wait loop that drains outstanding DMA */
#define BCM_VK_DMA_DRAIN_MAX_MS	  2000

/* number x q_size will be the max number of msg processed per loop */
#define BCM_VK_MSG_PROC_MAX_LOOP 2
/* module parameter */

/*
 * hb_mon: heartbeat monitoring switch, enabled by default.
 * Registered with mode 0444 (read-only).
 */
static bool hb_mon = true;
module_param(hb_mon, bool, 0444);
MODULE_PARM_DESC(hb_mon, "Monitoring heartbeat continuously.\n");

/*
 * batch_log: cap on the number of log lines emitted per batch operation
 * (compared against a per-batch counter before logging).
 * Registered with mode 0444 (read-only).
 */
static int batch_log = 1;
module_param(batch_log, int, 0444);
MODULE_PARM_DESC(batch_log, "Max num of logs per batch operation.\n");
/* use irqsave version as this may be called inside timer interrupt */
spin_lock_irqsave(&vk->host_alert_lock, flags);
alert->notfs |= bit_mask;
spin_unlock_irqrestore(&vk->host_alert_lock, flags);
if (test_and_set_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload) == 0)
queue_work(vk->wq_thread, &vk->wq_work);
}
/*
 * Heartbeat related defines
 * The heartbeat from host is a last resort.  If stuck condition happens
 * on the card, firmware is supposed to detect it.  Therefore, the heartbeat
 * values used will be more relaxed on the driver, which needs to be bigger
 * than the watchdog timeout on the card.  The watchdog timeout on the card
 * is 20s, with a jitter of 2s => 22s.  We use a value of 27s here.
 *
 * BCM_VK_HB_TIMER_S     - heartbeat timer period in seconds
 * BCM_VK_HB_TIMER_VALUE - the same period expressed in jiffies
 * BCM_VK_HB_LOST_MAX    - number of timer periods that fit in the 27s budget
 */
#define BCM_VK_HB_TIMER_S 3
#define BCM_VK_HB_TIMER_VALUE (BCM_VK_HB_TIMER_S * HZ)
#define BCM_VK_HB_LOST_MAX (27 / BCM_VK_HB_TIMER_S)
if (bcm_vk_drv_access_ok(vk) && hb_mon_is_on()) { /* read uptime from register and compare */
uptime_s = vkread32(vk, BAR_0, BAR_OS_UPTIME);
if (uptime_s == hb->last_uptime)
hb->lost_cnt++; else/* reset to avoid accumulation */
hb->lost_cnt = 0;
dev_dbg(&vk->pdev->dev, "Last uptime %d current %d, lost %d\n",
hb->last_uptime, uptime_s, hb->lost_cnt);
/* * if the interface goes down without any activity, a value * of 0xFFFFFFFF will be continuously read, and the detection * will happen eventually.
*/
hb->last_uptime = uptime_s;
} else { /* reset heart beat lost cnt */
hb->lost_cnt = 0;
}
/* check if it is in reset, if so, don't allow */ if (vk->reset_pid) {
dev_err(&vk->pdev->dev, "No context allowed during reset by pid %d\n",
vk->reset_pid);
goto in_reset_exit;
}
for (i = 0; i < ARRAY_SIZE(vk->ctx); i++) { if (!vk->ctx[i].in_use) {
vk->ctx[i].in_use = true;
ctx = &vk->ctx[i]; break;
}
}
if (!ctx) {
dev_err(&vk->pdev->dev, "All context in use\n");
goto all_in_use_exit;
}
/* set the pid and insert it to hash table */
ctx->pid = pid;
ctx->hash_idx = hash_idx;
list_add_tail(&ctx->node, &vk->pid_ht[hash_idx].head);
spin_lock(&vk->msg_id_lock); while (test_bit_count < (VK_MSG_ID_BITMAP_SIZE - 1)) { /* * first time come in this loop, msg_id will be 0 * and the first one tested will be 1. We skip * VK_SIMPLEX_MSG_ID (0) for one way host2vk * communication
*/
vk->msg_id++; if (vk->msg_id == VK_MSG_ID_BITMAP_SIZE)
vk->msg_id = 1;
if (!vk->ctx[idx].in_use) {
dev_err(&vk->pdev->dev, "context[%d] not in use!\n", idx);
} else {
vk->ctx[idx].in_use = false;
vk->ctx[idx].miscdev = NULL;
/* Remove it from hash list and see if it is the last one. */
list_del(&ctx->node);
hash_idx = ctx->hash_idx;
list_for_each_entry(entry, &vk->pid_ht[hash_idx].head, node) { if (entry->pid == pid)
count++;
}
}
spin_unlock(&vk->ctx_lock);
return count;
}
staticvoid bcm_vk_free_wkent(struct device *dev, struct bcm_vk_wkent *entry)
{ int proc_cnt;
bcm_vk_sg_free(dev, entry->dma, VK_DMA_MAX_ADDRS, &proc_cnt); if (proc_cnt)
atomic_dec(&entry->ctx->dma_cnt);
/* * Function to sync up the messages queue info that is provided by BAR1
*/ int bcm_vk_sync_msgq(struct bcm_vk *vk, bool force_sync)
{ struct bcm_vk_msgq __iomem *msgq; struct device *dev = &vk->pdev->dev;
u32 msgq_off;
u32 num_q; struct bcm_vk_msg_chan *chan_list[] = {&vk->to_v_msg_chan,
&vk->to_h_msg_chan}; struct bcm_vk_msg_chan *chan; int i, j; int ret = 0;
/* * If the driver is loaded at startup where vk OS is not up yet, * the msgq-info may not be available until a later time. In * this case, we skip and the sync function is supposed to be * called again.
*/ if (!bcm_vk_msgq_marker_valid(vk)) {
dev_info(dev, "BAR1 msgq marker not initialized.\n"); return -EAGAIN;
}
/* first msgq location */
msgq = vk->bar[BAR_1] + msgq_off;
/* * if this function is called when it is already inited, * something is wrong
*/ if (bcm_vk_drv_access_ok(vk) && !force_sync) {
dev_err(dev, "Msgq info already in sync\n"); return -EPERM;
}
for (i = 0; i < ARRAY_SIZE(chan_list); i++) {
chan = chan_list[i];
memset(chan->sync_qinfo, 0, sizeof(chan->sync_qinfo));
/* if not enough space, return EAGAIN and let the app handle it */
retry = 0; while ((avail < entry->to_v_blks) &&
(retry++ < BCM_VK_H2VK_ENQ_RETRY)) {
mutex_unlock(&chan->msgq_mutex);
/* at this point, mutex is taken and there is enough space */
entry->seq_num = seq_num++; /* update debug seq number */
wr_idx = readl_relaxed(&msgq->wr_idx);
if (wr_idx >= qinfo->q_size) {
dev_crit(dev, "Invalid wr_idx 0x%x => max 0x%x!",
wr_idx, qinfo->q_size);
bcm_vk_blk_drv_access(vk);
bcm_vk_set_host_alert(vk, ERR_LOG_HOST_PCIE_DWN); goto idx_err;
}
dst = msgq_blk_addr(qinfo, wr_idx); for (i = 0; i < entry->to_v_blks; i++) {
memcpy_toio(dst, src, sizeof(*dst));
/* flush the write pointer */
writel(wr_idx, &msgq->wr_idx);
/* log new info for debugging */
dev_dbg(dev, "MsgQ[%d] [Rd Wr] = [%d %d] blks inserted %d - Q = [u-%d a-%d]/%d\n",
readl_relaxed(&msgq->num),
readl_relaxed(&msgq->rd_idx),
wr_idx,
entry->to_v_blks,
msgq_occupied(msgq, qinfo),
msgq_avail_space(msgq, qinfo),
readl_relaxed(&msgq->size)); /* * press door bell based on queue number. 1 is added to the wr_idx * to avoid the value of 0 appearing on the VK side to distinguish * from initial value.
*/
bcm_to_v_q_doorbell(vk, q_num, wr_idx + 1);
idx_err:
mutex_unlock(&chan->msgq_mutex); return 0;
}
/* * check if the marker is still good. Sometimes, the PCIe interface may * have gone down, and if so and we ship down things based on broken * values, kernel may panic.
*/ if (!bcm_vk_msgq_marker_valid(vk)) {
dev_info(dev, "PCIe comm chan - invalid marker (0x%x)!\n",
vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY)); return -EINVAL;
}
/* fill up necessary data */
entry->to_v_msg[0].function_id = VK_FID_SHUTDOWN;
set_q_num(&entry->to_v_msg[0], q_num);
set_msg_id(&entry->to_v_msg[0], VK_SIMPLEX_MSG_ID);
/* * don't send down or do anything if message queue is not initialized * and if it is the reset session, clear it.
*/ if (!bcm_vk_drv_access_ok(vk)) { if (vk->reset_pid == pid)
vk->reset_pid = 0; return -EPERM;
}
dev_dbg(dev, "No more sessions, shut down pid %d\n", pid);
/* only need to do it if it is not the reset process */ if (vk->reset_pid != pid)
rc = bcm_vk_send_shutdown_msg(vk, VK_SHUTDOWN_PID, pid, q_num); else /* put reset_pid to 0 if it is exiting last session */
vk->reset_pid = 0;
/* * drain all the messages from the queues, and find its pending * entry in the to_v queue, based on msg_id & q_num, and move the * entry to the to_h pending queue, waiting for user space * program to extract
*/
mutex_lock(&chan->msgq_mutex);
/* * Make a local copy and get pointer to src blk * The rd_idx is masked before getting the pointer to * avoid out of bound access in case the interface goes * down. It will end up pointing to the last block in * the buffer, but subsequent src->size check would be * able to catch this.
*/
src = msgq_blk_addr(qinfo, rd_idx & qinfo->q_mask);
src_size = readb(&src->size);
if ((rd_idx >= qinfo->q_size) ||
(src_size > (qinfo->q_size - 1))) {
dev_crit(dev, "Invalid rd_idx 0x%x or size 0x%x => max 0x%x!",
rd_idx, src_size, qinfo->q_size);
bcm_vk_blk_drv_access(vk);
bcm_vk_set_host_alert(vk,
ERR_LOG_HOST_PCIE_DWN); goto idx_err;
}
num_blks = src_size + 1;
data = kzalloc(num_blks * VK_MSGQ_BLK_SIZE, GFP_KERNEL); if (data) { /* copy messages and linearize it */
dst = data; for (j = 0; j < num_blks; j++) {
memcpy_fromio(dst, src, sizeof(*dst));
dst++;
rd_idx = msgq_inc(qinfo, rd_idx, 1);
src = msgq_blk_addr(qinfo, rd_idx);
}
total++;
} else { /* * if we could not allocate memory in kernel, * that is fatal.
*/
dev_crit(dev, "Kernel mem allocation failure.\n");
total = -ENOMEM; goto idx_err;
}
/* flush rd pointer after a message is dequeued */
writel(rd_idx, &msgq->rd_idx);
/* * No need to search if it is an autonomous one-way * message from driver, as these messages do not bear * a to_v pending item. Currently, only the shutdown * message falls into this category.
*/ if (data->function_id == VK_FID_SHUTDOWN) {
kfree(data); continue;
}
msg_id = get_msg_id(data); /* lookup original message in to_v direction */
entry = bcm_vk_dequeue_pending(vk,
&vk->to_v_msg_chan,
q_num,
msg_id);
/* * if there is a message that does not have a prior send, * this is the location to add handling for it
*/ if (entry) {
entry->to_h_blks = num_blks;
entry->to_h_msg = data;
bcm_vk_append_pendq(&vk->to_h_msg_chan,
q_num, entry);
} else { if (cnt++ < batch_log)
dev_info(dev, "Could not find MsgId[0x%x] for resp func %d bmap %d\n",
msg_id, data->function_id,
test_bit(msg_id, vk->bmap));
kfree(data);
} /* Fetch wr_idx to handle more back-to-back events */
wr_idx = readl(&msgq->wr_idx);
/* * cap the max so that, even if we try to handle more back-to-back events, * it won't hold the CPU too long, and so that corrupted rd/wr indexes * cannot trigger infinite looping.
*/ if (++msg_processed >= max_msg_to_process) {
dev_warn(dev, "Q[%d] Per loop processing exceeds %d\n",
q_num, max_msg_to_process);
exit_loop = true;
}
}
}
idx_err:
mutex_unlock(&chan->msgq_mutex);
dev_dbg(dev, "total %d drained from queues\n", total);
return total;
}
/* * init routine for all required data structures
*/ staticint bcm_vk_data_init(struct bcm_vk *vk)
{ int i;
spin_lock_init(&vk->ctx_lock); for (i = 0; i < ARRAY_SIZE(vk->ctx); i++) {
vk->ctx[i].in_use = false;
vk->ctx[i].idx = i; /* self identity */
vk->ctx[i].miscdev = NULL;
}
spin_lock_init(&vk->msg_id_lock);
spin_lock_init(&vk->host_alert_lock);
vk->msg_id = 0;
/* initialize hash table */ for (i = 0; i < VK_PID_HT_SZ; i++)
INIT_LIST_HEAD(&vk->pid_ht[i].head);
/* get a context and set it up for file */
ctx = bcm_vk_get_ctx(vk, task_tgid_nr(current)); if (!ctx) {
dev_err(dev, "Error allocating context\n");
rc = -ENOMEM;
} else { /* * set up context and replace private data with context for * other methods to use. Reason for the context is because * it is allowed for multiple sessions to open the sysfs, and * for each file open, when upper layer query the response, * only those that are tied to a specific open should be * returned. The context->idx will be used for such binding
*/
ctx->miscdev = miscdev;
p_file->private_data = ctx;
dev_dbg(dev, "ctx_returned with idx %d, pid %d\n",
ctx->idx, ctx->pid);
} return rc;
}
/* * search through the pendq on the to_h chan, and return only those * that belong to the same context. Search is always from the high to * the low priority queues
*/
spin_lock(&chan->pendq_lock); for (q_num = 0; q_num < chan->q_nr; q_num++) {
list_for_each_entry(iter, &chan->pendq[q_num], node) { if (iter->ctx->idx == ctx->idx) { if (count >=
(iter->to_h_blks * VK_MSGQ_BLK_SIZE)) {
list_del(&iter->node);
atomic_dec(&ctx->pend_cnt);
entry = iter;
} else { /* buffer not big enough */
rc = -EMSGSIZE;
} goto read_loop_exit;
}
}
}
read_loop_exit:
spin_unlock(&chan->pendq_lock);
if (entry) { /* retrieve the passed down msg_id */
set_msg_id(&entry->to_h_msg[0], entry->usr_msg_id);
rsp_length = entry->to_h_blks * VK_MSGQ_BLK_SIZE; if (copy_to_user(buf, entry->to_h_msg, rsp_length) == 0)
rc = rsp_length;
/* * in this case, return just the first block, so * that app knows what size it is looking for.
*/
set_msg_id(&tmp_msg, entry->usr_msg_id);
tmp_msg.size = entry->to_h_blks - 1; if (copy_to_user(buf, &tmp_msg, VK_MSGQ_BLK_SIZE) != 0) {
dev_err(dev, "Error return 1st block in -EMSGSIZE\n");
rc = -EFAULT;
}
} return rc;
}
/* first, do sanity check where count should be multiple of basic blk */ if (count & (VK_MSGQ_BLK_SIZE - 1)) {
dev_err(dev, "Failure with size %zu not multiple of %zu\n",
count, VK_MSGQ_BLK_SIZE);
rc = -EINVAL; goto write_err;
}
/* allocate the work entry + buffer for size count and inband sgl */
entry = kzalloc(sizeof(*entry) + count + vk->ib_sgl_size,
GFP_KERNEL); if (!entry) {
rc = -ENOMEM; goto write_err;
}
/* now copy msg from user space, and then formulate the work entry */ if (copy_from_user(&entry->to_v_msg[0], buf, count)) {
rc = -EFAULT; goto write_free_ent;
}
dev_dbg(dev, "[Q-%d]Message ctx id %d, usr_msg_id 0x%x sent msg_id 0x%x\n",
ctx->q_num, ctx->idx, entry->usr_msg_id,
get_msg_id(&entry->to_v_msg[0]));
if (entry->to_v_msg[0].function_id == VK_FID_TRANS_BUF) { /* Convert any pointers to sg list */ unsignedint num_planes; int dir; struct _vk_data *data;
/* * check if we are in reset, if so, no buffer transfer is * allowed and return error.
*/ if (vk->reset_pid) {
dev_dbg(dev, "No Transfer allowed during reset, pid %d.\n",
ctx->pid);
rc = -EACCES; goto write_free_msgid;
}
num_planes = entry->to_v_msg[0].cmd & VK_CMD_PLANES_MASK; if ((entry->to_v_msg[0].cmd & VK_CMD_MASK) == VK_CMD_DOWNLOAD)
dir = DMA_FROM_DEVICE; else
dir = DMA_TO_DEVICE;
/* Calculate vk_data location */ /* Go to end of the message */
msg_size = entry->to_v_msg[0].size; if (msg_size > entry->to_v_blks) {
rc = -EMSGSIZE; goto write_free_msgid;
}
data = (struct _vk_data *)&entry->to_v_msg[msg_size + 1];
/* Now back up to the start of the pointers */
data -= num_planes;
/* Convert user addresses to DMA SG List */
rc = bcm_vk_sg_alloc(dev, entry->dma, dir, data, num_planes); if (rc) goto write_free_msgid;
atomic_inc(&ctx->dma_cnt); /* try to embed inband sgl */
sgl_extra_blks = bcm_vk_append_ib_sgl(vk, entry, data,
num_planes);
entry->to_v_blks += sgl_extra_blks;
entry->to_v_msg[0].size += sgl_extra_blks;
} elseif (entry->to_v_msg[0].function_id == VK_FID_INIT &&
entry->to_v_msg[0].context_id == VK_NEW_CTX) { /* * Init happens in 2 stages, only the first stage contains the * pid that needs translating.
*/
pid_t org_pid, pid;
/* * translate the pid into the unique host space as user * may run sessions inside containers or process * namespaces.
*/ #define VK_MSG_PID_MASK 0xffffff00 #define VK_MSG_PID_SH 8
org_pid = (entry->to_v_msg[0].arg & VK_MSG_PID_MASK)
>> VK_MSG_PID_SH;
/* * store work entry to pending queue until a response is received. * This needs to be done before enqueuing the message
*/
bcm_vk_append_pendq(&vk->to_v_msg_chan, q_num, entry);
rc = bcm_to_v_msg_enqueue(vk, entry); if (rc) {
dev_err(dev, "Fail to enqueue msg to to_v queue\n");
/* remove message from pending list */
entry = bcm_vk_dequeue_pending
(vk,
&vk->to_v_msg_chan,
q_num,
get_msg_id(&entry->to_v_msg[0])); goto write_free_ent;
}
/* * if there are outstanding DMA transactions, need to delay long enough * to ensure that the card side would have stopped touching the host buffer * and its SGL list. A race condition could happen if the host app is killed * abruptly, eg kill -9, while some DMA transfer orders are still inflight. * Nothing could be done except for a delay as host side is running in a * completely async fashion.
*/
start_time = jiffies;
timeout = start_time + msecs_to_jiffies(BCM_VK_DMA_DRAIN_MAX_MS); do { if (time_after(jiffies, timeout)) {
dev_warn(dev, "%d dma still pending for [fd-%d] pid %d\n",
dma_cnt, ctx->idx, pid); break;
}
dma_cnt = atomic_read(&ctx->dma_cnt);
cpu_relax();
cond_resched();
} while (dma_cnt);
dev_dbg(dev, "Draining for [fd-%d] pid %d - delay %d ms\n",
ctx->idx, pid, jiffies_to_msecs(jiffies - start_time));
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.