uint mod_num_sdma;
module_param_named(num_sdma, mod_num_sdma, uint, S_IRUGO);
MODULE_PARM_DESC(num_sdma, "Set max number SDMA engines to use");

static uint sdma_desct_intr = SDMA_DESC_INTR;
module_param_named(desct_intr, sdma_desct_intr, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(desct_intr, "Number of SDMA descriptor before interrupt");

/* batch size used when waking waiters in sdma_desc_avail() */
#define SDMA_WAIT_BATCH_SIZE 20

/* max wait time for a SDMA engine to indicate it has halted */
#define SDMA_ERR_HALT_TIMEOUT 10 /* ms */

/* all SDMA engine errors that cause a halt */
/*
 * sdma_wait_for_packet_egress() - wait for the VL FIFO occupancy for
 * sdma engine 'sde' to drop to 0.
 */
static void sdma_wait_for_packet_egress(struct sdma_engine *sde,
					int pause)
{
	u64 off = 8 * sde->this_idx;
	struct hfi1_devdata *dd = sde->dd;
	int lcnt = 0;
	u64 reg_prev;
	u64 reg = 0;

	while (1) {
		reg_prev = reg;
		reg = read_csr(dd, off + SEND_EGRESS_SEND_DMA_STATUS);

		reg &= SDMA_EGRESS_PACKET_OCCUPANCY_SMASK;
		reg >>= SDMA_EGRESS_PACKET_OCCUPANCY_SHIFT;
		if (reg == 0)
			break;
		/* counter is reset if occupancy count changes */
		if (reg != reg_prev)
			lcnt = 0;
		if (lcnt++ > 500) {
			/* timed out - bounce the link */
			dd_dev_err(dd, "%s: engine %u timeout waiting for packets to egress, remaining count %u, bouncing link\n",
				   __func__, sde->this_idx, (u32)reg);
			queue_work(dd->pport->link_wq,
				   &dd->pport->link_bounce_work);
			break;
		}
		udelay(1);
	}
}
/* * sdma_wait() - wait for packet egress to complete for all SDMA engines, * and pause for credit return.
*/ void sdma_wait(struct hfi1_devdata *dd)
{ int i;
for (i = 0; i < dd->num_sdma; i++) { struct sdma_engine *sde = &dd->per_sdma[i];
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
trace_hfi1_sdma_out_sn(sde, tx->sn); if (WARN_ON_ONCE(sde->head_sn != tx->sn))
dd_dev_err(sde->dd, "expected %llu got %llu\n",
sde->head_sn, tx->sn);
sde->head_sn++; #endif
__sdma_txclean(sde->dd, tx); if (complete)
(*complete)(tx, res); if (iowait_sdma_dec(wait))
iowait_drain_wakeup(wait);
}
/*
 * Complete all the sdma requests with a SDMA_TXREQ_S_ABORTED status
 *
 * Depending on timing there can be txreqs in two places:
 * - in the descq ring
 * - in the flush list
 *
 * To avoid ordering issues the descq ring needs to be flushed
 * first followed by the flush list.
 *
 * This routine is called from two places
 * - From a work queue item
 * - Directly from the state machine just before setting the
 *   state to running
 *
 * Must be called with head_lock held
 *
 */
static void sdma_flush(struct sdma_engine *sde)
{
	struct sdma_txreq *txp, *txp_next;
	LIST_HEAD(flushlist);
	unsigned long flags;
	uint seq;

	/* flush from head to tail */
	sdma_flush_descq(sde);
	spin_lock_irqsave(&sde->flushlist_lock, flags);
	/* copy flush list */
	list_splice_init(&sde->flushlist, &flushlist);
	spin_unlock_irqrestore(&sde->flushlist_lock, flags);
	/* flush from flush list */
	list_for_each_entry_safe(txp, txp_next, &flushlist, list)
		complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
	/* wakeup QPs orphaned on the dmawait list */
	do {
		struct iowait *w, *nw;

		seq = read_seqbegin(&sde->waitlock);
		if (!list_empty(&sde->dmawait)) {
			write_seqlock(&sde->waitlock);
			list_for_each_entry_safe(w, nw, &sde->dmawait, list) {
				if (w->wakeup) {
					w->wakeup(w, SDMA_AVAIL_REASON);
					list_del_init(&w->list);
				}
			}
			write_sequnlock(&sde->waitlock);
		}
	} while (read_seqretry(&sde->waitlock, seq));
}
/*
 * Fields a work request for flushing the descq ring
 * and the flush list
 *
 * If the engine has been brought to running during
 * the scheduling delay, the flush is ignored, assuming
 * that the process of bringing the engine to running
 * would have done this flush prior to going to running.
 *
 */
static void sdma_field_flush(struct work_struct *work)
{
	unsigned long flags;
	struct sdma_engine *sde =
		container_of(work, struct sdma_engine, flush_worker);

	write_seqlock_irqsave(&sde->head_lock, flags);
	/* only flush if the engine is still not running */
	if (!__sdma_running(sde))
		sdma_flush(sde);
	write_sequnlock_irqrestore(&sde->head_lock, flags);
}
timeout = jiffies + msecs_to_jiffies(SDMA_ERR_HALT_TIMEOUT); while (1) {
statuscsr = read_sde_csr(sde, SD(STATUS));
statuscsr &= SD(STATUS_ENG_HALTED_SMASK); if (statuscsr) break; if (time_after(jiffies, timeout)) {
dd_dev_err(sde->dd, "SDMA engine %d - timeout waiting for engine to halt\n",
sde->this_idx); /* * Continue anyway. This could happen if there was * an uncorrectable error in the wrong spot.
*/ break;
}
usleep_range(80, 120);
}
/* check progress on each engine except the current one */ if (curr_sde == sde) continue; /* * We must lock interrupts when acquiring sde->lock, * to avoid a deadlock if interrupt triggers and spins on * the same lock on same CPU
*/
spin_lock_irqsave(&curr_sde->tail_lock, flags);
write_seqlock(&curr_sde->head_lock);
/*
 * flush ring for recovery
 */
static void sdma_flush_descq(struct sdma_engine *sde)
{
	u16 head, tail;
	int progress = 0;
	struct sdma_txreq *txp = get_txhead(sde);

	/* The reason for some of the complexity of this code is that
	 * not all descriptors have corresponding txps.  So, we have to
	 * be able to skip over descs until we wander into the range of
	 * the next txp on the list.
	 */
	head = sde->descq_head & sde->sdma_mask;
	tail = sde->descq_tail & sde->sdma_mask;
	while (head != tail) {
		/* advance head, wrap if needed */
		head = ++sde->descq_head & sde->sdma_mask;
		/* if now past this txp's descs, do the callback */
		if (txp && txp->next_descq_idx == head) {
			/* remove from list */
			sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
			complete_tx(sde, txp, SDMA_TXREQ_S_ABORTED);
			trace_hfi1_sdma_progress(sde, head, tail, txp);
			/* see if there is another txp */
			txp = get_txhead(sde);
		}
		progress++;
	}
	if (progress)
		sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}
/* * At this point, the following should always be true: * - We are halted, so no more descriptors are getting retired. * - We are not running, so no one is submitting new work. * - Only we can send the e40_sw_cleaned, so we can't start * running again until we say so. So, the active list and * descq are ours to play with.
*/
/* * In the error clean up sequence, software clean must be called * before the hardware clean so we can use the hardware head in * the progress routine. A hardware clean or SPC unfreeze will * reset the hardware head. * * Process all retired requests. The progress routine will use the * latest physical hardware head - we are not running so speed does * not matter.
*/
sdma_make_progress(sde, 0);
sdma_flush(sde);
/* * Reset our notion of head and tail. * Note that the HW registers have been reset via an earlier * clean up.
*/
sde->descq_tail = 0;
sde->descq_head = 0;
sde->desc_avail = sdma_descq_freecnt(sde);
*sde->head_dma = 0;
/* Releasing this reference means the state machine has stopped. */
sdma_put(ss);
/* stop waiting for all unfreeze events to complete */
atomic_set(&sde->dd->sdma_unfreeze_count, -1);
wake_up_interruptible(&sde->dd->sdma_unfreeze_wq);
}
/**
 * sdma_get_descq_cnt() - called when device probed
 *
 * Return a validated descq count.
 *
 * This is currently only used in the verbs initialization to build the tx
 * list.
 *
 * This will probably be deleted in favor of a more scalable approach to
 * alloc tx's.
 *
 */
u16 sdma_get_descq_cnt(void)
{
	u16 count = sdma_descq_cnt;

	if (!count)
		return SDMA_DESCQ_CNT;
	/* count must be a power of 2 greater than 64 and less than
	 * 32768.   Otherwise return default.
	 */
	if (!is_power_of_2(count))
		return SDMA_DESCQ_CNT;
	if (count < 64 || count > 32768)
		return SDMA_DESCQ_CNT;
	return count;
}
/**
 * sdma_engine_get_vl() - return vl for a given sdma engine
 * @sde: sdma engine
 *
 * This function returns the vl mapped to a given engine, or an error if
 * the mapping can't be found. The mapping fields are protected by RCU.
 */
int sdma_engine_get_vl(struct sdma_engine *sde)
{
	struct hfi1_devdata *dd = sde->dd;
	struct sdma_vl_map *m;
	u8 vl;

	if (sde->this_idx >= TXE_NUM_SDMA_ENGINES)
		return -EINVAL;

	rcu_read_lock();
	m = rcu_dereference(dd->sdma_map);
	if (unlikely(!m)) {
		rcu_read_unlock();
		return -EINVAL;
	}
	vl = m->engine_to_vl[sde->this_idx];
	rcu_read_unlock();

	return vl;
}
/** * sdma_select_engine_vl() - select sdma engine * @dd: devdata * @selector: a spreading factor * @vl: this vl * * * This function returns an engine based on the selector and a vl. The * mapping fields are protected by RCU.
*/ struct sdma_engine *sdma_select_engine_vl( struct hfi1_devdata *dd,
u32 selector,
u8 vl)
{ struct sdma_vl_map *m; struct sdma_map_elem *e; struct sdma_engine *rval;
/* NOTE This should only happen if SC->VL changed after the initial * checks on the QP/AH * Default will return engine 0 below
*/ if (vl >= num_vls) {
rval = NULL; goto done;
}
rcu_read_lock();
m = rcu_dereference(dd->sdma_map); if (unlikely(!m)) {
rcu_read_unlock(); return &dd->per_sdma[0];
}
e = m->map[vl & m->mask];
rval = e->sde[selector & e->mask];
rcu_read_unlock();
/** * sdma_select_engine_sc() - select sdma engine * @dd: devdata * @selector: a spreading factor * @sc5: the 5 bit sc * * * This function returns an engine based on the selector and an sc.
*/ struct sdma_engine *sdma_select_engine_sc( struct hfi1_devdata *dd,
u32 selector,
u8 sc5)
{
u8 vl = sc_to_vlt(dd, sc5);
/* * sdma_select_user_engine() - select sdma engine based on user setup * @dd: devdata * @selector: a spreading factor * @vl: this vl * * This function returns an sdma engine for a user sdma request. * User defined sdma engine affinity setting is honored when applicable, * otherwise system default sdma engine mapping is used. To ensure correct * ordering, the mapping from <selector, vl> to sde must remain unchanged.
*/ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
u32 selector, u8 vl)
{ struct sdma_rht_node *rht_node; struct sdma_engine *sde = NULL; unsignedlong cpu_id;
/* * To ensure that always the same sdma engine(s) will be * selected make sure the process is pinned to this CPU only.
*/ if (current->nr_cpus_allowed != 1) goto out;
/* only need to check the first ctr entries for a match */ for (i = 0; i < map->ctr; i++) { if (map->sde[i] == sde) {
memmove(&map->sde[i], &map->sde[i + 1],
(map->ctr - i - 1) * sizeof(map->sde[0]));
map->ctr--;
pow = roundup_pow_of_two(map->ctr ? : 1);
map->mask = pow - 1;
sdma_populate_sde_map(map); break;
}
}
}
/*
 * Prevents concurrent reads and writes of the sdma engine cpu_mask
 */
static DEFINE_MUTEX(process_to_sde_mutex);
ret = zalloc_cpumask_var(&mask, GFP_KERNEL); if (!ret) return -ENOMEM;
ret = zalloc_cpumask_var(&new_mask, GFP_KERNEL); if (!ret) {
free_cpumask_var(mask); return -ENOMEM;
}
ret = cpulist_parse(buf, mask); if (ret) goto out_free;
if (!cpumask_subset(mask, cpu_online_mask)) {
dd_dev_warn(sde->dd, "Invalid CPU mask\n");
ret = -EINVAL; goto out_free;
}
for_each_cpu(cpu, mask) { /* Check if we have this already mapped */ if (cpumask_test_cpu(cpu, &sde->cpu_mask)) {
cpumask_set_cpu(cpu, new_mask); continue;
}
rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpu,
sdma_rht_params); if (!rht_node) {
rht_node = kzalloc(sizeof(*rht_node), GFP_KERNEL); if (!rht_node) {
ret = -ENOMEM; goto out;
}
ret = rhashtable_insert_fast(dd->sdma_rht,
&rht_node->node,
sdma_rht_params); if (ret) {
kfree(rht_node->map[vl]);
kfree(rht_node);
dd_dev_err(sde->dd, "Failed to set process to sde affinity for cpu %lu\n",
cpu); goto out;
}
} else { int ctr, pow;
/* Add new user mappings */ if (!rht_node->map[vl])
rht_node->map[vl] = kzalloc(sz, GFP_KERNEL);
if (!rht_node->map[vl]) {
ret = -ENOMEM; goto out;
}
for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++)
kfree(rht_node->map[i]);
kfree(rht_node);
}
/** * sdma_seqfile_dump_cpu_list() - debugfs dump the cpu to sdma mappings * @s: seq file * @dd: hfi1_devdata * @cpuid: cpu id * * This routine dumps the process to sde mappings per cpu
*/ void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd, unsignedlong cpuid)
{ struct sdma_rht_node *rht_node; int i, j;
rht_node = rhashtable_lookup_fast(dd->sdma_rht, &cpuid,
sdma_rht_params); if (!rht_node) return;
seq_printf(s, "cpu%3lu: ", cpuid); for (i = 0; i < HFI1_MAX_VLS_SUPPORTED; i++) { if (!rht_node->map[i] || !rht_node->map[i]->ctr) continue;
seq_printf(s, " vl%d: [", i);
for (j = 0; j < rht_node->map[i]->ctr; j++) { if (!rht_node->map[i]->sde[j]) continue;
/**
 * sdma_map_init - called when # vls change
 * @dd: hfi1_devdata
 * @port: port number
 * @num_vls: number of vls
 * @vl_engines: per vl engine mapping (optional)
 *
 * This routine changes the mapping based on the number of vls.
 *
 * vl_engines is used to specify a non-uniform vl/engine loading. NULL
 * implies auto computing the loading and giving each VLs a uniform
 * distribution of engines per VL.
 *
 * The auto algorithm computes the sde_per_vl and the number of extra
 * engines.  Any extra engines are added from the last VL on down.
 *
 * rcu locking is used here to control access to the mapping fields.
 *
 * If either the num_vls or num_sdma are non-power of 2, the array sizes
 * in the struct sdma_vl_map and the struct sdma_map_elem are rounded
 * up to the next highest power of 2 and the first entry is reused
 * in a round robin fashion.
 *
 * If an error occurs the map change is not done and the mapping is
 * not changed.
 *
 */
int sdma_map_init(struct hfi1_devdata *dd, u8 port, u8 num_vls, u8 *vl_engines)
{
	int i, j;
	int extra, sde_per_vl;
	int engine = 0;
	u8 lvl_engines[OPA_MAX_VLS];
	struct sdma_vl_map *oldmap, *newmap;

	if (!(dd->flags & HFI1_HAS_SEND_DMA))
		return 0;

	if (!vl_engines) {
		/* truncate divide */
		sde_per_vl = dd->num_sdma / num_vls;
		/* extras */
		extra = dd->num_sdma % num_vls;
		vl_engines = lvl_engines;
		/* add extras from last vl down */
		for (i = num_vls - 1; i >= 0; i--, extra--)
			vl_engines[i] = sde_per_vl + (extra > 0 ? 1 : 0);
	}
	/* build new map */
	newmap = kzalloc(
		sizeof(struct sdma_vl_map) +
			roundup_pow_of_two(num_vls) *
			sizeof(struct sdma_map_elem *),
		GFP_KERNEL);
	if (!newmap)
		goto bail;
	newmap->actual_vls = num_vls;
	newmap->vls = roundup_pow_of_two(num_vls);
	newmap->mask = (1 << ilog2(newmap->vls)) - 1;
	/* initialize back-map */
	for (i = 0; i < TXE_NUM_SDMA_ENGINES; i++)
		newmap->engine_to_vl[i] = -1;
	for (i = 0; i < newmap->vls; i++) {
		/* save for wrap around */
		int first_engine = engine;

		if (i < newmap->actual_vls) {
			int sz = roundup_pow_of_two(vl_engines[i]);

			/* only allocate once */
			newmap->map[i] = kzalloc(
				sizeof(struct sdma_map_elem) +
					sz * sizeof(struct sdma_engine *),
				GFP_KERNEL);
			if (!newmap->map[i])
				goto bail;
			newmap->map[i]->mask = (1 << ilog2(sz)) - 1;
			/* assign engines */
			for (j = 0; j < sz; j++) {
				newmap->map[i]->sde[j] =
					&dd->per_sdma[engine];
				if (++engine >= first_engine + vl_engines[i])
					/* wrap back to first engine */
					engine = first_engine;
			}
			/* assign back-map */
			for (j = 0; j < vl_engines[i]; j++)
				newmap->engine_to_vl[first_engine + j] = i;
		} else {
			/* just re-use entry without allocating */
			newmap->map[i] = newmap->map[i % num_vls];
		}
		engine = first_engine + vl_engines[i];
	}
	/* newmap in hand, save old map */
	spin_lock_irq(&dd->sde_map_lock);
	oldmap = rcu_dereference_protected(dd->sdma_map,
					   lockdep_is_held(&dd->sde_map_lock));

	/*
	 * Publish newmap: without this the new map leaks and RCU readers
	 * keep seeing the old mapping.
	 */
	rcu_assign_pointer(dd->sdma_map, newmap);

	spin_unlock_irq(&dd->sde_map_lock);
	/* success, free any old map after grace period */
	if (oldmap)
		call_rcu(&oldmap->list, sdma_map_rcu_callback);
	return 0;
bail:
	/* free any partial allocation */
	sdma_map_free(newmap);
	return -ENOMEM;
}
/** * sdma_clean - Clean up allocated memory * @dd: struct hfi1_devdata * @num_engines: num sdma engines * * This routine can be called regardless of the success of * sdma_init()
*/ void sdma_clean(struct hfi1_devdata *dd, size_t num_engines)
{
size_t i; struct sdma_engine *sde;
if (dd->sdma_pad_dma) {
dma_free_coherent(&dd->pcidev->dev, SDMA_PAD,
(void *)dd->sdma_pad_dma,
dd->sdma_pad_phys);
dd->sdma_pad_dma = NULL;
dd->sdma_pad_phys = 0;
} if (dd->sdma_heads_dma) {
dma_free_coherent(&dd->pcidev->dev, dd->sdma_heads_size,
(void *)dd->sdma_heads_dma,
dd->sdma_heads_phys);
dd->sdma_heads_dma = NULL;
dd->sdma_heads_phys = 0;
} for (i = 0; dd->per_sdma && i < num_engines; ++i) {
sde = &dd->per_sdma[i];
/* Create a mask specifically for each interrupt source */
sde->int_mask = (u64)1 << (0 * TXE_NUM_SDMA_ENGINES +
this_idx);
sde->progress_mask = (u64)1 << (1 * TXE_NUM_SDMA_ENGINES +
this_idx);
sde->idle_mask = (u64)1 << (2 * TXE_NUM_SDMA_ENGINES +
this_idx); /* Create a combined mask to cover all 3 interrupt sources */
sde->imask = sde->int_mask | sde->progress_mask |
sde->idle_mask;
spin_lock_init(&sde->tail_lock);
seqlock_init(&sde->head_lock);
spin_lock_init(&sde->senddmactrl_lock);
spin_lock_init(&sde->flushlist_lock);
seqlock_init(&sde->waitlock); /* insure there is always a zero bit */
sde->ahg_bits = 0xfffffffe00000000ULL;
sdma_set_state(sde, sdma_state_s00_hw_down);
/* set up reference counting */
kref_init(&sde->state.kref);
init_completion(&sde->state.comp);
dd->sdma_heads_size = L1_CACHE_BYTES * num_engines; /* Allocate memory for DMA of head registers to memory */
dd->sdma_heads_dma = dma_alloc_coherent(&dd->pcidev->dev,
dd->sdma_heads_size,
&dd->sdma_heads_phys,
GFP_KERNEL); if (!dd->sdma_heads_dma) {
dd_dev_err(dd, "failed to allocate SendDMA head memory\n"); goto bail;
}
/* Allocate memory for pad */
dd->sdma_pad_dma = dma_alloc_coherent(&dd->pcidev->dev, SDMA_PAD,
&dd->sdma_pad_phys, GFP_KERNEL); if (!dd->sdma_pad_dma) {
dd_dev_err(dd, "failed to allocate SendDMA pad memory\n"); goto bail;
}
/* assign each engine to different cacheline and init registers */
curr_head = (void *)dd->sdma_heads_dma; for (this_idx = 0; this_idx < num_engines; ++this_idx) { unsignedlong phys_offset;
/**
 * sdma_all_running() - called when the link goes up
 * @dd: hfi1_devdata
 *
 * This routine moves all engines to the running state.
 */
void sdma_all_running(struct hfi1_devdata *dd)
{
	struct sdma_engine *sde;
	unsigned int i;

	/* move all engines to running */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e30_go_running);
	}
}
/**
 * sdma_start() - called to kick off state processing for all engines
 * @dd: hfi1_devdata
 *
 * This routine is for kicking off the state processing for all required
 * sdma engines.  Interrupts need to be working at this point.
 *
 */
void sdma_start(struct hfi1_devdata *dd)
{
	unsigned i;
	struct sdma_engine *sde;

	/* kick off the engines state processing */
	for (i = 0; i < dd->num_sdma; ++i) {
		sde = &dd->per_sdma[i];
		sdma_process_event(sde, sdma_event_e10_go_hw_start);
	}
}
/**
 * sdma_exit() - used when module is removed
 * @dd: hfi1_devdata
 */
void sdma_exit(struct hfi1_devdata *dd)
{
	unsigned this_idx;
	struct sdma_engine *sde;

	for (this_idx = 0; dd->per_sdma && this_idx < dd->num_sdma;
			++this_idx) {
		sde = &dd->per_sdma[this_idx];
		if (!list_empty(&sde->dmawait))
			dd_dev_err(dd, "sde %u: dmawait list not empty!\n",
				   sde->this_idx);
		sdma_process_event(sde, sdma_event_e00_go_hw_down);

		/*
		 * This waits for the state machine to exit so it is not
		 * necessary to kill the sdma_sw_clean_up_task to make sure
		 * it is not running.
		 */
		sdma_finalput(&sde->state);
	}
}
if (descp->pinning_ctx && descp->ctx_put)
descp->ctx_put(descp->pinning_ctx);
descp->pinning_ctx = NULL;
}
/* * return the mode as indicated by the first * descriptor in the tx.
*/ staticinline u8 ahg_mode(struct sdma_txreq *tx)
{ return (tx->descp[0].qw[1] & SDMA_DESC1_HEADER_MODE_SMASK)
>> SDMA_DESC1_HEADER_MODE_SHIFT;
}
/**
 * __sdma_txclean() - clean tx of mappings, descp *kmalloc's
 * @dd: hfi1_devdata for unmapping
 * @tx: tx request to clean
 *
 * This is used in the progress routine to clean the tx or
 * by the ULP to toss an in-process tx build.
 *
 * The code can be called multiple times without issue.
 *
 */
void __sdma_txclean(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx)
{
	u16 i;

	if (tx->num_desc) {
		u8 skip = 0, mode = ahg_mode(tx);

		/* unmap first */
		sdma_unmap_desc(dd, &tx->descp[0]);
		/* determine number of AHG descriptors to skip */
		if (mode > SDMA_AHG_APPLY_UPDATE1)
			skip = mode >> 1;
		for (i = 1 + skip; i < tx->num_desc; i++)
			sdma_unmap_desc(dd, &tx->descp[i]);
		tx->num_desc = 0;
	}
	kfree(tx->coalesce_buf);
	tx->coalesce_buf = NULL;
	/* kmalloc'ed descp */
	if (unlikely(tx->desc_limit > ARRAY_SIZE(tx->descs))) {
		tx->desc_limit = ARRAY_SIZE(tx->descs);
		kfree(tx->descp);
	}
}
if (unlikely(HFI1_CAP_IS_KSET(SDMA_HEAD_CHECK))) {
u16 cnt;
u16 swtail;
u16 swhead; int sane;
swhead = sde->descq_head & sde->sdma_mask; /* this code is really bad for cache line trading */
swtail = READ_ONCE(sde->descq_tail) & sde->sdma_mask;
cnt = sde->descq_cnt;
if (unlikely(!sane)) {
dd_dev_err(dd, "SDMA(%u) bad head (%s) hwhd=%u swhd=%u swtl=%u cnt=%u\n",
sde->this_idx,
use_dmahead ? "dma" : "kreg",
hwhead, swhead, swtail, cnt); if (use_dmahead) { /* try one more time, using csr */
use_dmahead = 0; goto retry;
} /* proceed as if no progress */
hwhead = swhead;
}
} return hwhead;
}
/*
 * This is called when there are send DMA descriptors that might be
 * available.
 *
 * This is called with head_lock held.
 */
static void sdma_desc_avail(struct sdma_engine *sde, uint avail)
{
	struct iowait *wait, *nw, *twait;
	struct iowait *waits[SDMA_WAIT_BATCH_SIZE];
	uint i, n = 0, seq, tidx = 0;

	do {
		seq = read_seqbegin(&sde->waitlock);
		if (!list_empty(&sde->dmawait)) {
			/* at least one item */
			write_seqlock(&sde->waitlock);
			/* Harvest waiters wanting DMA descriptors */
			list_for_each_entry_safe(
				wait,
				nw,
				&sde->dmawait,
				list) {
				u32 num_desc;

				if (!wait->wakeup)
					continue;
				if (n == ARRAY_SIZE(waits))
					break;
				iowait_init_priority(wait);
				num_desc = iowait_get_all_desc(wait);
				if (num_desc > avail)
					break;
				avail -= num_desc;
				/* Find the top-priority wait member */
				if (n) {
					twait = waits[tidx];
					tidx =
					    iowait_priority_update_top(wait,
								       twait,
								       n,
								       tidx);
				}
				list_del_init(&wait->list);
				waits[n++] = wait;
			}
			write_sequnlock(&sde->waitlock);
			break;
		}
	} while (read_seqretry(&sde->waitlock, seq));

	/* Schedule the top-priority entry first */
	if (n)
		waits[tidx]->wakeup(waits[tidx], SDMA_AVAIL_REASON);

	for (i = 0; i < n; i++)
		if (i != tidx)
			waits[i]->wakeup(waits[i], SDMA_AVAIL_REASON);
}
/* head_lock must be held */ staticvoid sdma_make_progress(struct sdma_engine *sde, u64 status)
{ struct sdma_txreq *txp = NULL; int progress = 0;
u16 hwhead, swhead; int idle_check_done = 0;
hwhead = sdma_gethead(sde);
/* The reason for some of the complexity of this code is that * not all descriptors have corresponding txps. So, we have to * be able to skip over descs until we wander into the range of * the next txp on the list.
*/
/* if now past this txp's descs, do the callback */ if (txp && txp->next_descq_idx == swhead) { /* remove from list */
sde->tx_ring[sde->tx_head++ & sde->sdma_mask] = NULL;
complete_tx(sde, txp, SDMA_TXREQ_S_OK); /* see if there is another txp */
txp = get_txhead(sde);
}
trace_hfi1_sdma_progress(sde, hwhead, swhead, txp);
progress++;
}
/* * The SDMA idle interrupt is not guaranteed to be ordered with respect * to updates to the dma_head location in host memory. The head * value read might not be fully up to date. If there are pending * descriptors and the SDMA idle interrupt fired then read from the * CSR SDMA head instead to get the latest value from the hardware. * The hardware SDMA head should be read at most once in this invocation * of sdma_make_progress(..) which is ensured by idle_check_done flag
*/ if ((status & sde->idle_mask) && !idle_check_done) {
u16 swtail;
sde->last_status = status; if (progress)
sdma_desc_avail(sde, sdma_descq_freecnt(sde));
}
/*
 * sdma_engine_interrupt() - interrupt handler for engine
 * @sde: sdma engine
 * @status: sdma interrupt reason
 *
 * Status is a mask of the 3 possible interrupts for this engine.  It will
 * contain bits _only_ for this SDMA engine.  It will contain at least one
 * bit, it may contain more.
 */
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status)
{
	trace_hfi1_sdma_engine_interrupt(sde, status);
	write_seqlock(&sde->head_lock);
	sdma_set_desc_cnt(sde, sdma_desct_intr);
	if (status & sde->idle_mask)
		sde->idle_int_cnt++;
	else if (status & sde->progress_mask)
		sde->progress_int_cnt++;
	else if (status & sde->int_mask)
		sde->sdma_int_cnt++;
	sdma_make_progress(sde, status);
	write_sequnlock(&sde->head_lock);
}
/* * Set SendDmaLenGen and clear-then-set the MSB of the generation * count to enable generation checking and load the internal * generation counter.
*/
write_sde_csr(sde, SD(LEN_GEN),
(sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT));
write_sde_csr(sde, SD(LEN_GEN),
((sde->descq_cnt / 64) << SD(LEN_GEN_LENGTH_SHIFT)) |
(4ULL << SD(LEN_GEN_GENERATION_SHIFT)));
}
/* Commit prior ring writes to memory and advance the tail on the chip */
static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
{
	/* Commit writes to memory and advance the tail on the chip */
	smp_wmb(); /* see get_txhead() */
	writeq(tail, sde->tail_csr);
}
/* * This is called when changing to state s10_hw_start_up_halt_wait as * a result of send buffer errors or send DMA descriptor errors.
*/ staticvoid sdma_hw_start_up(struct sdma_engine *sde)
{
u64 reg;
for (i = 0; i < CCE_NUM_INT_CSRS; ++i) {
sdma_dumpstate_helper2(CCE_INT_STATUS);
sdma_dumpstate_helper2(CCE_INT_MASK);
sdma_dumpstate_helper2(CCE_INT_BLOCKED);
}
/* * This routine submits the indicated tx * * Space has already been guaranteed and * tail side of ring is locked. * * The hardware tail update is done * in the caller and that is facilitated * by returning the new tail. * * There is special case logic for ahg * to not add the generation number for * up to 2 descriptors that follow the * first descriptor. *
*/ staticinline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
{ int i;
u16 tail; struct sdma_desc *descp = tx->descp;
u8 skip = 0, mode = ahg_mode(tx);
sde->desc_avail = sdma_descq_freecnt(sde); if (tx->num_desc <= sde->desc_avail) return -EAGAIN; /* pulse the head_lock */ if (wait && iowait_ioww_to_iow(wait)->sleep) { unsigned seq;
seq = raw_seqcount_begin(
(const seqcount_t *)&sde->head_lock.seqcount);
ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent); if (ret == -EAGAIN)
sde->desc_avail = sdma_descq_freecnt(sde);
} else {
ret = -EBUSY;
} return ret;
}
/**
 * sdma_send_txreq() - submit a tx req to ring
 * @sde: sdma engine to use
 * @wait: SE wait structure to use when full (may be NULL)
 * @tx: sdma_txreq to submit
 * @pkts_sent: has any packet been sent yet?
 *
 * The call submits the tx into the ring.  If a iowait structure is non-NULL
 * the packet will be queued to the list in wait.
 *
 * Return:
 * 0 - Success, -EINVAL - sdma_txreq incomplete, -EBUSY - no space in
 * ring (wait == NULL)
 * -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
 */
int sdma_send_txreq(struct sdma_engine *sde,
		    struct iowait_work *wait,
		    struct sdma_txreq *tx,
		    bool pkts_sent)
{
	int ret = 0;
	u16 tail;
	unsigned long flags;

	/* user should have supplied entire packet */
	if (unlikely(tx->tlen))
		return -EINVAL;
	tx->wait = iowait_ioww_to_iow(wait);
	spin_lock_irqsave(&sde->tail_lock, flags);
retry:
	if (unlikely(!__sdma_running(sde)))
		goto unlock_noconn;
	if (unlikely(tx->num_desc > sde->desc_avail))
		goto nodesc;
	tail = submit_tx(sde, tx);
	if (wait)
		iowait_sdma_inc(iowait_ioww_to_iow(wait));
	sdma_update_tail(sde, tail);
unlock:
	spin_unlock_irqrestore(&sde->tail_lock, flags);
	return ret;
unlock_noconn:
	/* engine not running: queue the tx on the flush list instead */
	if (wait)
		iowait_sdma_inc(iowait_ioww_to_iow(wait));
	tx->next_descq_idx = 0;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	tx->sn = sde->tail_sn++;
	trace_hfi1_sdma_in_sn(sde, tx->sn);
#endif
	spin_lock(&sde->flushlist_lock);
	list_add_tail(&tx->list, &sde->flushlist);
	spin_unlock(&sde->flushlist_lock);
	iowait_inc_wait_count(wait, tx->num_desc);
	queue_work_on(sde->cpu, system_highpri_wq, &sde->flush_worker);
	ret = -ECOMM;
	goto unlock;
nodesc:
	/* ring full: let the progress check decide whether to retry */
	ret = sdma_check_progress(sde, wait, tx, pkts_sent);
	if (ret == -EAGAIN) {
		ret = 0;
		goto retry;
	}
	sde->descq_full_count++;
	goto unlock;
}
/** * sdma_send_txlist() - submit a list of tx req to ring * @sde: sdma engine to use * @wait: SE wait structure to use when full (may be NULL) * @tx_list: list of sdma_txreqs to submit * @count_out: pointer to a u16 which, after return will contain the total number of * sdma_txreqs removed from the tx_list. This will include sdma_txreqs * whose SDMA descriptors are submitted to the ring and the sdma_txreqs * which are added to SDMA engine flush list if the SDMA engine state is * not running. * * The call submits the list into the ring. * * If the iowait structure is non-NULL and not equal to the iowait list * the unprocessed part of the list will be appended to the list in wait. * * In all cases, the tx_list will be updated so the head of the tx_list is * the list of descriptors that have yet to be transmitted. * * The intent of this call is to provide a more efficient * way of submitting multiple packets to SDMA while holding the tail * side locking. * * Return: * 0 - Success, * -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.82 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.