// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2006-2009 DENX Software Engineering.
 *
 * Author: Yuri Tikhonov <yur@emcraft.com>
 *
 * Further porting to arch/powerpc by
 *	Anatolij Gustschin <agust@denx.de>
 */

/*
 * This driver supports the asynchronous DMA copy and RAID engines available
 * on the AMCC PPC440SPe Processors.
 * Based on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
 * ADMA driver written by D.Williams.
 */
/* Since RXOR operations use the common register (MQ0_CF2H) for setting-up * the block size in transactions, then we do not allow to activate more than * only one RXOR transactions simultaneously. So use this var to store * the information about is RXOR currently active (PPC440SPE_RXOR_RUN bit is * set) or not (PPC440SPE_RXOR_RUN is clear).
*/ staticunsignedlong ppc440spe_rxor_state;
/* These are used in enable & check routines
*/ static u32 ppc440spe_r6_enabled; staticstruct ppc440spe_adma_chan *ppc440spe_r6_tchan; staticstruct completion ppc440spe_r6_test_comp;
staticint ppc440spe_adma_dma2rxor_prep_src( struct ppc440spe_adma_desc_slot *desc, struct ppc440spe_rxor *cursor, int index, int src_cnt, u32 addr); staticvoid ppc440spe_adma_dma2rxor_set_src( struct ppc440spe_adma_desc_slot *desc, int index, dma_addr_t addr); staticvoid ppc440spe_adma_dma2rxor_set_mult( struct ppc440spe_adma_desc_slot *desc, int index, u8 mult);
#ifdef ADMA_LL_DEBUG #define ADMA_LL_DBG(x) ({ if (1) x; 0; }) #else #define ADMA_LL_DBG(x) ({ if (0) x; 0; }) #endif
pr_debug("\n%s(%d):\nsrc: ", __func__, id); for (i = 0; i < src_cnt; i++)
pr_debug("\t0x%016llx ", src[i]);
pr_debug("dst: "); for (i = 0; i < 2; i++)
pr_debug("\t0x%016llx ", dst[i]);
}
pr_debug("\n%s(%d):\nsrc(coef): ", __func__, id); if (scf) { for (i = 0; i < src_cnt; i++)
pr_debug("\t0x%016llx(0x%02x) ", src[i], scf[i]);
} else { for (i = 0; i < src_cnt; i++)
pr_debug("\t0x%016llx(no) ", src[i]);
}
pr_debug("dst: "); for (i = 0; i < 2; i++)
pr_debug("\t0x%016llx ", src[src_cnt + i]);
}
/** * ppc440spe_desc_init_dma01pq - initialize the descriptors for PQ operation * with DMA0/1
*/ staticvoid ppc440spe_desc_init_dma01pq(struct ppc440spe_adma_desc_slot *desc, int dst_cnt, int src_cnt, unsignedlong flags, unsignedlong op)
{ struct dma_cdb *hw_desc; struct ppc440spe_adma_desc_slot *iter;
u8 dopc;
/* Common initialization of a PQ descriptors chain */
set_bits(op, &desc->flags);
desc->src_cnt = src_cnt;
desc->dst_cnt = dst_cnt;
/* WXOR MULTICAST if both P and Q are being computed * MV_SG1_SG2 if Q only
*/
dopc = (desc->dst_cnt == DMA_DEST_MAX_NUM) ?
DMA_CDB_OPC_MULTICAST : DMA_CDB_OPC_MV_SG1_SG2;
if (likely(!list_is_last(&iter->chain_node,
&desc->group_list))) { /* set 'next' pointer */
iter->hw_next = list_entry(iter->chain_node.next, struct ppc440spe_adma_desc_slot, chain_node);
clear_bit(PPC440SPE_DESC_INT, &iter->flags);
} else { /* this is the last descriptor. * this slot will be pasted from ADMA level * each time it wants to configure parameters * of the transaction (src, dst, ...)
*/
iter->hw_next = NULL; if (flags & DMA_PREP_INTERRUPT)
set_bit(PPC440SPE_DESC_INT, &iter->flags); else
clear_bit(PPC440SPE_DESC_INT, &iter->flags);
}
}
/* Set OPS depending on WXOR/RXOR type of operation */ if (!test_bit(PPC440SPE_DESC_RXOR, &desc->flags)) { /* This is a WXOR only chain: * - first descriptors are for zeroing destinations * if PPC440SPE_ZERO_P/Q set; * - descriptors remained are for GF-XOR operations.
*/
iter = list_first_entry(&desc->group_list, struct ppc440spe_adma_desc_slot,
chain_node);
if (test_bit(PPC440SPE_ZERO_P, &desc->flags)) {
hw_desc = iter->hw_desc;
hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
iter = list_first_entry(&iter->chain_node, struct ppc440spe_adma_desc_slot,
chain_node);
}
if (test_bit(PPC440SPE_ZERO_Q, &desc->flags)) {
hw_desc = iter->hw_desc;
hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
iter = list_first_entry(&iter->chain_node, struct ppc440spe_adma_desc_slot,
chain_node);
}
list_for_each_entry_from(iter, &desc->group_list, chain_node) {
hw_desc = iter->hw_desc;
hw_desc->opc = dopc;
}
} else { /* This is either RXOR-only or mixed RXOR/WXOR */
/* The first 1 or 2 slots in chain are always RXOR, * if need to calculate P & Q, then there are two * RXOR slots; if only P or only Q, then there is one
*/
iter = list_first_entry(&desc->group_list, struct ppc440spe_adma_desc_slot,
chain_node);
hw_desc = iter->hw_desc;
hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
if (desc->dst_cnt == DMA_DEST_MAX_NUM) {
iter = list_first_entry(&iter->chain_node, struct ppc440spe_adma_desc_slot,
chain_node);
hw_desc = iter->hw_desc;
hw_desc->opc = DMA_CDB_OPC_MV_SG1_SG2;
}
/* The remaining descs (if any) are WXORs */ if (test_bit(PPC440SPE_DESC_WXOR, &desc->flags)) {
iter = list_first_entry(&iter->chain_node, struct ppc440spe_adma_desc_slot,
chain_node);
list_for_each_entry_from(iter, &desc->group_list,
chain_node) {
hw_desc = iter->hw_desc;
hw_desc->opc = dopc;
}
}
}
}
/**
 * ppc440spe_desc_init_dma01pqzero_sum - initialize the descriptor
 * for PQ_ZERO_SUM operation
 * @desc: head slot of the group whose group_list is initialized
 * @dst_cnt: number of destinations being verified (1 or 2)
 * @src_cnt: number of sources
 */
static void ppc440spe_desc_init_dma01pqzero_sum(
				struct ppc440spe_adma_desc_slot *desc,
				int dst_cnt, int src_cnt)
{
	struct dma_cdb *hw_desc;
	struct ppc440spe_adma_desc_slot *iter;
	int i = 0;
	/* MV_SG1_SG2 if only Q is being verified
	 * MULTICAST if both P and Q are being verified
	 */
	u8 dopc = (dst_cnt == 2) ? DMA_CDB_OPC_MULTICAST :
				   DMA_CDB_OPC_MV_SG1_SG2;

	/*
	 * Initialize starting from 2nd or 3rd descriptor dependent
	 * on dst_cnt. First one or two slots are for cloning P
	 * and/or Q to chan->pdest and/or chan->qdest as we have
	 * to preserve original P/Q.
	 */
	iter = list_first_entry(&desc->group_list,
				struct ppc440spe_adma_desc_slot, chain_node);
	iter = list_entry(iter->chain_node.next,
			  struct ppc440spe_adma_desc_slot, chain_node);

	if (dst_cnt > 1) {
		iter = list_entry(iter->chain_node.next,
				  struct ppc440spe_adma_desc_slot, chain_node);
	}

	/* initialize each source descriptor in chain */
	list_for_each_entry_from(iter, &desc->group_list, chain_node) {
		hw_desc = iter->hw_desc;
		memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
		iter->src_cnt = 0;
		iter->dst_cnt = 0;

		/* This is a ZERO_SUM operation:
		 * - <src_cnt> descriptors starting from 2nd or 3rd
		 *   descriptor are for GF-XOR operations;
		 * - remaining <dst_cnt> descriptors are for checking the result
		 */
		if (i++ < src_cnt) {
			hw_desc->opc = dopc;
		} else {
			/* DMA_CDB_OPC_DCHECK128 operation */
			hw_desc->opc = DMA_CDB_OPC_DCHECK128;
		}

		if (likely(!list_is_last(&iter->chain_node,
					 &desc->group_list))) {
			/* set 'next' pointer */
			iter->hw_next = list_entry(iter->chain_node.next,
					struct ppc440spe_adma_desc_slot,
					chain_node);
		} else {
			/* this is the last descriptor.
			 * this slot will be pasted from ADMA level
			 * each time it wants to configure parameters
			 * of the transaction (src, dst, ...)
			 */
			iter->hw_next = NULL;
			/* always enable interrupt generation since we get
			 * the status of pqzero from the handler
			 */
			set_bit(PPC440SPE_DESC_INT, &iter->flags);
		}
	}
	desc->src_cnt = src_cnt;
	desc->dst_cnt = dst_cnt;
}
/**
 * ppc440spe_desc_set_byte_count - set number of data bytes involved
 * into the operation
 * @desc: descriptor slot whose hardware descriptor is updated
 * @chan: channel the descriptor belongs to; its device id selects the
 *	hardware descriptor layout
 * @byte_count: number of bytes to process
 *
 * DMA0/1 descriptors store the count little-endian in the CDB 'cnt' field;
 * XOR descriptors store it as-is in the CB 'cbbc' field. Any other device
 * id leaves the descriptor untouched.
 */
static void ppc440spe_desc_set_byte_count(struct ppc440spe_adma_desc_slot *desc,
					struct ppc440spe_adma_chan *chan,
					u32 byte_count)
{
	struct dma_cdb *dma_hw_desc;
	struct xor_cb *xor_hw_desc;

	switch (chan->device->id) {
	case PPC440SPE_DMA0_ID:
	case PPC440SPE_DMA1_ID:
		dma_hw_desc = desc->hw_desc;
		dma_hw_desc->cnt = cpu_to_le32(byte_count);
		break;
	case PPC440SPE_XOR_ID:
		xor_hw_desc = desc->hw_desc;
		xor_hw_desc->cbbc = byte_count;
		break;
	default:
		break;
	}
}
/**
 * ppc440spe_desc_set_rxor_block_size - set RXOR block size
 * @byte_count: block size in bytes, written unmodified to DCRN_MQ0_CF2H
 */
static inline void ppc440spe_desc_set_rxor_block_size(u32 byte_count)
{
	/* assume that byte_count is aligned on the 512-boundary;
	 * thus write it directly to the register (bits 23:31 are
	 * reserved there).
	 */
	dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CF2H, byte_count);
}
/**
 * ppc440spe_desc_set_link - set the address of descriptor following this
 * descriptor in chain
 * @chan: channel owning both descriptors
 * @prev_desc: descriptor that will point to @next_desc
 * @next_desc: descriptor to be linked after @prev_desc
 *
 * BUG()s if either descriptor is NULL or if @prev_desc is already linked to
 * a different descriptor. The software link is always set; for the XOR
 * channel the hardware link is set as well, unless @prev_desc is the last
 * submitted CB.
 */
static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan,
				struct ppc440spe_adma_desc_slot *prev_desc,
				struct ppc440spe_adma_desc_slot *next_desc)
{
	unsigned long flags;
	struct ppc440spe_adma_desc_slot *tail = next_desc;

	if (unlikely(!prev_desc || !next_desc ||
		(prev_desc->hw_next && prev_desc->hw_next != next_desc))) {
		/* If previous next is overwritten something is wrong.
		 * though we may refetch from append to initiate list
		 * processing; in this case - it's ok.
		 */
		printk(KERN_ERR "%s: prev_desc=0x%p; next_desc=0x%p; "
			"prev->hw_next=0x%p\n", __func__, prev_desc,
			next_desc, prev_desc ? prev_desc->hw_next : NULL);
		BUG();
	}

	local_irq_save(flags);

	/* do s/w chaining both for DMA and XOR descriptors */
	prev_desc->hw_next = next_desc;

	switch (chan->device->id) {
	case PPC440SPE_DMA0_ID:
	case PPC440SPE_DMA1_ID:
		break;
	case PPC440SPE_XOR_ID:
		/* bind descriptor to the chain: remember the tail of the
		 * newly attached s/w chain as the last linked descriptor
		 */
		while (tail->hw_next)
			tail = tail->hw_next;
		xor_last_linked = tail;

		if (prev_desc == xor_last_submit)
			/* do not link to the last submitted CB */
			break;
		ppc440spe_xor_set_link(prev_desc, next_desc);
		break;
	default:
		break;
	}

	local_irq_restore(flags);
}
/** * ppc440spe_desc_get_link - get the address of the descriptor that * follows this one
*/ staticinline u32 ppc440spe_desc_get_link(struct ppc440spe_adma_desc_slot *desc, struct ppc440spe_adma_chan *chan)
{ if (!desc->hw_next) return 0;
/**
 * ppc440spe_chan_xor_slot_count - get the number of slots necessary for
 * XOR operation
 * @len: length of the operation in bytes
 * @src_cnt: number of source operands
 * @slots_per_op: output; receives the same slot count that is returned
 *
 * Returns the number of XOR descriptors needed for @src_cnt sources.
 * BUG()s if @len exceeds PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT.
 */
static int ppc440spe_chan_xor_slot_count(size_t len, int src_cnt,
					 int *slots_per_op)
{
	int slot_cnt;

	/* each XOR descriptor provides up to 16 source operands */
	slot_cnt = *slots_per_op = (src_cnt + XOR_MAX_OPS - 1)/XOR_MAX_OPS;

	if (likely(len <= PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT))
		return slot_cnt;

	/* len is a size_t, so it must be printed with %zu rather than %d */
	printk(KERN_ERR "%s: len %zu > max %d !!\n",
		__func__, len, PPC440SPE_ADMA_XOR_MAX_BYTE_COUNT);
	BUG();
	return slot_cnt;
}
/** * ppc440spe_dma2_pq_slot_count - get the number of slots necessary for * DMA2 PQ operation
*/ staticint ppc440spe_dma2_pq_slot_count(dma_addr_t *srcs, int src_cnt, size_t len)
{ signedlonglong order = 0; int state = 0; int addr_count = 0; int i; for (i = 1; i < src_cnt; i++) {
dma_addr_t cur_addr = srcs[i];
dma_addr_t old_addr = srcs[i-1]; switch (state) { case 0: if (cur_addr == old_addr + len) { /* direct RXOR */
order = 1;
state = 1; if (i == src_cnt-1)
addr_count++;
} elseif (old_addr == cur_addr + len) { /* reverse RXOR */
order = -1;
state = 1; if (i == src_cnt-1)
addr_count++;
} else {
state = 3;
} break; case 1: if (i == src_cnt-2 || (order == -1
&& cur_addr != old_addr - len)) {
order = 0;
state = 0;
addr_count++;
} elseif (cur_addr == old_addr + len*order) {
state = 2; if (i == src_cnt-1)
addr_count++;
} elseif (cur_addr == old_addr + 2*len) {
state = 2; if (i == src_cnt-1)
addr_count++;
} elseif (cur_addr == old_addr + 3*len) {
state = 2; if (i == src_cnt-1)
addr_count++;
} else {
order = 0;
state = 0;
addr_count++;
} break; case 2:
order = 0;
state = 0;
addr_count++; break;
} if (state == 3) break;
} if (src_cnt <= 1 || (state != 1 && state != 2)) {
pr_err("%s: src_cnt=%d, state=%d, addr_count=%d, order=%lld\n",
__func__, src_cnt, state, addr_count, order); for (i = 0; i < src_cnt; i++)
pr_err("\t[%d] 0x%llx \n", i, srcs[i]);
BUG();
}
switch (chan->device->id) { case PPC440SPE_DMA0_ID: case PPC440SPE_DMA1_ID: /* read FIFO to ack */
dma_reg = chan->device->dma_reg; while ((rv = ioread32(&dma_reg->csfpl))) {
i = rv & DMA_CDB_ADDR_MSK;
cdb = (struct dma_cdb *)&p[i -
(u32)chan->device->dma_desc_pool];
/* Clear opcode to ack. This is necessary for * ZeroSum operations only
*/
cdb->opc = 0;
if (test_bit(PPC440SPE_RXOR_RUN,
&ppc440spe_rxor_state)) { /* probably this is a completed RXOR op, * get pointer to CDB using the fact that * physical and virtual addresses of CDB * in pools have the same offsets
*/ if (le32_to_cpu(cdb->sg1u) &
DMA_CUED_XOR_BASE) { /* this is a RXOR */
clear_bit(PPC440SPE_RXOR_RUN,
&ppc440spe_rxor_state);
}
}
/* if the XORcore is idle, but there are unprocessed CBs * then refetch the s/w chain here
*/ if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) &&
do_xor_refetch)
ppc440spe_chan_append(chan); break;
}
}
/**
 * ppc440spe_chan_is_busy - get the channel status
 * @chan: channel to query
 *
 * Returns 1 if the channel's engine is busy and 0 otherwise. A device id
 * other than DMA0, DMA1 or XOR is reported as not busy.
 */
static int ppc440spe_chan_is_busy(struct ppc440spe_adma_chan *chan)
{
	struct dma_regs *dma_reg;
	struct xor_regs *xor_reg;
	int busy = 0;

	switch (chan->device->id) {
	case PPC440SPE_DMA0_ID:
	case PPC440SPE_DMA1_ID:
		dma_reg = chan->device->dma_reg;
		/*  if command FIFO's head and tail pointers are equal and
		 * status tail is the same as command, then channel is free
		 */
		if (ioread16(&dma_reg->cpfhp) != ioread16(&dma_reg->cpftp) ||
		    ioread16(&dma_reg->cpftp) != ioread16(&dma_reg->csftp))
			busy = 1;
		break;
	case PPC440SPE_XOR_ID:
		/* use the special status bit for the XORcore
		 */
		xor_reg = chan->device->xor_reg;
		busy = (ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT) ? 1 : 0;
		break;
	default:
		break;
	}

	return busy;
}
/**
 * ppc440spe_dma_put_desc - put DMA0,1 descriptor to FIFO.
 * called with irqs disabled
 * @chan: DMA0/1 channel whose command FIFO receives the descriptor
 * @desc: descriptor slot to submit
 *
 * The descriptor's address is written to the 'cpfpl' register, with
 * DMA_CDB_NO_INT or-ed in when the slot does not have PPC440SPE_DESC_INT
 * set. The slot is also recorded as the channel's last submitted one.
 */
static void ppc440spe_dma_put_desc(struct ppc440spe_adma_chan *chan,
		struct ppc440spe_adma_desc_slot *desc)
{
	u32 pcdb;
	struct dma_regs *dma_reg = chan->device->dma_reg;

	pcdb = desc->phys;
	if (!test_bit(PPC440SPE_DESC_INT, &desc->flags))
		pcdb |= DMA_CDB_NO_INT;

	chan_last_sub[chan->device->id] = desc;

	ADMA_LL_DBG(print_cb(chan, desc->hw_desc));

	iowrite32(pcdb, &dma_reg->cpfpl);
}
/** * ppc440spe_chan_append - update the h/w chain in the channel
*/ staticvoid ppc440spe_chan_append(struct ppc440spe_adma_chan *chan)
{ struct xor_regs *xor_reg; struct ppc440spe_adma_desc_slot *iter; struct xor_cb *xcb;
u32 cur_desc; unsignedlong flags;
local_irq_save(flags);
switch (chan->device->id) { case PPC440SPE_DMA0_ID: case PPC440SPE_DMA1_ID:
cur_desc = ppc440spe_chan_get_current_descriptor(chan);
if (likely(cur_desc)) {
iter = chan_last_sub[chan->device->id];
BUG_ON(!iter);
} else { /* first peer */
iter = chan_first_cdb[chan->device->id];
BUG_ON(!iter);
ppc440spe_dma_put_desc(chan, iter);
chan->hw_chain_inited = 1;
}
/* is there something new to append */ if (!iter->hw_next) break;
/* flush descriptors from the s/w queue to fifo */
list_for_each_entry_continue(iter, &chan->chain, chain_node) {
ppc440spe_dma_put_desc(chan, iter); if (!iter->hw_next) break;
} break; case PPC440SPE_XOR_ID: /* update h/w links and refetch */ if (!xor_last_submit->hw_next) break;
xor_reg = chan->device->xor_reg; /* the last linked CDB has to generate an interrupt * that we'd be able to append the next lists to h/w * regardless of the XOR engine state at the moment of * appending of these next lists
*/
xcb = xor_last_linked->hw_desc;
xcb->cbc |= XOR_CBCR_CBCE_BIT;
if (!(ioread32be(&xor_reg->sr) & XOR_SR_XCP_BIT)) { /* XORcore is idle. Refetch now */
do_xor_refetch = 0;
ppc440spe_xor_set_link(xor_last_submit,
xor_last_submit->hw_next);
/**
 * ppc440spe_can_rxor - check if the operands may be processed with RXOR
 * @srcs: array of source pages; NULL entries (holes) are skipped
 * @src_cnt: number of entries in @srcs
 * @len: size of each source operand in bytes
 *
 * The non-NULL sources are copied into ppc440spe_rxor_srcs and their page
 * addresses are then run through a small state machine:
 *   state 0 - looking for the start of a region (two sources exactly @len
 *             apart, in ascending or descending order);
 *   state 1 - a two-source region has been found;
 *   state 2 - the region has been extended with a third source.
 *
 * Returns 1 if the scan ends in state 1 or 2, and 0 otherwise.
 */
static int ppc440spe_can_rxor(struct page **srcs, int src_cnt, size_t len)
{
	int i, order = 0, state = 0;
	int idx = 0;

	/* Skip holes in the source list before checking */
	for (i = 0; i < src_cnt; i++) {
		if (!srcs[i])
			continue;
		ppc440spe_rxor_srcs[idx++] = srcs[i];
	}
	src_cnt = idx;

	for (i = 1; i < src_cnt; i++) {
		char *cur_addr = page_address(ppc440spe_rxor_srcs[i]);
		char *old_addr = page_address(ppc440spe_rxor_srcs[i - 1]);

		switch (state) {
		case 0:
			if (cur_addr == old_addr + len) {
				/* direct RXOR */
				order = 1;
				state = 1;
			} else if (old_addr == cur_addr + len) {
				/* reverse RXOR */
				order = -1;
				state = 1;
			} else
				goto out;
			break;
		case 1:
			if ((i == src_cnt - 2) ||
			    (order == -1 && cur_addr != old_addr - len)) {
				order = 0;
				state = 0;
			} else if ((cur_addr == old_addr + len * order) ||
				   (cur_addr == old_addr + 2 * len) ||
				   (cur_addr == old_addr + 3 * len)) {
				state = 2;
			} else {
				order = 0;
				state = 0;
			}
			break;
		case 2:
			order = 0;
			state = 0;
			break;
		}
	}

out:
	if (state == 1 || state == 2)
		return 1;

	return 0;
}
/**
 * ppc440spe_adma_estimate - estimate the efficiency of processing
 *	the operation given on this channel. It's assumed that 'chan' is
 *	capable to process 'cap' type of operation.
 * @chan: channel to use
 * @cap: type of transaction
 * @dst_lst: array of destination pointers
 * @dst_cnt: number of destination operands
 * @src_lst: array of source pointers
 * @src_cnt: number of source operands
 * @src_sz: size of each source operand
 *
 * Returns -1 when @cap is DMA_PQ or DMA_PQ_VAL and RAID-6 support is not
 * enabled; otherwise a non-negative rank where 0 means "do not use this
 * channel" and larger values mean a better fit.
 */
static int ppc440spe_adma_estimate(struct dma_chan *chan,
	enum dma_transaction_type cap, struct page **dst_lst, int dst_cnt,
	struct page **src_lst, int src_cnt, size_t src_sz)
{
	int ef = 1;

	if (cap == DMA_PQ || cap == DMA_PQ_VAL) {
		/* If RAID-6 capabilities were not activated don't try
		 * to use them
		 */
		if (unlikely(!ppc440spe_r6_enabled))
			return -1;
	}
	/*  In the current implementation of ppc440spe ADMA driver it
	 * makes sense to pick out only pq case, because it may be
	 * processed:
	 * (1) either using Biskup method on DMA2;
	 * (2) or on DMA0/1.
	 *  Thus we give a favour to (1) if the sources are suitable;
	 * else let it be processed on one of the DMA0/1 engines.
	 * In the sum_product case where destination is also the
	 * source process it on DMA0/1 only.
	 */
	if (cap == DMA_PQ && chan->chan_id == PPC440SPE_XOR_ID) {

		if (dst_cnt == 1 && src_cnt == 2 && dst_lst[0] == src_lst[1])
			ef = 0; /* sum_product case, process on DMA0/1 */
		else if (ppc440spe_can_rxor(src_lst, src_cnt, src_sz))
			ef = 3; /* override (DMA0/1 + idle) */
		else
			ef = 0; /* can't process on DMA2 if !rxor */
	}

	/* channel idleness increases the priority */
	if (likely(ef) &&
	    !ppc440spe_chan_is_busy(to_ppc440spe_adma_chan(chan)))
		ef++;

	return ef;
}
if (unlikely(!src_sz)) return NULL; if (src_sz > PAGE_SIZE) { /* * should a user of the api ever pass > PAGE_SIZE requests * we sort out cases where temporary page-sized buffers * are used.
*/ switch (cap) { case DMA_PQ: if (src_cnt == 1 && dst_lst[1] == src_lst[0]) return NULL; if (src_cnt == 2 && dst_lst[1] == src_lst[1]) return NULL; break; case DMA_PQ_VAL: case DMA_XOR_VAL: return NULL; default: break;
}
}
list_for_each_entry(ref, &ppc440spe_adma_chan_list, node) { if (dma_has_cap(cap, ref->chan->device->cap_mask)) { int rank;
/** * ppc440spe_get_group_entry - get group entry with index idx * @tdesc: is the last allocated slot in the group.
*/ staticstruct ppc440spe_adma_desc_slot *
ppc440spe_get_group_entry(struct ppc440spe_adma_desc_slot *tdesc, u32 entry_idx)
{ struct ppc440spe_adma_desc_slot *iter = tdesc->group_head; int i = 0;
/** * ppc440spe_adma_free_slots - flags descriptor slots for reuse * @slot: Slot to free * Caller must hold &ppc440spe_chan->lock while calling this function
*/ staticvoid ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot, struct ppc440spe_adma_chan *chan)
{ int stride = slot->slots_per_op;
/**
 * ppc440spe_adma_run_tx_complete_actions - run the completion work attached
 * to a descriptor
 * @desc: descriptor slot that has completed
 * @chan: channel the slot belongs to (not used here)
 * @cookie: cookie value to return when @desc carries no positive cookie
 *
 * If the descriptor holds a positive cookie, that cookie is consumed (reset
 * to 0), the descriptor is unmapped and its callback is invoked. Dependent
 * operations are run in every case. BUG()s on a negative cookie.
 *
 * Returns the descriptor's cookie if it was positive, else @cookie.
 */
static dma_cookie_t ppc440spe_adma_run_tx_complete_actions(
		struct ppc440spe_adma_desc_slot *desc,
		struct ppc440spe_adma_chan *chan,
		dma_cookie_t cookie)
{
	struct dma_async_tx_descriptor *txd = &desc->async_tx;

	BUG_ON(txd->cookie < 0);

	if (txd->cookie > 0) {
		/* hand the cookie back to the caller and mark it consumed */
		cookie = txd->cookie;
		txd->cookie = 0;

		dma_descriptor_unmap(txd);

		/* the callback must not sleep or submit new
		 * operations to this channel
		 */
		dmaengine_desc_get_callback_invoke(txd, NULL);
	}

	/* kick off whatever was waiting on this descriptor */
	dma_run_dependencies(txd);

	return cookie;
}
/** * ppc440spe_adma_clean_slot - clean up CDB slot (if ack is set)
*/ staticint ppc440spe_adma_clean_slot(struct ppc440spe_adma_desc_slot *desc, struct ppc440spe_adma_chan *chan)
{ /* the client is allowed to attach dependent operations * until 'ack' is set
*/ if (!async_tx_test_ack(&desc->async_tx)) return 0;
/* leave the last descriptor in the chain * so we can append to it
*/ if (list_is_last(&desc->chain_node, &chan->chain) ||
desc->phys == ppc440spe_chan_get_current_descriptor(chan)) return 1;
if (chan->device->id != PPC440SPE_XOR_ID) { /* our DMA interrupt handler clears opc field of * each processed descriptor. For all types of * operations except for ZeroSum we do not actually * need ack from the interrupt handler. ZeroSum is a * special case since the result of this operation * is available from the handler only, so if we see * such type of descriptor (which is unprocessed yet) * then leave it in chain.
*/ struct dma_cdb *cdb = desc->hw_desc; if (cdb->opc == DMA_CDB_OPC_DCHECK128) return 1;
}
/** * __ppc440spe_adma_slot_cleanup - this is the common clean-up routine * which runs through the channel CDBs list until reach the descriptor * currently processed. When routine determines that all CDBs of group * are completed then corresponding callbacks (if any) are called and slots * are freed.
*/ staticvoid __ppc440spe_adma_slot_cleanup(struct ppc440spe_adma_chan *chan)
{ struct ppc440spe_adma_desc_slot *iter, *_iter, *group_start = NULL;
dma_cookie_t cookie = 0;
u32 current_desc = ppc440spe_chan_get_current_descriptor(chan); int busy = ppc440spe_chan_is_busy(chan); int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
/* do not advance past the current descriptor loaded into the * hardware channel,subsequent descriptors are either in process * or have not been submitted
*/ if (seen_current) break;
/* stop the search if we reach the current descriptor and the * channel is busy, or if it appears that the current descriptor * needs to be re-read (i.e. has been appended to)
*/ if (iter->phys == current_desc) {
BUG_ON(seen_current++); if (busy || ppc440spe_desc_get_link(iter, chan)) { /* not all descriptors of the group have * been completed; exit.
*/ break;
}
}
/* detect the start of a group transaction */ if (!slot_cnt && !slots_per_op) {
slot_cnt = iter->slot_cnt;
slots_per_op = iter->slots_per_op; if (slot_cnt <= slots_per_op) {
slot_cnt = 0;
slots_per_op = 0;
}
}
if (slot_cnt) { if (!group_start)
group_start = iter;
slot_cnt -= slots_per_op;
}
/* all the members of a group are complete */ if (slots_per_op != 0 && slot_cnt == 0) { struct ppc440spe_adma_desc_slot *grp_iter, *_grp_iter; int end_of_chain = 0;
/* clean up the group */
slot_cnt = group_start->slot_cnt;
grp_iter = group_start;
list_for_each_entry_safe_from(grp_iter, _grp_iter,
&chan->chain, chain_node) {
BUG_ON(!num_slots || !slots_per_op); /* start search from the last allocated descrtiptor * if a contiguous allocation can not be found start searching * from the beginning of the list
*/
retry:
slots_found = 0; if (retry == 0)
iter = chan->last_used; else
iter = list_entry(&chan->all_slots, struct ppc440spe_adma_desc_slot,
slot_node);
list_for_each_entry_safe_continue(iter, _iter, &chan->all_slots,
slot_node) {
prefetch(_iter);
prefetch(&_iter->async_tx); if (iter->slots_per_op) {
slots_found = 0; continue;
}
/* start the allocation if the slot is correctly aligned */ if (!slots_found++)
alloc_start = iter;
/* initialize the channel and the chain with a null operation */ if (init) { switch (ppc440spe_chan->device->id) { case PPC440SPE_DMA0_ID: case PPC440SPE_DMA1_ID:
ppc440spe_chan->hw_chain_inited = 0; /* Use WXOR for self-testing */ if (!ppc440spe_r6_tchan)
ppc440spe_r6_tchan = ppc440spe_chan; break; case PPC440SPE_XOR_ID:
ppc440spe_chan_start_null_xor(ppc440spe_chan); break; default:
BUG();
}
ppc440spe_chan->needs_unmap = 1;
}
/** * ppc440spe_adma_tx_submit - submit new descriptor group to the channel * (it's not necessary that descriptors will be submitted to the h/w * chains too right now)
*/ static dma_cookie_t ppc440spe_adma_tx_submit(struct dma_async_tx_descriptor *tx)
{ struct ppc440spe_adma_desc_slot *sw_desc; struct ppc440spe_adma_chan *chan = to_ppc440spe_adma_chan(tx->chan); struct ppc440spe_adma_desc_slot *group_start, *old_chain_tail; int slot_cnt; int slots_per_op;
dma_cookie_t cookie;
if (unlikely(list_empty(&chan->chain))) { /* first peer */
list_splice_init(&sw_desc->group_list, &chan->chain);
chan_first_cdb[chan->device->id] = group_start;
} else { /* isn't first peer, bind CDBs to chain */
old_chain_tail = list_entry(chan->chain.prev, struct ppc440spe_adma_desc_slot,
chain_node);
list_splice_init(&sw_desc->group_list,
&old_chain_tail->chain_node); /* fix up the hardware chain */
ppc440spe_desc_set_link(chan, old_chain_tail, group_start);
}
/* increment the pending count by the number of operations */
chan->pending += slot_cnt / slots_per_op;
ppc440spe_adma_check_threshold(chan);
spin_unlock_bh(&chan->lock);
/* * Second descriptor, multiply data from the q page * and store the result in real destination.
*/
iter = list_first_entry(&iter->chain_node, struct ppc440spe_adma_desc_slot,
chain_node);
memset(iter->hw_desc, 0, sizeof(struct dma_cdb));
iter->hw_next = NULL; if (flags & DMA_PREP_INTERRUPT)
set_bit(PPC440SPE_DESC_INT, &iter->flags); else
clear_bit(PPC440SPE_DESC_INT, &iter->flags);
/* 2nd descriptor, multiply src[1] data and store the
* result in destination */
iter = list_first_entry(&iter->chain_node, struct ppc440spe_adma_desc_slot,
chain_node);
memset(iter->hw_desc, 0, sizeof(struct dma_cdb)); /* set 'next' pointer */
iter->hw_next = list_entry(iter->chain_node.next, struct ppc440spe_adma_desc_slot,
chain_node); if (flags & DMA_PREP_INTERRUPT)
set_bit(PPC440SPE_DESC_INT, &iter->flags); else
clear_bit(PPC440SPE_DESC_INT, &iter->flags);
staticstruct ppc440spe_adma_desc_slot *ppc440spe_dma01_prep_pq( struct ppc440spe_adma_chan *ppc440spe_chan,
dma_addr_t *dst, int dst_cnt, dma_addr_t *src, int src_cnt, constunsignedchar *scf, size_t len, unsignedlong flags)
{ int slot_cnt; struct ppc440spe_adma_desc_slot *sw_desc = NULL, *iter; unsignedlong op = 0; unsignedchar mult = 1;
pr_debug("%s: dst_cnt %d, src_cnt %d, len %d\n",
__func__, dst_cnt, src_cnt, len); /* select operations WXOR/RXOR depending on the * source addresses of operators and the number * of destinations (RXOR support only Q-parity calculations)
*/
set_bit(PPC440SPE_DESC_WXOR, &op); if (!test_and_set_bit(PPC440SPE_RXOR_RUN, &ppc440spe_rxor_state)) { /* no active RXOR; * do RXOR if: * - there are more than 1 source, * - len is aligned on 512-byte boundary, * - source addresses fit to one of 4 possible regions.
*/ if (src_cnt > 1 &&
!(len & MQ0_CF2H_RXOR_BS_MASK) &&
(src[0] + len) == src[1]) { /* may do RXOR R1 R2 */
set_bit(PPC440SPE_DESC_RXOR, &op); if (src_cnt != 2) { /* may try to enhance region of RXOR */ if ((src[1] + len) == src[2]) { /* do RXOR R1 R2 R3 */
set_bit(PPC440SPE_DESC_RXOR123,
&op);
} elseif ((src[1] + len * 2) == src[2]) { /* do RXOR R1 R2 R4 */
set_bit(PPC440SPE_DESC_RXOR124, &op);
} elseif ((src[1] + len * 3) == src[2]) { /* do RXOR R1 R2 R5 */
set_bit(PPC440SPE_DESC_RXOR125,
&op);
} else { /* do RXOR R1 R2 */
set_bit(PPC440SPE_DESC_RXOR12,
&op);
}
} else { /* do RXOR R1 R2 */
set_bit(PPC440SPE_DESC_RXOR12, &op);
}
}
if (!test_bit(PPC440SPE_DESC_RXOR, &op)) { /* can not do this operation with RXOR */
clear_bit(PPC440SPE_RXOR_RUN,
&ppc440spe_rxor_state);
} else { /* can do; set block size right now */
ppc440spe_desc_set_rxor_block_size(len);
}
}
/* Number of necessary slots depends on operation type selected */ if (!test_bit(PPC440SPE_DESC_RXOR, &op)) { /* This is a WXOR only chain. Need descriptors for each * source to GF-XOR them with WXOR, and need descriptors * for each destination to zero them with WXOR
*/
slot_cnt = src_cnt;
if (flags & DMA_PREP_ZERO_P) {
slot_cnt++;
set_bit(PPC440SPE_ZERO_P, &op);
} if (flags & DMA_PREP_ZERO_Q) {
slot_cnt++;
set_bit(PPC440SPE_ZERO_Q, &op);
}
} else { /* Need 1/2 descriptor for RXOR operation, and * need (src_cnt - (2 or 3)) for WXOR of sources * remained (if any)
*/
slot_cnt = dst_cnt;
if (flags & DMA_PREP_ZERO_P)
set_bit(PPC440SPE_ZERO_P, &op); if (flags & DMA_PREP_ZERO_Q)
set_bit(PPC440SPE_ZERO_Q, &op);
/* Thus we have either RXOR only chain or * mixed RXOR/WXOR
*/ if (slot_cnt == dst_cnt) /* RXOR only chain */
clear_bit(PPC440SPE_DESC_WXOR, &op);
}
spin_lock_bh(&ppc440spe_chan->lock); /* for both RXOR/WXOR each descriptor occupies one slot */
sw_desc = ppc440spe_adma_alloc_slots(ppc440spe_chan, slot_cnt, 1); if (sw_desc) {
ppc440spe_desc_init_dma01pq(sw_desc, dst_cnt, src_cnt,
flags, op);
/* NOTE: "Multi = 0 is equivalent to = 1" as it * stated in 440SPSPe_RAID6_Addendum_UM_1_17.pdf * doesn't work for RXOR with DMA0/1! Instead, multi=0 * leads to zeroing source data after RXOR. * So, for P case set-up mult=1 explicitly.
*/ if (!(flags & DMA_PREP_PQ_DISABLE_Q))
mult = scf[src_cnt];
ppc440spe_adma_pq_set_src_mult(sw_desc,
mult, src_cnt, dst_cnt - 1);
}
/* Always use WXOR for P/Q calculations (two destinations). * Need 1 or 2 extra slots to verify results are zero.
*/
idst = dst_cnt = (pdest && qdest) ? 2 : 1;
/* One additional slot per destination to clone P/Q * before calculation (we have to preserve destinations).
*/
slot_cnt = src_cnt + dst_cnt * 2;
slots_per_op = 1;
/* Setup destinations for P/Q ops */
ppc440spe_adma_pqzero_sum_set_dest(sw_desc, pdest, qdest);
/* Setup zero QWORDs into DCHECK CDBs */
idst = dst_cnt;
list_for_each_entry_reverse(iter, &sw_desc->group_list,
chain_node) { /* * The last CDB corresponds to Q-parity check, * the one before last CDB corresponds * P-parity check
*/ if (idst == DMA_DEST_MAX_NUM) { if (idst == dst_cnt) {
set_bit(PPC440SPE_DESC_QCHECK,
&iter->flags);
} else {
set_bit(PPC440SPE_DESC_PCHECK,
&iter->flags);
}
} else { if (qdest) {
set_bit(PPC440SPE_DESC_QCHECK,
&iter->flags);
} else {
set_bit(PPC440SPE_DESC_PCHECK,
&iter->flags);
}
}
iter->xor_check_result = pqres;
/* * set it to zero, if check fail then result will * be updated
*/
*iter->xor_check_result = 0;
ppc440spe_desc_set_dcheck(iter, ppc440spe_chan,
ppc440spe_qword);
if (!(--dst_cnt)) break;
}
/* Setup sources and mults for P/Q ops */
list_for_each_entry_continue_reverse(iter, &sw_desc->group_list,
chain_node) { struct ppc440spe_adma_chan *chan;
u32 mult_dst;
switch (chan->device->id) { case PPC440SPE_DMA0_ID: case PPC440SPE_DMA1_ID: /* walk through the WXOR source list and set P/Q-destinations * for each slot:
*/ if (!test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { /* This is WXOR-only chain; may have 1/2 zero descs */ if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
index++; if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
index++;
if (index) { /* To clear destinations update the descriptor * (1st,2nd, or both depending on flags)
*/
index = 0; if (test_bit(PPC440SPE_ZERO_P,
&sw_desc->flags)) {
iter = ppc440spe_get_group_entry(
sw_desc, index++);
ppc440spe_adma_pq_zero_op(iter, chan,
paddr);
}
if (test_bit(PPC440SPE_ZERO_Q,
&sw_desc->flags)) {
iter = ppc440spe_get_group_entry(
sw_desc, index++);
ppc440spe_adma_pq_zero_op(iter, chan,
qaddr);
}
return;
}
} else { /* This is RXOR-only or RXOR/WXOR mixed chain */
/* If we want to include destination into calculations, * then make dest addresses cued with mult=1 (XOR).
*/
ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
DMA_CUED_XOR_HB :
DMA_CUED_XOR_BASE |
(1 << DMA_CUED_MULT1_OFF);
qpath = test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags) ?
DMA_CUED_XOR_HB :
DMA_CUED_XOR_BASE |
(1 << DMA_CUED_MULT1_OFF);
/* Setup destination(s) in RXOR slot(s) */
iter = ppc440spe_get_group_entry(sw_desc, index++);
ppc440spe_desc_set_dest_addr(iter, chan,
paddr ? ppath : qpath,
paddr ? paddr : qaddr, 0); if (!addr) { /* two destinations */
iter = ppc440spe_get_group_entry(sw_desc,
index++);
ppc440spe_desc_set_dest_addr(iter, chan,
qpath, qaddr, 0);
}
if (test_bit(PPC440SPE_DESC_WXOR, &sw_desc->flags)) { /* Setup destination(s) in remaining WXOR * slots
*/
iter = ppc440spe_get_group_entry(sw_desc,
index); if (addr) { /* one destination */
list_for_each_entry_from(iter,
&sw_desc->group_list,
chain_node)
ppc440spe_desc_set_dest_addr(
iter, chan,
DMA_CUED_XOR_BASE,
addr, 0);
case PPC440SPE_XOR_ID: /* DMA2 descriptors have only 1 destination, so there are * two chains - one for each dest. * If we want to include destination into calculations, * then make dest addresses cued with mult=1 (XOR).
*/
ppath = test_bit(PPC440SPE_ZERO_P, &sw_desc->flags) ?
DMA_CUED_XOR_HB :
DMA_CUED_XOR_BASE |
(1 << DMA_CUED_MULT1_OFF);
iter = ppc440spe_get_group_entry(sw_desc, 0); for (i = 0; i < sw_desc->descs_per_op; i++) {
ppc440spe_desc_set_dest_addr(iter, chan,
paddr ? ppath : qpath,
paddr ? paddr : qaddr, 0);
iter = list_entry(iter->chain_node.next, struct ppc440spe_adma_desc_slot,
chain_node);
}
if (!addr) { /* Two destinations; setup Q here */
iter = ppc440spe_get_group_entry(sw_desc,
sw_desc->descs_per_op); for (i = 0; i < sw_desc->descs_per_op; i++) {
ppc440spe_desc_set_dest_addr(iter,
chan, qpath, qaddr, 0);
iter = list_entry(iter->chain_node.next, struct ppc440spe_adma_desc_slot,
chain_node);
}
}
break;
}
}
/**
 * ppc440spe_adma_pqzero_sum_set_dest - set destination address into descriptor
 * for the PQ_ZERO_SUM operation
 * @sw_desc: software descriptor heading the group of slots to program
 * @paddr: DMA address of the P block, or 0 when P is not checked
 * @qaddr: DMA address of the Q block, or 0 when Q is not checked
 *
 * The group is split in two: slots from index 1 (one destination) or 2 (two
 * destinations) up to, but not including, @end receive the destination
 * address(es); the trailing one or two slots starting at @end receive the
 * same address(es) as sources.
 */
static void ppc440spe_adma_pqzero_sum_set_dest(
		struct ppc440spe_adma_desc_slot *sw_desc,
		dma_addr_t paddr, dma_addr_t qaddr)
{
	struct ppc440spe_adma_desc_slot *iter, *end;
	struct ppc440spe_adma_chan *chan;
	dma_addr_t addr = 0;
	int idx;

	/* NOTE(review): assumes the owning channel is reachable through the
	 * async_tx descriptor embedded in the slot - confirm against the
	 * struct ppc440spe_adma_desc_slot definition.
	 */
	chan = to_ppc440spe_adma_chan(sw_desc->async_tx.chan);

	/* walk through the WXOR source list and set P/Q-destinations
	 * for each slot
	 */
	idx = (paddr && qaddr) ? 2 : 1;
	/* set end: the idx-th slot counted from the tail of the group */
	list_for_each_entry_reverse(end, &sw_desc->group_list,
				    chain_node) {
		if (!(--idx))
			break;
	}
	/* set start */
	idx = (paddr && qaddr) ? 2 : 1;
	iter = ppc440spe_get_group_entry(sw_desc, idx);

	if (paddr && qaddr) {
		/* two destinations */
		/* NOTE(review): destination index 1 for Q is assumed to
		 * select the second destination field - confirm against
		 * ppc440spe_desc_set_dest_addr().
		 */
		list_for_each_entry_from(iter, &sw_desc->group_list,
					 chain_node) {
			if (unlikely(iter == end))
				break;
			ppc440spe_desc_set_dest_addr(iter, chan,
						DMA_CUED_XOR_BASE, paddr, 0);
			ppc440spe_desc_set_dest_addr(iter, chan,
						DMA_CUED_XOR_BASE, qaddr, 1);
		}
	} else {
		/* one destination; remember it so that only a single
		 * trailing slot is programmed below
		 */
		addr = paddr ? paddr : qaddr;
		list_for_each_entry_from(iter, &sw_desc->group_list,
					 chain_node) {
			if (unlikely(iter == end))
				break;
			ppc440spe_desc_set_dest_addr(iter, chan,
						DMA_CUED_XOR_BASE, addr, 0);
		}
	}

	/* The remaining descriptors are DATACHECK. These have no need in
	 * destination. Actually, these destinations are used there
	 * as sources for check operation. So, set addr as source.
	 */
	ppc440spe_desc_set_src_addr(end, chan, 0, 0, addr ? addr : paddr);

	if (!addr) {
		/* two destinations: the slot after @end checks Q */
		end = list_entry(end->chain_node.next,
				 struct ppc440spe_adma_desc_slot, chain_node);
		ppc440spe_desc_set_src_addr(end, chan, 0, 0, qaddr);
	}
}
/** * ppc440spe_desc_set_xor_src_cnt - set source count into descriptor
*/ staticinlinevoid ppc440spe_desc_set_xor_src_cnt( struct ppc440spe_adma_desc_slot *desc, int src_cnt)
{ struct xor_cb *hw_desc = desc->hw_desc;
switch (chan->device->id) { case PPC440SPE_DMA0_ID: case PPC440SPE_DMA1_ID: /* DMA0,1 may do: WXOR, RXOR, RXOR+WXORs chain
*/ if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { /* RXOR-only or RXOR/WXOR operation */ int iskip = test_bit(PPC440SPE_DESC_RXOR12,
&sw_desc->flags) ? 2 : 3;
if (index == 0) { /* 1st slot (RXOR) */ /* setup sources region (R1-2-3, R1-2-4, * or R1-2-5)
*/ if (test_bit(PPC440SPE_DESC_RXOR12,
&sw_desc->flags))
haddr = DMA_RXOR12 <<
DMA_CUED_REGION_OFF; elseif (test_bit(PPC440SPE_DESC_RXOR123,
&sw_desc->flags))
haddr = DMA_RXOR123 <<
DMA_CUED_REGION_OFF; elseif (test_bit(PPC440SPE_DESC_RXOR124,
&sw_desc->flags))
haddr = DMA_RXOR124 <<
DMA_CUED_REGION_OFF; elseif (test_bit(PPC440SPE_DESC_RXOR125,
&sw_desc->flags))
haddr = DMA_RXOR125 <<
DMA_CUED_REGION_OFF; else
BUG();
haddr |= DMA_CUED_XOR_BASE;
iter = ppc440spe_get_group_entry(sw_desc, 0);
} elseif (index < iskip) { /* 1st slot (RXOR) * shall actually set source address only once * instead of first <iskip>
*/
iter = NULL;
} else { /* 2nd/3d and next slots (WXOR); * skip first slot with RXOR
*/
haddr = DMA_CUED_XOR_HB;
iter = ppc440spe_get_group_entry(sw_desc,
index - iskip + sw_desc->dst_cnt);
}
} else { int znum = 0;
/* WXOR-only operation; skip first slots with * zeroing destinations
*/ if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
znum++; if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
znum++;
haddr = DMA_CUED_XOR_HB;
iter = ppc440spe_get_group_entry(sw_desc,
index + znum);
}
if (likely(iter)) {
ppc440spe_desc_set_src_addr(iter, chan, 0, haddr, addr);
if (!index &&
test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags) &&
sw_desc->dst_cnt == 2) { /* if we have two destinations for RXOR, then * setup source in the second descr too
*/
iter = ppc440spe_get_group_entry(sw_desc, 1);
ppc440spe_desc_set_src_addr(iter, chan, 0,
haddr, addr);
}
} break;
case PPC440SPE_XOR_ID: /* DMA2 may do Biskup */
iter = sw_desc->group_head; if (iter->dst_cnt == 2) { /* both P & Q calculations required; set P src here */
ppc440spe_adma_dma2rxor_set_src(iter, index, addr);
/* this is for Q */
iter = ppc440spe_get_group_entry(sw_desc,
sw_desc->descs_per_op);
}
ppc440spe_adma_dma2rxor_set_src(iter, index, addr); break;
}
}
/** * ppc440spe_adma_memcpy_xor_set_src - set source address into descriptor
*/ staticvoid ppc440spe_adma_memcpy_xor_set_src( struct ppc440spe_adma_desc_slot *sw_desc,
dma_addr_t addr, int index)
{ struct ppc440spe_adma_chan *chan;
/**
 * ppc440spe_adma_dma2rxor_set_src - set RXOR source address; it's assumed that
 * ppc440spe_adma_dma2rxor_prep_src() has already been called for @desc
 * @desc: first descriptor slot of the chain to search
 * @index: index of the source whose address is being set
 * @addr: DMA address of that source
 *
 * The address is written only when @index is the anchor source of its
 * operand group: the first source for direct order, the last one when the
 * group's bit is set in reverse_flags. For any other @index nothing is
 * written.
 */
static void ppc440spe_adma_dma2rxor_set_src(
		struct ppc440spe_adma_desc_slot *desc,
		int index, dma_addr_t addr)
{
	struct xor_cb *xcb = desc->hw_desc;
	int k = 0, op = 0, lop = 0;
	int anchor;

	/* get the RXOR operand which corresponds to index addr: an operand
	 * tagged DMA_RXOR12 covers two sources, any other covers three
	 */
	while (op <= index) {
		lop = op;
		if (k == XOR_MAX_OPS) {
			/* operands of this descriptor are used up; continue
			 * with the next slot in the chain
			 */
			k = 0;
			desc = list_entry(desc->chain_node.next,
					  struct ppc440spe_adma_desc_slot,
					  chain_node);
			xcb = desc->hw_desc;
		}
		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
			op += 2;
		else
			op += 3;
	}

	/* the loop body never ran, i.e. index was negative */
	BUG_ON(k < 1);

	/* reverse operand order: put last op in RXOR group;
	 * direct operand order: put first op in RXOR group
	 */
	anchor = test_bit(k - 1, desc->reverse_flags) ? op - 1 : lop;
	if (index == anchor)
		ppc440spe_rxor_set_src(desc, k - 1, addr);
}
/**
 * ppc440spe_adma_dma2rxor_set_mult - set RXOR multipliers; it's assumed that
 * ppc440spe_adma_dma2rxor_prep_src() has already been called for @desc
 * @desc: first descriptor slot of the chain to search
 * @index: index of the source whose multiplier is being set
 * @mult: multiplier value for that source
 *
 * Locates the operand group that contains source @index and passes the
 * position of the source inside that group to ppc440spe_rxor_set_mult().
 * The position is counted from the group's start for direct order and from
 * its end when the group's bit is set in reverse_flags.
 */
static void ppc440spe_adma_dma2rxor_set_mult(
		struct ppc440spe_adma_desc_slot *desc,
		int index, u8 mult)
{
	struct xor_cb *xcb = desc->hw_desc;
	int k = 0, op = 0, lop = 0;
	int pos;

	/* get the RXOR operand which corresponds to index mult: an operand
	 * tagged DMA_RXOR12 covers two sources, any other covers three
	 */
	while (op <= index) {
		lop = op;
		if (k == XOR_MAX_OPS) {
			/* operands of this descriptor are used up; continue
			 * with the next slot in the chain
			 */
			k = 0;
			desc = list_entry(desc->chain_node.next,
					  struct ppc440spe_adma_desc_slot,
					  chain_node);
			xcb = desc->hw_desc;
		}
		if ((xcb->ops[k++].h & (DMA_RXOR12 << DMA_CUED_REGION_OFF)) ==
		    (DMA_RXOR12 << DMA_CUED_REGION_OFF))
			op += 2;
		else
			op += 3;
	}

	/* the loop body never ran, i.e. index was negative */
	BUG_ON(k < 1);

	if (test_bit(k - 1, desc->reverse_flags))
		pos = op - index - 1;	/* reverse order */
	else
		pos = index - lop;	/* direct order */

	ppc440spe_rxor_set_mult(desc, k - 1, pos, mult);
}
switch (chan->device->id) { case PPC440SPE_DMA0_ID: case PPC440SPE_DMA1_ID: if (test_bit(PPC440SPE_DESC_RXOR, &sw_desc->flags)) { int region = test_bit(PPC440SPE_DESC_RXOR12,
&sw_desc->flags) ? 2 : 3;
if (index < region) { /* RXOR multipliers */
iter = ppc440spe_get_group_entry(sw_desc,
sw_desc->dst_cnt - 1); if (sw_desc->dst_cnt == 2)
iter1 = ppc440spe_get_group_entry(
sw_desc, 0);
/* WXOR-only; * skip first slots with destinations (if ZERO_DST has * place)
*/ if (test_bit(PPC440SPE_ZERO_P, &sw_desc->flags))
znum++; if (test_bit(PPC440SPE_ZERO_Q, &sw_desc->flags))
znum++;
iter = ppc440spe_get_group_entry(sw_desc, index + znum);
mult_idx = DMA_CUED_MULT1_OFF;
mult_dst = dst_pos ? DMA_CDB_SG_DST2 : DMA_CDB_SG_DST1;
}
if (likely(iter)) {
ppc440spe_desc_set_src_mult(iter, chan,
mult_idx, mult_dst, mult);
if (unlikely(iter1)) { /* if we have two destinations for RXOR, then * we've just set Q mult. Set-up P now.
*/
ppc440spe_desc_set_src_mult(iter1, chan,
mult_idx, mult_dst, 1);
}
} break;
case PPC440SPE_XOR_ID:
iter = sw_desc->group_head; if (sw_desc->dst_cnt == 2) { /* both P & Q calculations required; set P mult here */
ppc440spe_adma_dma2rxor_set_mult(iter, index, 1);
/* and then set Q mult */
iter = ppc440spe_get_group_entry(sw_desc,
sw_desc->descs_per_op);
}
ppc440spe_adma_dma2rxor_set_mult(iter, index, mult); break;
}
}
/* one is ok since we left it on there on purpose */ if (in_use_descs > 1)
printk(KERN_ERR "SPE: Freeing %d in use descriptors!\n",
in_use_descs - 1);
}
/** * ppc440spe_adma_tx_status - poll the status of an ADMA transaction * @chan: ADMA channel handle * @cookie: ADMA transaction identifier * @txstate: a holder for the current state of the channel
*/ staticenum dma_status ppc440spe_adma_tx_status(struct dma_chan *chan,
dma_cookie_t cookie, struct dma_tx_state *txstate)
{ struct ppc440spe_adma_chan *ppc440spe_chan; enum dma_status ret;
ppc440spe_chan = to_ppc440spe_adma_chan(chan);
ret = dma_cookie_status(chan, cookie, txstate); if (ret == DMA_COMPLETE) return ret;
/** * ppc440spe_adma_err_handler - DMA error interrupt handler; * do the same things as a eot handler
*/ static irqreturn_t ppc440spe_adma_err_handler(int irq, void *data)
{ struct ppc440spe_adma_chan *chan = data;
/**
 * ppc440spe_test_callback - called when test operation has been done
 * @unused: callback argument; not used
 *
 * Signals the ppc440spe_r6_test_comp completion.
 */
static void ppc440spe_test_callback(void *unused)
{
	complete(&ppc440spe_r6_test_comp);
}
/**
 * ppc440spe_adma_issue_pending - flush all pending descriptors to h/w
 * @chan: generic DMA channel handle
 *
 * Does nothing when the channel has no pending descriptors; otherwise
 * clears the pending counter and appends the queued descriptors via
 * ppc440spe_chan_append().
 */
static void ppc440spe_adma_issue_pending(struct dma_chan *chan)
{
	/* resolve the driver-private channel before touching its state */
	struct ppc440spe_adma_chan *ppc440spe_chan =
		to_ppc440spe_adma_chan(chan);

	if (!ppc440spe_chan->pending)
		return;

	ppc440spe_chan->pending = 0;
	ppc440spe_chan_append(ppc440spe_chan);
}
/**
 * ppc440spe_chan_start_null_xor - initiate the first XOR operation (DMA engines
 * use FIFOs (as opposed to the chains used in XOR), so this is an XOR
 * specific operation)
 * @chan: channel on which the null descriptor is started
 *
 * NOTE(review): the `} else` below has no matching `if` in this view, `cookie`
 * is read without a visible assignment, and chan->lock is released without a
 * visible acquisition - part of the body appears to be missing here; confirm
 * against the complete file before changing this function.
 */
staticvoid ppc440spe_chan_start_null_xor(struct ppc440spe_adma_chan *chan)
{ struct ppc440spe_adma_desc_slot *sw_desc, *group_start;
dma_cookie_t cookie; int slot_cnt, slots_per_op;
/* initialize the completed cookie to be less than
 * the most recently used cookie
 */
chan->common.completed_cookie = cookie - 1;
/* channel should not be busy */
BUG_ON(ppc440spe_chan_is_busy(chan));
/* set the descriptor address */
ppc440spe_chan_set_first_xor_descriptor(chan, sw_desc);
/* run the descriptor */
ppc440spe_chan_run(chan);
} else
/* failure branch: only report the error, then fall through to the unlock */
printk(KERN_ERR "ppc440spe adma%d" " failed to allocate null descriptor\n",
chan->device->id);
spin_unlock_bh(&chan->lock);
}
/**
 * ppc440spe_test_raid6 - test whether the RAID-6 capabilities were enabled
 * successfully.
 * For this we just perform one WXOR operation with the same source
 * and destination addresses, the GF-multiplier is 1; so if RAID-6
 * capabilities are enabled then we'll get src/dst filled with zero.
 * @chan: channel used for the test
 *
 * Returns 0 when the test page reads back as all zeroes, -EINVAL when it
 * does not, and -ENOMEM when the test page cannot be allocated.
 *
 * NOTE(review): in this view nothing is submitted between mapping the page
 * and checking it, `sw_desc`, `iter` and `addrs` are never used, no `goto`
 * targets the `exit` label, and the DMA mapping is not visibly unmapped -
 * part of the body appears to be missing; confirm against the complete file.
 */
staticint ppc440spe_test_raid6(struct ppc440spe_adma_chan *chan)
{ struct ppc440spe_adma_desc_slot *sw_desc, *iter; struct page *pg; char *a;
dma_addr_t dma_addr, addrs[2]; unsignedlong op = 0; int rval = 0;
/* op ends up with only the PPC440SPE_DESC_WXOR bit set */
set_bit(PPC440SPE_DESC_WXOR, &op);
pg = alloc_page(GFP_KERNEL); if (!pg) return -ENOMEM;
/* Fill the test page with ones */
memset(page_address(pg), 0xFF, PAGE_SIZE);
dma_addr = dma_map_page(chan->device->dev, pg, 0,
PAGE_SIZE, DMA_BIDIRECTIONAL);
/* Now check if the test page is zeroed: the first word must be zero and
 * every byte must equal the byte four positions later, which together
 * means the whole page is zero
 */
a = page_address(pg); if ((*(u32 *)a) == 0 && memcmp(a, a+4, PAGE_SIZE-4) == 0) { /* page is zero - RAID-6 enabled */
rval = 0;
} else { /* RAID-6 was not enabled */
rval = -EINVAL;
} exit:
/* the test page is released on every path that reaches this label */
__free_page(pg); return rval;
}
adev->irq = irq_of_parse_and_map(np, 0); if (!adev->irq) {
dev_err(adev->dev, "no irq resource\n");
*initcode = PPC_ADMA_INIT_IRQ1;
ret = -ENXIO; goto err_irq_map;
}
dev_dbg(adev->dev, "irq %d, err irq %d\n",
adev->irq, adev->err_irq);
ret = request_irq(adev->irq, ppc440spe_adma_eot_handler,
0, dev_driver_string(adev->dev), chan); if (ret) {
dev_err(adev->dev, "can't request irq %d\n",
adev->irq);
*initcode = PPC_ADMA_INIT_IRQ1;
ret = -EIO; goto err_req1;
}
/* only DMA engines have a separate error IRQ * so it's Ok if err_irq < 0 in XOR engine case.
*/ if (adev->err_irq > 0) { /* both DMA engines share common error IRQ */
ret = request_irq(adev->err_irq,
ppc440spe_adma_err_handler,
IRQF_SHARED,
dev_driver_string(adev->dev),
chan); if (ret) {
dev_err(adev->dev, "can't request irq %d\n",
adev->err_irq);
*initcode = PPC_ADMA_INIT_IRQ2;
ret = -EIO; goto err_req2;
}
}
if (of_device_is_compatible(np, "amcc,xor-accelerator")) {
id = PPC440SPE_XOR_ID; /* As far as the XOR engine is concerned, it does not * use FIFOs but uses linked list. So there is no dependency * between pool size to allocate and the engine configuration.
*/
pool_size = PAGE_SIZE << 1;
} else { /* it is DMA0 or DMA1 */
idx = of_get_property(np, "cell-index", &len); if (!idx || (len != sizeof(u32))) {
dev_err(&ofdev->dev, "Device node %pOF has missing " "or invalid cell-index property\n",
np); return -EINVAL;
}
id = *idx; /* DMA0,1 engines use FIFO to maintain CDBs, so we * should allocate the pool accordingly to size of this * FIFO. Thus, the pool size depends on the FIFO depth: * how much CDBs pointers the FIFO may contain then so * much CDBs we should provide in the pool. * That is * CDB size = 32B; * CDBs number = (DMA0_FIFO_SIZE >> 3); * Pool size = CDBs number * CDB size = * = (DMA0_FIFO_SIZE >> 3) << 5 = DMA0_FIFO_SIZE << 2.
*/
pool_size = (id == PPC440SPE_DMA0_ID) ?
DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
pool_size <<= 2;
}
if (of_address_to_resource(np, 0, &res)) {
dev_err(&ofdev->dev, "failed to get memory resource\n");
initcode = PPC_ADMA_INIT_MEMRES;
ret = -ENODEV; goto out;
}
if (!request_mem_region(res.start, resource_size(&res),
dev_driver_string(&ofdev->dev))) {
dev_err(&ofdev->dev, "failed to request memory region %pR\n",
&res);
initcode = PPC_ADMA_INIT_MEMREG;
ret = -EBUSY; goto out;
}
/* create a device */
adev = kzalloc(sizeof(*adev), GFP_KERNEL); if (!adev) {
initcode = PPC_ADMA_INIT_ALLOC;
ret = -ENOMEM; goto err_adev_alloc;
}
adev->id = id;
adev->pool_size = pool_size; /* allocate coherent memory for hardware descriptors */
adev->dma_desc_pool_virt = dma_alloc_coherent(&ofdev->dev,
adev->pool_size, &adev->dma_desc_pool,
GFP_KERNEL); if (adev->dma_desc_pool_virt == NULL) {
dev_err(&ofdev->dev, "failed to allocate %d bytes of coherent " "memory for hardware descriptors\n",
adev->pool_size);
initcode = PPC_ADMA_INIT_COHERENT;
ret = -ENOMEM; goto err_dma_alloc;
}
dev_dbg(&ofdev->dev, "allocated descriptor pool virt 0x%p phys 0x%llx\n",
adev->dma_desc_pool_virt, (u64)adev->dma_desc_pool);
regs = ioremap(res.start, resource_size(&res)); if (!regs) {
dev_err(&ofdev->dev, "failed to ioremap regs!\n");
ret = -ENOMEM; goto err_regs_alloc;
}
if (adev->id == PPC440SPE_XOR_ID) {
adev->xor_reg = regs; /* Reset XOR */
iowrite32be(XOR_CRSR_XASR_BIT, &adev->xor_reg->crsr);
iowrite32be(XOR_CRSR_64BA_BIT, &adev->xor_reg->crrr);
} else {
size_t fifo_size = (adev->id == PPC440SPE_DMA0_ID) ?
DMA0_FIFO_SIZE : DMA1_FIFO_SIZE;
adev->dma_reg = regs; /* DMAx_FIFO_SIZE is defined in bytes, * <fsiz> - is defined in number of CDB pointers (8byte). * DMA FIFO Length = CSlength + CPlength, where * CSlength = CPlength = (fsiz + 1) * 8.
*/
iowrite32(DMA_FIFO_ENABLE | ((fifo_size >> 3) - 2),
&adev->dma_reg->fsiz); /* Configure DMA engine */
iowrite32(DMA_CFG_DXEPR_HP | DMA_CFG_DFMPP_HP | DMA_CFG_FALGN,
&adev->dma_reg->cfg); /* Clear Status */
iowrite32(~0, &adev->dma_reg->dsts);
}
/* * /sys driver interface to enable h/w RAID-6 capabilities * Files created in e.g. /sys/devices/plb.0/400100100.dma0/driver/ * directory are "devices", "enable" and "poly". * "devices" shows available engines. * "enable" is used to enable RAID-6 capabilities or to check * whether these has been activated. * "poly" allows setting/checking used polynomial (for PPC440SPe only).
*/
/* e.g., 0x14D or 0x11D */
err = kstrtoul(buf, 16, &val); if (err) return err;
if (val & ~0x1FF) return -EINVAL;
val &= 0xFF;
reg = dcr_read(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL);
reg &= ~(0xFF << MQ0_CFBHL_POLY);
reg |= val << MQ0_CFBHL_POLY;
dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_CFBHL, reg);
return count;
} static DRIVER_ATTR_RW(poly);
/* * Common initialisation for RAID engines; allocate memory for * DMAx FIFOs, perform configuration common for all DMA engines. * Further DMA engine specific configuration is done at probe time.
*/ staticint ppc440spe_configure_raid_devices(void)
{ struct device_node *np; struct resource i2o_res; struct i2o_regs __iomem *i2o_reg;
dcr_host_t i2o_dcr_host; unsignedint dcr_base, dcr_len; int i, ret;
np = of_find_compatible_node(NULL, NULL, "ibm,i2o-440spe"); if (!np) {
pr_err("%s: can't find I2O device tree node\n",
__func__); return -ENODEV;
}
if (of_address_to_resource(np, 0, &i2o_res)) {
of_node_put(np); return -EINVAL;
}
i2o_reg = of_iomap(np, 0); if (!i2o_reg) {
pr_err("%s: failed to map I2O registers\n", __func__);
of_node_put(np); return -EINVAL;
}
/* Get I2O DCRs base */
dcr_base = dcr_resource_start(np, 0);
dcr_len = dcr_resource_len(np, 0); if (!dcr_base && !dcr_len) {
pr_err("%pOF: can't get DCR registers base/len!\n", np);
of_node_put(np);
iounmap(i2o_reg); return -ENODEV;
}
i2o_dcr_host = dcr_map(np, dcr_base, dcr_len); if (!DCR_MAP_OK(i2o_dcr_host)) {
pr_err("%pOF: failed to map DCRs!\n", np);
of_node_put(np);
iounmap(i2o_reg); return -ENODEV;
}
of_node_put(np);
/* Provide memory regions for DMA's FIFOs: I2O, DMA0 and DMA1 share * the base address of FIFO memory space. * Actually we need twice more physical memory than programmed in the * <fsiz> register (because there are two FIFOs for each DMA: CP and CS)
*/
ppc440spe_dma_fifo_buf = kmalloc((DMA0_FIFO_SIZE + DMA1_FIFO_SIZE) << 1,
GFP_KERNEL); if (!ppc440spe_dma_fifo_buf) {
pr_err("%s: DMA FIFO buffer allocation failed.\n", __func__);
iounmap(i2o_reg);
dcr_unmap(i2o_dcr_host, dcr_len); return -ENOMEM;
}
/* Setup the base address of mmaped registers */
dcr_write(i2o_dcr_host, DCRN_I2O0_IBAH, (u32)(i2o_res.start >> 32));
dcr_write(i2o_dcr_host, DCRN_I2O0_IBAL, (u32)(i2o_res.start) |
I2O_REG_ENABLE);
dcr_unmap(i2o_dcr_host, dcr_len);
/* Setup FIFO memory space base address */
iowrite32(0, &i2o_reg->ifbah);
iowrite32(((u32)__pa(ppc440spe_dma_fifo_buf)), &i2o_reg->ifbal);
/* set zero FIFO size for I2O, so the whole * ppc440spe_dma_fifo_buf is used by DMAs. * DMAx_FIFOs will be configured while probe.
*/
iowrite32(0, &i2o_reg->ifsiz);
iounmap(i2o_reg);
/* To prepare WXOR/RXOR functionality we need access to * Memory Queue Module DCRs (finally it will be enabled * via /sys interface of the ppc440spe ADMA driver).
*/
np = of_find_compatible_node(NULL, NULL, "ibm,mq-440spe"); if (!np) {
pr_err("%s: can't find MQ device tree node\n",
__func__);
ret = -ENODEV; goto out_free;
}
/* Get MQ DCRs base */
dcr_base = dcr_resource_start(np, 0);
dcr_len = dcr_resource_len(np, 0); if (!dcr_base && !dcr_len) {
pr_err("%pOF: can't get DCR registers base/len!\n", np);
ret = -ENODEV; goto out_mq;
}
ppc440spe_mq_dcr_host = dcr_map(np, dcr_base, dcr_len); if (!DCR_MAP_OK(ppc440spe_mq_dcr_host)) {
pr_err("%pOF: failed to map DCRs!\n", np);
ret = -ENODEV; goto out_mq;
}
of_node_put(np);
ppc440spe_mq_dcr_len = dcr_len;
/* Set HB alias */
dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_BAUH, DMA_CUED_XOR_HB);
/*
 * Driver initialisation: run the common RAID engine setup, register the
 * platform driver and create the "devices", "enable" and "poly" driver
 * attribute files. Returns 0 on success or the error of the step that
 * failed; on failure every earlier step is undone in reverse order.
 */
static __init int ppc440spe_adma_init(void)
{
	int err;

	err = ppc440spe_configure_raid_devices();
	if (err)
		return err;

	err = platform_driver_register(&ppc440spe_adma_driver);
	if (err) {
		pr_err("%s: failed to register platform driver\n", __func__);
		goto undo_common;
	}

	/* Initialization status */
	err = driver_create_file(&ppc440spe_adma_driver.driver,
				 &driver_attr_devices);
	if (err)
		goto undo_driver;

	/* RAID-6 h/w enable entry */
	err = driver_create_file(&ppc440spe_adma_driver.driver,
				 &driver_attr_enable);
	if (err)
		goto undo_devices_attr;

	/* GF polynomial to use */
	err = driver_create_file(&ppc440spe_adma_driver.driver,
				 &driver_attr_poly);
	if (err)
		goto undo_enable_attr;

	return 0;

undo_enable_attr:
	driver_remove_file(&ppc440spe_adma_driver.driver,
			   &driver_attr_enable);
undo_devices_attr:
	driver_remove_file(&ppc440spe_adma_driver.driver,
			   &driver_attr_devices);
undo_driver:
	/* User will not be able to enable h/w RAID-6 */
	pr_err("%s: failed to create RAID-6 driver interface\n", __func__);
	platform_driver_unregister(&ppc440spe_adma_driver);
undo_common:
	/* release the MQ DCR mapping and the DMA FIFO buffer */
	dcr_unmap(ppc440spe_mq_dcr_host, ppc440spe_mq_dcr_len);
	kfree(ppc440spe_dma_fifo_buf);
	return err;
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.