/* * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. * * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 2007 Maciej W. Rozycki * Copyright (C) 2008 Thiemo Seufer * Copyright (C) 2012 MIPS Technologies, Inc.
*/ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/mm.h> #include <linux/proc_fs.h>
/*
 * R6 has a limited offset of the pref instruction.
 * Skip the prefetch if the offset does not fit in a signed 9-bit
 * field, i.e. it is outside the range -0x100 .. 0xff.
 *
 * On non-R6 CPUs the prefetch is always emitted.
 *
 * The offset argument is parenthesized in the range check so that a
 * compound expression (e.g. a conditional expression) is compared as a
 * whole. Note that 'c' is still evaluated more than once, so it must
 * not have side effects.
 */
#define _uasm_i_pref(a, b, c, d)			\
do {							\
	if (cpu_has_mips_r6) {				\
		if ((c) <= 0xff && (c) >= -0x100)	\
			uasm_i_pref(a, b, c, d);	\
	} else {					\
		uasm_i_pref(a, b, c, d);		\
	}						\
} while (0)
if (cpu_has_64bit_gp_regs)
copy_word_size = 8; else
copy_word_size = 4;
/* * The pref's used here are using "streaming" hints, which cause the * copied data to be kicked out of the cache sooner. A page copy often * ends up copying a lot more data than is commonly used, so this seems * to make sense in terms of reducing cache pollution, but I've no real * performance data to back this up.
*/ if (cpu_has_prefetch) { /* * XXX: Most prefetch bias values in here are based on * guesswork.
*/
cache_line_size = cpu_dcache_line_size(); switch (current_cpu_type()) { case CPU_R5500: case CPU_TX49XX: /* These processors only support the Pref_Load. */
pref_bias_copy_load = 256; break;
case CPU_R10000: case CPU_R12000: case CPU_R14000: case CPU_R16000: /* * Those values have been experimentally tuned for an * Origin 200.
*/
pref_bias_clear_store = 512;
pref_bias_copy_load = 256;
pref_bias_copy_store = 256;
pref_src_mode = Pref_LoadStreamed;
pref_dst_mode = Pref_StoreStreamed; break;
/*
 * This algorithm makes the following assumptions:
 *   - The prefetch bias is a multiple of 2 words.
 *   - The prefetch bias is less than one page.
 *
 * Both are enforced below with BUG_ON(). Note that the second check
 * only rejects a bias larger than PAGE_SIZE; a bias equal to PAGE_SIZE
 * passes it.
 */
BUG_ON(pref_bias_clear_store % (2 * clear_word_size));
BUG_ON(PAGE_SIZE < pref_bias_clear_store);
/*
 * off = number of bytes of the page that lie before the final
 * pref_bias_clear_store bytes. It is combined with GPR_A0 into GPR_A2,
 * which the later loop branch compares against GPR_A0.
 * A 16-bit OR-immediate is used when off fits in 16 bits and the bias
 * is non-zero; otherwise pg_addiu() is used.
 * NOTE(review): the OR form presumably relies on GPR_A0 holding a
 * page-aligned address so that OR and ADD give the same result - confirm.
 */
off = PAGE_SIZE - pref_bias_clear_store; if (off > 0xffff || !pref_bias_clear_store)
pg_addiu(&buf, GPR_A2, GPR_A0, off); else
uasm_i_ori(&buf, GPR_A2, GPR_A0, off);
/*
 * Only when the R4600 v2.x hit-cacheop workaround is configured and the
 * CPU matches: load the high part of 0xa0000000 into GPR_AT.
 * NOTE(review): presumably consumed by the store/cacheop helper - confirm.
 */
if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
uasm_i_lui(&buf, GPR_AT, uasm_rel_hi(0xa0000000));
/*
 * Prologue prefetches: at most 8 cache lines, and no more than fit in
 * the prefetch bias. Nothing is emitted when cache_line_size is 0.
 * build_clear_pref() is called with -off for off counting down in
 * cache_line_size steps; off is 0 when the loop ends.
 */
off = cache_line_size ? min(8, pref_bias_clear_store / cache_line_size)
* cache_line_size : 0; while (off) {
build_clear_pref(&buf, -off);
off -= cache_line_size;
}
/*
 * Main clear loop with prefetching.
 * NOTE(review): 'off' is used as left by the preceding code; the loop
 * bounds below only make sense if it is 0 here - confirm.
 *
 * uasm_l_clear_pref() presumably records the label_clear_pref position
 * at the current end of the buffer - confirm against the uasm helpers.
 *
 * First half: one prefetch/store pair per word for offsets
 * 0 .. half_clear_loop_size - clear_word_size.
 */
uasm_l_clear_pref(&l, buf); do {
build_clear_pref(&buf, off);
build_clear_store(&buf, off);
off += clear_word_size;
} while (off < half_clear_loop_size);
/* Advance GPR_A0 by twice the span emitted so far. */
pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
/*
 * Second half: the same pairs addressed with negative offsets
 * -off .. -clear_word_size relative to the advanced GPR_A0.
 * The branch back to label_clear_pref (GPR_A0 != GPR_A2) is emitted
 * just before the final store.
 * NOTE(review): presumably so the last store occupies the branch delay
 * slot - confirm.
 */
off = -off; do {
build_clear_pref(&buf, off); if (off == -clear_word_size)
uasm_il_bne(&buf, &r, GPR_A0, GPR_A2, label_clear_pref);
build_clear_store(&buf, off);
off += clear_word_size;
} while (off < 0);
/*
 * Tail loop without prefetching, generated only when the clear prefetch
 * bias is non-zero. GPR_A2 is set to GPR_A0 + pref_bias_clear_store and
 * the loop branches back while GPR_A0 != GPR_A2.
 */
if (pref_bias_clear_store) {
pg_addiu(&buf, GPR_A2, GPR_A0, pref_bias_clear_store);
/* Presumably records the label_clear_nopref position - confirm. */
uasm_l_clear_nopref(&l, buf);
/* First half: one store per word for offsets 0 .. half_clear_loop_size. */
off = 0; do {
build_clear_store(&buf, off);
off += clear_word_size;
} while (off < half_clear_loop_size);
/* Advance GPR_A0 by twice the span emitted so far. */
pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
/*
 * Second half: stores at negative offsets -off .. -clear_word_size.
 * The branch to label_clear_nopref is emitted just before the final
 * store (presumably the delay slot - confirm).
 */
off = -off; do { if (off == -clear_word_size)
uasm_il_bne(&buf, &r, GPR_A0, GPR_A2,
label_clear_nopref);
build_clear_store(&buf, off);
off += clear_word_size;
} while (off < 0);
}
/*
 * This algorithm makes the following assumptions:
 *   - All prefetch biases are multiples of 8 words.
 *   - The prefetch biases are less than one page.
 *   - The store prefetch bias isn't greater than the load
 *     prefetch bias.
 *
 * These are enforced below with BUG_ON(). Note that the page-size check
 * only rejects a load bias larger than PAGE_SIZE; a bias equal to
 * PAGE_SIZE passes it.
 */
BUG_ON(pref_bias_copy_load % (8 * copy_word_size));
BUG_ON(pref_bias_copy_store % (8 * copy_word_size));
BUG_ON(PAGE_SIZE < pref_bias_copy_load);
BUG_ON(pref_bias_copy_store > pref_bias_copy_load);
/*
 * off = number of bytes of the page that lie before the final
 * pref_bias_copy_load bytes. It is combined with GPR_A0 into GPR_A2,
 * which the later loop branch compares against GPR_A0.
 * A 16-bit OR-immediate is used when off fits in 16 bits and the load
 * bias is non-zero; otherwise pg_addiu() is used.
 * NOTE(review): the OR form presumably relies on GPR_A0 holding a
 * page-aligned address - confirm.
 */
off = PAGE_SIZE - pref_bias_copy_load; if (off > 0xffff || !pref_bias_copy_load)
pg_addiu(&buf, GPR_A2, GPR_A0, off); else
uasm_i_ori(&buf, GPR_A2, GPR_A0, off);
/*
 * Only when the R4600 v2.x hit-cacheop workaround is configured and the
 * CPU matches: load the high part of 0xa0000000 into GPR_AT.
 * NOTE(review): presumably consumed by the store/cacheop helper - confirm.
 */
if (IS_ENABLED(CONFIG_WAR_R4600_V2_HIT_CACHEOP) && cpu_is_r4600_v2_x())
uasm_i_lui(&buf, GPR_AT, uasm_rel_hi(0xa0000000));
/*
 * Prologue load prefetches: at most 8 cache lines, and no more than fit
 * in the load prefetch bias; none when cache_line_size is 0.
 */
off = cache_line_size ? min(8, pref_bias_copy_load / cache_line_size) *
cache_line_size : 0; while (off) {
build_copy_load_pref(&buf, -off);
off -= cache_line_size;
}
/*
 * Prologue store prefetches, bounded the same way by the store prefetch
 * bias. off is 0 again when this loop ends.
 */
off = cache_line_size ? min(8, pref_bias_copy_store / cache_line_size) *
cache_line_size : 0; while (off) {
build_copy_store_pref(&buf, -off);
off -= cache_line_size;
}
/*
 * Main copy loop with prefetching on both the load and the store side.
 * NOTE(review): 'off' is used as left by the preceding code; the loop
 * bounds below only make sense if it is 0 here - confirm.
 *
 * uasm_l_copy_pref_both() presumably records the label_copy_pref_both
 * position at the current end of the buffer - confirm.
 *
 * Each generator iteration handles four consecutive words: four
 * prefetch/load pairs into GPR_T0..GPR_T3, followed by four
 * prefetch/store pairs from the same registers.
 *
 * First half: offsets 0 .. half_copy_loop_size.
 */
uasm_l_copy_pref_both(&l, buf); do {
build_copy_load_pref(&buf, off);
build_copy_load(&buf, GPR_T0, off);
build_copy_load_pref(&buf, off + copy_word_size);
build_copy_load(&buf, GPR_T1, off + copy_word_size);
build_copy_load_pref(&buf, off + 2 * copy_word_size);
build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
build_copy_load_pref(&buf, off + 3 * copy_word_size);
build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
build_copy_store_pref(&buf, off);
build_copy_store(&buf, GPR_T0, off);
build_copy_store_pref(&buf, off + copy_word_size);
build_copy_store(&buf, GPR_T1, off + copy_word_size);
build_copy_store_pref(&buf, off + 2 * copy_word_size);
build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
build_copy_store_pref(&buf, off + 3 * copy_word_size);
build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
off += 4 * copy_word_size;
} while (off < half_copy_loop_size);
/* Advance both GPR_A1 and GPR_A0 by twice the span emitted so far. */
pg_addiu(&buf, GPR_A1, GPR_A1, 2 * off);
pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
/*
 * Second half: the same four-word groups addressed with negative
 * offsets -off .. -copy_word_size relative to the advanced pointers.
 */
off = -off; do {
build_copy_load_pref(&buf, off);
build_copy_load(&buf, GPR_T0, off);
build_copy_load_pref(&buf, off + copy_word_size);
build_copy_load(&buf, GPR_T1, off + copy_word_size);
build_copy_load_pref(&buf, off + 2 * copy_word_size);
build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
build_copy_load_pref(&buf, off + 3 * copy_word_size);
build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
build_copy_store_pref(&buf, off);
build_copy_store(&buf, GPR_T0, off);
build_copy_store_pref(&buf, off + copy_word_size);
build_copy_store(&buf, GPR_T1, off + copy_word_size);
build_copy_store_pref(&buf, off + 2 * copy_word_size);
build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
/*
 * In the last group the branch back to label_copy_pref_both
 * (GPR_A2 != GPR_A0) is emitted just before the final store.
 * NOTE(review): presumably so that store occupies the branch delay
 * slot - confirm.
 */
build_copy_store_pref(&buf, off + 3 * copy_word_size); if (off == -(4 * copy_word_size))
uasm_il_bne(&buf, &r, GPR_A2, GPR_A0, label_copy_pref_both);
build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
off += 4 * copy_word_size;
} while (off < 0);
/*
 * Second copy loop, generated only when the load and store prefetch
 * biases differ. It prefetches on the store side only. GPR_A2 is set to
 * GPR_A0 + (pref_bias_copy_load - pref_bias_copy_store) and the loop
 * branches back while GPR_A2 != GPR_A0.
 */
if (pref_bias_copy_load - pref_bias_copy_store) {
pg_addiu(&buf, GPR_A2, GPR_A0,
pref_bias_copy_load - pref_bias_copy_store);
/* Presumably records the label_copy_pref_store position - confirm. */
uasm_l_copy_pref_store(&l, buf);
/*
 * First half: per group of four words, four plain loads into
 * GPR_T0..GPR_T3 followed by four prefetch/store pairs.
 */
off = 0; do {
build_copy_load(&buf, GPR_T0, off);
build_copy_load(&buf, GPR_T1, off + copy_word_size);
build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
build_copy_store_pref(&buf, off);
build_copy_store(&buf, GPR_T0, off);
build_copy_store_pref(&buf, off + copy_word_size);
build_copy_store(&buf, GPR_T1, off + copy_word_size);
build_copy_store_pref(&buf, off + 2 * copy_word_size);
build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
build_copy_store_pref(&buf, off + 3 * copy_word_size);
build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
off += 4 * copy_word_size;
} while (off < half_copy_loop_size);
/* Advance both GPR_A1 and GPR_A0 by twice the span emitted so far. */
pg_addiu(&buf, GPR_A1, GPR_A1, 2 * off);
pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
/* Second half: the same groups at negative offsets. */
off = -off; do {
build_copy_load(&buf, GPR_T0, off);
build_copy_load(&buf, GPR_T1, off + copy_word_size);
build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
build_copy_store_pref(&buf, off);
build_copy_store(&buf, GPR_T0, off);
build_copy_store_pref(&buf, off + copy_word_size);
build_copy_store(&buf, GPR_T1, off + copy_word_size);
build_copy_store_pref(&buf, off + 2 * copy_word_size);
build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
/*
 * In the last group the branch to label_copy_pref_store is emitted
 * just before the final store (presumably the delay slot - confirm).
 */
build_copy_store_pref(&buf, off + 3 * copy_word_size); if (off == -(4 * copy_word_size))
uasm_il_bne(&buf, &r, GPR_A2, GPR_A0,
label_copy_pref_store);
build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
off += 4 * copy_word_size;
} while (off < 0);
}
/*
 * Final copy loop without any prefetching, generated only when the
 * store prefetch bias is non-zero. GPR_A2 is set to
 * GPR_A0 + pref_bias_copy_store and the loop branches back while
 * GPR_A2 != GPR_A0.
 */
if (pref_bias_copy_store) {
pg_addiu(&buf, GPR_A2, GPR_A0, pref_bias_copy_store);
/* Presumably records the label_copy_nopref position - confirm. */
uasm_l_copy_nopref(&l, buf);
/*
 * First half: per group of four words, four loads into
 * GPR_T0..GPR_T3 followed by four stores.
 */
off = 0; do {
build_copy_load(&buf, GPR_T0, off);
build_copy_load(&buf, GPR_T1, off + copy_word_size);
build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
build_copy_store(&buf, GPR_T0, off);
build_copy_store(&buf, GPR_T1, off + copy_word_size);
build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size);
build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
off += 4 * copy_word_size;
} while (off < half_copy_loop_size);
/* Advance both GPR_A1 and GPR_A0 by twice the span emitted so far. */
pg_addiu(&buf, GPR_A1, GPR_A1, 2 * off);
pg_addiu(&buf, GPR_A0, GPR_A0, 2 * off);
/* Second half: the same groups at negative offsets. */
off = -off; do {
build_copy_load(&buf, GPR_T0, off);
build_copy_load(&buf, GPR_T1, off + copy_word_size);
build_copy_load(&buf, GPR_T2, off + 2 * copy_word_size);
build_copy_load(&buf, GPR_T3, off + 3 * copy_word_size);
build_copy_store(&buf, GPR_T0, off);
build_copy_store(&buf, GPR_T1, off + copy_word_size);
/*
 * In the last group the branch to label_copy_nopref is emitted just
 * before the final store (presumably the delay slot - confirm).
 */
build_copy_store(&buf, GPR_T2, off + 2 * copy_word_size); if (off == -(4 * copy_word_size))
uasm_il_bne(&buf, &r, GPR_A2, GPR_A0,
label_copy_nopref);
build_copy_store(&buf, GPR_T3, off + 3 * copy_word_size);
off += 4 * copy_word_size;
} while (off < 0);
}
/* * Pad descriptors to cacheline, since each is exclusively owned by a * particular CPU.
*/ struct dmadscr {
u64 dscr_a;
u64 dscr_b;
u64 pad_a;
u64 pad_b;
} ____cacheline_aligned_in_smp page_descr[DM_NUM_CHANNELS];
/* * Don't really want to do it this way, but there's no * reliable way to delay completion detection.
*/ while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
& M_DM_DSCR_BASE_INTERRUPT))
;
__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
EXPORT_SYMBOL(clear_page);
/*
 * If either page is not in KSEG0, use the old way: hand the copy to
 * copy_page_cpu() and return.
 */
if ((long)KSEGX((unsigned long)to) != (long)CKSEG0
    || (long)KSEGX((unsigned long)from) != (long)CKSEG0)
	return copy_page_cpu(to, from);
/* * Don't really want to do it this way, but there's no * reliable way to delay completion detection.
*/ while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
& M_DM_DSCR_BASE_INTERRUPT))
;
__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
EXPORT_SYMBOL(copy_page);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.