/* * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive * for more details. * * Quick'n'dirty IP checksum ... * * Copyright (C) 1998, 1999 Ralf Baechle * Copyright (C) 1999 Silicon Graphics, Inc. * Copyright (C) 2007 Maciej W. Rozycki * Copyright (C) 2014 Imagination Technologies Ltd.
*/
#include <linux/errno.h>
#include <linux/export.h>
#include <asm/asm.h>
#include <asm/asm-offsets.h>
#include <asm/regdef.h>
#ifdef CONFIG_64BIT
/*
 * This code base is shared with the mips32 tree, which uses the o32 ABI
 * register definitions.  On 64-bit builds, redefine the temporaries from
 * the n64 ABI register naming to the o32 ABI register naming, so that
 * t0-t7 map to $8-$15.
 *
 * NOTE(review): the #endif closing this #ifdef is not visible in this
 * part of the file; confirm that it is present.
 */
#undef t0
#undef t1
#undef t2
#undef t3
#define t0 $8
#define t1 $9
#define t2 $10
#define t3 $11
#define t4 $12
#define t5 $13
#define t6 $14
#define t7 $15
/*
 * Tail of csum_partial: byte-order fix-up for an odd start address,
 * fold in the caller's partial checksum, and return.
 *
 * t7 is non-zero when the buffer had odd alignment.  In that case the
 * two bytes of every halfword of sum are exchanged.
 */
#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \
    defined(CONFIG_CPU_LOONGSON64)
	.set	push
	.set	arch=mips32r2
	wsbh	v1, sum			/* v1 = sum, bytes swapped per halfword */
	movn	sum, v1, t7		/* keep the swapped value only if t7 != 0 */
	.set	pop
#else
	beqz	t7, 1f			/* odd buffer alignment? */
	 lui	v1, 0x00ff		/* v1 = 0x00ff0000; harmless if branch taken */
	addu	v1, 0x00ff		/* v1 = 0x00ff00ff */
	and	t0, sum, v1		/* t0 = low byte of each halfword ... */
	sll	t0, t0, 8		/* ... moved up to the high byte */
	srl	sum, sum, 8		/* high byte of each halfword moved down */
	and	sum, sum, v1
	or	sum, sum, t0		/* recombine: bytes exchanged */
1:
#endif
	.set	reorder
	/* Add the passed partial csum. */
	ADDC32(sum, a2)
	jr	ra
	.set	noreorder
	END(csum_partial)
/*
 * Checksum and copy routines based on memcpy.S
 *
 *	csum_partial_copy_nocheck(src, dst, len)
 *	__csum_partial_copy_kernel(src, dst, len)
 *
 * See "Spec" in memcpy.S for details.  Unlike __copy_user, all
 * functions in this file use the standard calling convention.
 */
/* Register roles used by the checksum-and-copy code that follows. */
#define src a0		/* address the data is loaded from */
#define dst a1		/* address the data is stored to */
#define len a2		/* number of bytes still to process */
#define sum v0		/* running checksum accumulator */
#define odd t8		/* dst & 1: set when dst starts on an odd address */
.macro __BUILD_CSUM_PARTIAL_COPY_USER mode, from, to
	/*
	 * Entry of the checksum-and-copy body: initialise the accumulator
	 * and pick a copy path from len and the alignment of src and dst.
	 *
	 * NOTE(review): the macro arguments mode, from and to are not
	 * referenced in the part of the macro visible here.
	 */
	li sum, -1
	move odd, zero
	/*
	 * Note: dst & src may be unaligned, len may be 0
	 * Temps
	 */
	/*
	 * The "issue break"s below are very approximate.
	 * Issue delays for dcache fills will perturb the schedule, as will
	 * load queue full replay traps, etc.
	 *
	 * If len < NBYTES use byte operations.
	 */
	sltu t2, len, NBYTES
	and t1, dst, ADDRMASK
	bnez t2, .Lcopy_bytes_checklen\@
	 and t0, src, ADDRMASK
	andi odd, dst, 0x1 /* odd buffer? */
	bnez t1, .Ldst_unaligned\@
	 nop
	bnez t0, .Lsrc_unaligned_dst_aligned\@
	/*
	 * use delay slot for fall-through
	 * src and dst are aligned; need to compute rem
	 *
	 * The instruction after this branch is the first one at
	 * .Lboth_aligned, so it runs whether or not the branch is taken.
	 */
.Lboth_aligned\@:
	/*
	 * src and dst are both aligned: copy and checksum 8 units
	 * (8*NBYTES bytes) per iteration.
	 *
	 * The SRL below is also the delay-slot instruction of the
	 * bnez to .Lsrc_unaligned_dst_aligned that precedes this label.
	 */
	 SRL	t0, len, LOG_NBYTES+3	# +3 for 8 units/iter
	beqz	t0, .Lcleanup_both_aligned\@	# len < 8*NBYTES
	 nop
	SUB	len, 8*NBYTES		# subtract here for bgez loop
	.align	4
1:
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	LOAD(t4, UNIT(4)(src))
	LOAD(t5, UNIT(5)(src))
	LOAD(t6, UNIT(6)(src))
	LOAD(t7, UNIT(7)(src))
	SUB	len, len, 8*NBYTES
	ADD	src, src, 8*NBYTES
	/*
	 * Each even-numbered unit is stored before the ADDC that
	 * overwrites its register with the pair sum; the pair sum is
	 * then accumulated into sum.
	 */
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	STORE(t4, UNIT(4)(dst))
	ADDC(t4, t5)
	STORE(t5, UNIT(5)(dst))
	ADDC(sum, t4)
	STORE(t6, UNIT(6)(dst))
	ADDC(t6, t7)
	STORE(t7, UNIT(7)(dst))
	ADDC(sum, t6)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 8*NBYTES
	bgez	len, 1b			/* len was pre-decremented by 8*NBYTES */
	.set	noreorder
	ADD	len, 8*NBYTES		# revert len (see above)
/*
 * len == the number of bytes left to copy < 8*NBYTES
 *
 * Done if len is zero.  Otherwise, if at least 4 units remain, copy and
 * checksum 4 units here, then fall through to .Lless_than_4units.
 */
.Lcleanup_both_aligned\@:
#define rem t7
	beqz	len, .Ldone\@
	 sltu	t0, len, 4*NBYTES	/* delay slot: t0 = (len < 4*NBYTES) */
	bnez	t0, .Lless_than_4units\@
	 and	rem, len, (NBYTES-1)	# rem = len % NBYTES
	/*
	 * len >= 4*NBYTES
	 */
	LOAD(t0, UNIT(0)(src))
	LOAD(t1, UNIT(1)(src))
	LOAD(t2, UNIT(2)(src))
	LOAD(t3, UNIT(3)(src))
	SUB	len, len, 4*NBYTES
	ADD	src, src, 4*NBYTES
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	beqz	len, .Ldone\@
	.set	noreorder
.Lless_than_4units\@:
	/*
	 * rem = len % NBYTES
	 *
	 * If len == rem there is no whole unit left, so go straight to
	 * the byte copy.  Otherwise copy one unit per iteration until
	 * only rem bytes remain.
	 */
	beq	rem, len, .Lcopy_bytes\@
	 nop
1:
	LOAD(t0, 0(src))
	ADD	src, src, NBYTES
	SUB	len, len, NBYTES
	STORE(t0, 0(dst))
	ADDC(sum, t0)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, NBYTES
	bne	rem, len, 1b
	.set	noreorder

	/*
	 * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
	 * A loop would do only a byte at a time with possible branch
	 * mispredicts.  Can't do an explicit LOAD dst,mask,or,STORE
	 * because can't assume read-access to dst.  Instead, use
	 * STREST dst, which doesn't require read access to dst.
	 *
	 * This code should perform better than a simple loop on modern,
	 * wide-issue mips processors because the code has fewer branches and
	 * more instruction-level parallelism.
	 */
#define bits t2
	beqz	len, .Ldone\@
	 ADD	t1, dst, len	# t1 is just past last byte of dst
	li	bits, 8*NBYTES
	SLL	rem, len, 3	# rem = number of bits to keep
	LOAD(t0, 0(src))
	SUB	bits, bits, rem # bits = number of bits to discard
	SHIFT_DISCARD t0, t0, bits
	STREST(t0, -1(t1))
	SHIFT_DISCARD_REVERT t0, t0, bits
	.set	reorder
	ADDC(sum, t0)
	b	.Ldone\@
	.set	noreorder
.Ldst_unaligned\@:
	/*
	 * dst is unaligned
	 * t0 = src & ADDRMASK
	 * t1 = dst & ADDRMASK; T1 > 0
	 * len >= NBYTES
	 *
	 * Copy enough bytes to align dst
	 * Set match = (src and dst have same alignment)
	 *
	 * match is zero exactly when t0 == t1, i.e. when src and dst share
	 * the same alignment.
	 */
#define match rem
	LDFIRST(t3, FIRST(0)(src))
	ADD	t2, zero, NBYTES
	LDREST(t3, REST(0)(src))
	SUB	t2, t2, t1	# t2 = number of bytes copied
	xor	match, t0, t1
	STFIRST(t3, FIRST(0)(dst))
	SLL	t4, t1, 3		# t4 = number of bits to discard
	SHIFT_DISCARD t3, t3, t4
	/* no SHIFT_DISCARD_REVERT to handle odd buffer properly */
	ADDC(sum, t3)
	beq	len, t2, .Ldone\@
	 SUB	len, len, t2	/* delay slot: runs on both paths */
	ADD	dst, dst, t2
	beqz	match, .Lboth_aligned\@
	 ADD	src, src, t2	/* delay slot: runs on both paths */
.Lsrc_unaligned_dst_aligned\@:
	/*
	 * dst is aligned, src is not: copy and checksum 4 units per
	 * iteration, using LDFIRST/LDREST pairs for the unaligned loads.
	 * The loop runs until len has dropped to rem.
	 */
	SRL	t0, len, LOG_NBYTES+2	# +2 for 4 units/iter
	beqz	t0, .Lcleanup_src_unaligned\@
	 and	rem, len, (4*NBYTES-1)	# rem = len % 4*NBYTES
1:
	/*
	 * Avoid consecutive LD*'s to the same register since some mips
	 * implementations can't issue them in the same cycle.
	 * It's OK to load FIRST(N+1) before REST(N) because the two addresses
	 * are to the same unit (unless src is aligned, but it's not).
	 */
	LDFIRST(t0, FIRST(0)(src))
	LDFIRST(t1, FIRST(1)(src))
	SUB	len, len, 4*NBYTES
	LDREST(t0, REST(0)(src))
	LDREST(t1, REST(1)(src))
	LDFIRST(t2, FIRST(2)(src))
	LDFIRST(t3, FIRST(3)(src))
	LDREST(t2, REST(2)(src))
	LDREST(t3, REST(3)(src))
	ADD	src, src, 4*NBYTES
#ifdef CONFIG_CPU_SB1
	nop				# improves slotting
#endif
	STORE(t0, UNIT(0)(dst))
	ADDC(t0, t1)
	STORE(t1, UNIT(1)(dst))
	ADDC(sum, t0)
	STORE(t2, UNIT(2)(dst))
	ADDC(t2, t3)
	STORE(t3, UNIT(3)(dst))
	ADDC(sum, t2)
	.set	reorder				/* DADDI_WAR */
	ADD	dst, dst, 4*NBYTES
	bne	len, rem, 1b
	.set	noreorder
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.