/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * IP/TCP/UDP checksumming routines * * Xtensa version: Copyright (C) 2001 Tensilica, Inc. by Kevin Chea * Optimized by Joe Taylor
*/
/* * computes a partial checksum, e.g. for TCP/UDP fragments
*/
/* * unsigned int csum_partial(const unsigned char *buf, int len, * unsigned int sum); * a2 = buf * a3 = len * a4 = sum * * This function assumes 2- or 4-byte alignment. Other alignments will fail!
*/
l16ui a6, a2, 0 /* common case, len >= 2 */
ONES_ADD(a4, a6)
addi a2, a2, 2 /* adjust buf */
addi a3, a3, -2 /* adjust len */
j 1b /* now buf is 4-byte aligned */
/* case: odd-byte aligned, len > 1 * This case is dog slow, so don't give us an odd address. * (I don't think this ever happens, but just in case.)
*/
8:
srli a5, a3, 2 /* 4-byte chunks */
#if XCHAL_HAVE_LOOPS
loopgtz a5, 2f
#else
beqz a5, 2f
slli a5, a5, 2
add a5, a5, a2 /* a5 = end of last 4-byte chunk */
.Loop3:
#endif
l8ui a6, a2, 0 /* bits 24..31 */
l16ui a7, a2, 1 /* bits 8..23 */
l8ui a8, a2, 3 /* bits 0.. 8 */
#ifdef __XTENSA_EB__
slli a6, a6, 24
#else
slli a8, a8, 24
#endif
slli a7, a7, 8
or a7, a7, a6
or a7, a7, a8
ONES_ADD(a4, a7)
addi a2, a2, 4
#if !XCHAL_HAVE_LOOPS
blt a2, a5, .Loop3
#endif
2:
_bbci.l a3, 1, 3f /* remaining 2-byte chunk, still odd addr */
l8ui a6, a2, 0
l8ui a7, a2, 1
#ifdef __XTENSA_EB__
slli a6, a6, 8
#else
slli a7, a7, 8
#endif
or a7, a7, a6
ONES_ADD(a4, a7)
addi a2, a2, 2
3:
j 5b /* branch to handle the remaining byte */
ENDPROC(csum_partial)
EXPORT_SYMBOL(csum_partial)
/* * Copy from ds while checksumming, otherwise like csum_partial
*/
/* unsigned int csum_partial_copy_generic (const char *src, char *dst, int len) a2 = src a3 = dst a4 = len a5 = sum a8 = temp a9 = temp a10 = temp
This function is optimized for 4-byte aligned addresses. Other alignments work, but not nearly as efficiently.
*/
ENTRY(csum_partial_copy_generic)
abi_entry_default
movi a5, -1
or a10, a2, a3
/* We optimize the following alignment tests for the 4-byte aligned case. Two bbsi.l instructions might seem more optimal (commented out below). However, both labels 5: and 3: are out of the imm8 range, so the assembler relaxes them into equivalent bbci.l, j combinations, which is actually
slower. */
extui a9, a10, 0, 2
beqz a9, 1f /* branch if both are 4-byte aligned */
bbsi.l a10, 0, 5f /* branch if one address is odd */
j 3f /* one address is 2-byte aligned */
5: /* Control branch to here when either src or dst is odd. We process all bytes using 8-bit accesses. Grossly inefficient,
so don't feed us an odd address. */
srli a10, a4, 1 /* handle in pairs for 16-bit csum */
#if XCHAL_HAVE_LOOPS
loopgtz a10, 6f
#else
beqz a10, 6f
slli a10, a10, 1
add a10, a10, a2 /* a10 = end of last odd-aligned, 2-byte src chunk */
.Loop8:
#endif
EX(10f) l8ui a9, a2, 0
EX(10f) l8ui a8, a2, 1
EX(10f) s8i a9, a3, 0
EX(10f) s8i a8, a3, 1
#ifdef __XTENSA_EB__
slli a9, a9, 8 /* combine into a single 16-bit value */
#else /* for checksum computation */
slli a8, a8, 8
#endif
or a9, a9, a8
ONES_ADD(a5, a9)
addi a2, a2, 2
addi a3, a3, 2
#if !XCHAL_HAVE_LOOPS
blt a2, a10, .Loop8
#endif
6:
j 4b /* process the possible trailing odd byte */
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.