/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2022 Michael T. Kloos <michael@michaelkloos.com>
 */
#include <linux/linkage.h>
#include <asm/asm.h>
SYM_FUNC_START(__memmove)
/*
 * Returns
 *   a0 - dest
 *
 * Parameters
 *   a0 - Inclusive first byte of dest
 *   a1 - Inclusive first byte of src
 *   a2 - Length of copy n
 *
 * Because the return matches the parameter register a0,
 * we will not clobber or modify that register.
 *
 * Note: This currently only works on little-endian.
 * To port to big-endian, reverse the direction of shifts
 * in the 2 misaligned fixup copy loops.
 */
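/*
 * Expanded endianness note (an interpretation, not upstream text): on
 * little-endian RISC-V, the byte at the lowest address lands in the
 * least-significant bits of a loaded word, so the fixup loops merge
 * words as (word0 >> a6) | (word1 << a7). On big-endian, the same
 * bytes would be most-significant, so each srl/sll pair would need to
 * swap directions.
 */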
/* Return if nothing to do */
beq a0, a1, .Lreturn_from_memmove
beqz a2, .Lreturn_from_memmove
/*
 * Register Uses
 *      Forward Copy: a1 - Index counter of src
 *      Reverse Copy: a4 - Index counter of src
 *      Forward Copy: t3 - Index counter of dest
 *      Reverse Copy: t4 - Index counter of dest
 *   Both Copy Modes: t5 - Inclusive first multibyte/aligned of dest
 *   Both Copy Modes: t6 - Non-inclusive last multibyte/aligned of dest
 *   Both Copy Modes: t0 - Link / Temporary for load-store
 *   Both Copy Modes: t1 - Temporary for load-store
 *   Both Copy Modes: t2 - Temporary for load-store
 *   Both Copy Modes: a5 - dest to src alignment offset
 *   Both Copy Modes: a6 - Shift amount
 *   Both Copy Modes: a7 - Inverse shift amount
 *   Both Copy Modes: a2 - Alternate breakpoint for unrolled loops
 */
/*
 * Solve for some register values now.
 * Byte copy does not need t5 or t6.
 */
mv t3, a0
add t4, a0, a2
add a4, a1, a2
/*
 * Byte copy if copying less than (2 * SZREG) bytes. This can
 * cause problems with the bulk copy implementation and is
 * small enough not to bother.
 */
andi t0, a2, -(2 * SZREG)
beqz t0, .Lbyte_copy
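/*
 * Worked example of the threshold test above (illustrative values): on
 * RV64, 2 * SZREG = 16. For n = a2 = 15, 15 & -16 = 0, so t0 is zero
 * and we branch to the byte copy. For n = 16, 16 & -16 = 16 and the
 * bulk copy paths are used.
 */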
/*
 * Now solve for t5 and t6.
 */
andi t5, t3, -SZREG
andi t6, t4, -SZREG
/*
 * If dest (Register t3) rounded down to the nearest naturally
 * aligned SZREG address does not equal dest, then add SZREG
 * to find the low-bound of SZREG alignment in the dest memory
 * region. Note that this could overshoot the dest memory
 * region if n is less than SZREG. This is one reason why
 * we always byte copy if n is less than SZREG.
 * Otherwise, dest is already naturally aligned to SZREG.
 */
beq t5, t3, 1f
addi t5, t5, SZREG
1:
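/*
 * Worked example of the round-up above (illustrative values): on RV64
 * (SZREG = 8), dest = 0x1003 gives t5 = 0x1003 & ~7 = 0x1000. Since
 * t5 != dest, SZREG is added, making t5 = 0x1008: the first naturally
 * aligned address inside the dest region.
 */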
/*
 * If the dest and src are co-aligned to SZREG, then there is
 * no need for the full rigmarole of a misaligned fixup copy.
 * Instead, do a simpler co-aligned copy.
 */
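/*
 * Illustrative check (values assumed): with SZREG = 8, dest = 0x2004
 * and src = 0x300c share the same low 3 bits, so
 * (dest ^ src) & (SZREG - 1) == 0 and the simpler co-aligned path is
 * taken. dest = 0x2004 and src = 0x3005 would differ, forcing the
 * misaligned fixup copy.
 */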
xor t0, a0, a1
andi t1, t0, (SZREG - 1)
beqz t1, .Lcoaligned_copy
/* Fall through to misaligned fixup copy */

.Lmisaligned_fixup_copy:
bltu a1, a0, .Lmisaligned_fixup_copy_reverse

.Lmisaligned_fixup_copy_forward:
jal  t0, .Lbyte_copy_until_aligned_forward
andi a5, a1, (SZREG - 1) /* Find the alignment offset of src (a1) */
slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
sub  a5, a1, t3 /* Find the difference between src and dest */
andi a1, a1, -SZREG /* Align the src pointer */
addi a2, t6, SZREG /* The other breakpoint for the unrolled loop */
/*
 * Compute The Inverse Shift
 * a7 = XLEN - a6 = XLEN + -a6
 * 2s complement negation to find the negative: -a6 = ~a6 + 1
 * Add that to XLEN. XLEN = SZREG * 8.
 */
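/*
 * Worked example (illustrative values, not from the original source):
 * on RV64, XLEN = SZREG * 8 = 64. If src is 3 bytes past alignment,
 * a6 = 3 * 8 = 24 and a7 = ~24 + (64 + 1) = -25 + 65 = 40 = 64 - 24.
 * Each merged word then combines the upper 40 bits of one source word
 * with the lower 24 bits of the next.
 */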
not a7, a6
addi a7, a7, (SZREG * 8 + 1)
/*
 * Fix Misalignment Copy Loop - Forward
 * load_value0 = load_ptr[0];
 * do {
 *	load_value1 = load_ptr[1];
 *	store_ptr += 2;
 *	store_ptr[0 - 2] = (load_value0 >> {a6}) | (load_value1 << {a7});
 *
 *	if (store_ptr == {a2})
 *		break;
 *
 *	load_value0 = load_ptr[2];
 *	load_ptr += 2;
 *	store_ptr[1 - 2] = (load_value1 >> {a6}) | (load_value0 << {a7});
 *
 * } while (store_ptr != store_ptr_end);
 * store_ptr = store_ptr_end;
 */

REG_L t0, (0 * SZREG)(a1)
1:
REG_L t1, (1 * SZREG)(a1)
addi  t3, t3, (2 * SZREG)
srl   t0, t0, a6
sll   t2, t1, a7
or    t2, t0, t2
REG_S t2, ((0 * SZREG) - (2 * SZREG))(t3)

beq   t3, a2, 2f

REG_L t0, (2 * SZREG)(a1)
addi  a1, a1, (2 * SZREG)
srl   t1, t1, a6
sll   t2, t0, a7
or    t2, t1, t2
REG_S t2, ((1 * SZREG) - (2 * SZREG))(t3)

bne   t3, t6, 1b
2:
mv    t3, t6 /* Fix the dest pointer in case the loop was broken */

add  a1, t3, a5 /* Restore the src pointer */
j .Lbyte_copy_forward /* Copy any remaining bytes */

.Lmisaligned_fixup_copy_reverse:
jal  t0, .Lbyte_copy_until_aligned_reverse

andi a5, a4, (SZREG - 1) /* Find the alignment offset of src (a4) */
slli a6, a5, 3 /* Multiply by 8 to convert that to bits to shift */
sub  a5, a4, t4 /* Find the difference between src and dest */
andi a4, a4, -SZREG /* Align the src pointer */
addi a2, t5, -SZREG /* The other breakpoint for the unrolled loop */
/*
 * Compute The Inverse Shift
 * a7 = XLEN - a6 = XLEN + -a6
 * 2s complement negation to find the negative: -a6 = ~a6 + 1
 * Add that to XLEN. XLEN = SZREG * 8.
 */
not a7, a6
addi a7, a7, (SZREG * 8 + 1)

/*
 * Fix Misalignment Copy Loop - Reverse
 * load_value1 = load_ptr[0];
 * do {
 *	load_value0 = load_ptr[-1];
 *	store_ptr -= 2;
 *	store_ptr[1] = (load_value0 >> {a6}) | (load_value1 << {a7});
 *
 *	if (store_ptr == {a2})
 *		break;
 *
 *	load_value1 = load_ptr[-2];
 *	load_ptr -= 2;
 *	store_ptr[0] = (load_value1 >> {a6}) | (load_value0 << {a7});
 *
 * } while (store_ptr != store_ptr_end);
 * store_ptr = store_ptr_end;
 */

REG_L t1, ( 0 * SZREG)(a4)
1:
REG_L t0, (-1 * SZREG)(a4)
addi  t4, t4, (-2 * SZREG)
sll   t1, t1, a7
srl   t2, t0, a6
or    t2, t1, t2
REG_S t2, ( 1 * SZREG)(t4)

beq   t4, a2, 2f

REG_L t1, (-2 * SZREG)(a4)
addi  a4, a4, (-2 * SZREG)
sll   t0, t0, a7
srl   t2, t1, a6
or    t2, t0, t2
REG_S t2, ( 0 * SZREG)(t4)
bne t4, t5, 1b
2:
mv t4, t5 /* Fix the dest pointer in case the loop was broken */
add a4, t4, a5 /* Restore the src pointer */
j .Lbyte_copy_reverse /* Copy any remaining bytes */
/*
 * Simple copy loops for SZREG co-aligned memory locations.
 * These also make calls to do byte copies for any unaligned
 * data at their terminations.
 */
.Lcoaligned_copy:
bltu a1, a0, .Lcoaligned_copy_reverse

.Lcoaligned_copy_forward:
jal t0, .Lbyte_copy_until_aligned_forward

1:
REG_L t1, ( 0 * SZREG)(a1)
addi  a1, a1, SZREG
addi  t3, t3, SZREG
REG_S t1, (-1 * SZREG)(t3)
bne   t3, t6, 1b

j .Lbyte_copy_forward /* Copy any remaining bytes */

.Lcoaligned_copy_reverse:
jal t0, .Lbyte_copy_until_aligned_reverse

1:
REG_L t1, (-1 * SZREG)(a4)
addi  a4, a4, -SZREG
addi  t4, t4, -SZREG
REG_S t1, ( 0 * SZREG)(t4)
bne   t4, t5, 1b

j .Lbyte_copy_reverse /* Copy any remaining bytes */
/*
 * These are basically sub-functions within the function. They
 * are used to byte copy until the dest pointer is in alignment.
 * At which point, a bulk copy method can be used by the
 * calling code. These work on the same registers as the bulk
 * copy loops. Therefore, the register values can be picked
 * up from where they were left and we avoid code duplication
 * without any overhead except the call in and return jumps.
 */
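/*
 * Call/return pattern used by these helpers (explanatory note, not
 * upstream text): "jal t0, <label>" stores the caller's return address
 * in t0 rather than ra, and "jalr zero, 0x0(t0)" jumps back through
 * t0, so ra and the stack are left untouched.
 */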
.Lbyte_copy_until_aligned_forward:
beq t3, t5, 2f
1:
lb t1, 0(a1)
addi a1, a1, 1
addi t3, t3, 1
sb t1, -1(t3)
bne t3, t5, 1b
2:
jalr zero, 0x0(t0) /* Return to multibyte copy loop */

.Lbyte_copy_until_aligned_reverse:
beq  t4, t6, 2f
1:
lb   t1, -1(a4)
addi a4, a4, -1
addi t4, t4, -1
sb   t1,  0(t4)
bne  t4, t6, 1b
2:
jalr zero, 0x0(t0) /* Return to multibyte copy loop */
/*
 * Simple byte copy loops.
 * These will byte copy until they reach the end of data to copy.
 * At that point, they will call to return from memmove.
 */
.Lbyte_copy:
bltu a1, a0, .Lbyte_copy_reverse

.Lbyte_copy_forward:
beq  t3, t4, 2f
1:
lb   t1,  0(a1)
addi a1, a1, 1
addi t3, t3, 1
sb   t1, -1(t3)
bne  t3, t4, 1b
2:

.Lreturn_from_memmove:
ret

.Lbyte_copy_reverse:
beq  t4, t3, 2f
1:
lb   t1, -1(a4)
addi a4, a4, -1
addi t4, t4, -1
sb   t1,  0(t4)
bne  t4, t3, 1b
2:
j .Lreturn_from_memmove

SYM_FUNC_END(__memmove)
SYM_FUNC_ALIAS(memmove, __memmove)