/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2013 ARM Ltd. * Copyright (C) 2013 Linaro. * * This code is based on glibc cortex strings work originally authored by Linaro * be found @ * * http://bazaar.launchpad.net/~linaro-toolchain-dev/cortex-strings/trunk/ * files/head:/src/aarch64/
*/
/* * Copy a buffer from src to dest (alignment handled by the hardware) * * Parameters: * x0 - dest * x1 - src * x2 - n * Returns: * x0 - dest
*/
dstin .req x0
src .req x1
count .req x2
tmp1 .req x3
tmp1w .req w3
tmp2 .req x4
tmp2w .req w4
dst .req x6
#ifdef CONFIG_AS_HAS_MOPS
alternative_if_not ARM64_HAS_MOPS
b .Lno_mops
alternative_else_nop_endif
cpy1 dst, src, count
b .Lexitfunc
.Lno_mops:
#endif
cmp count, #16 /*When memory length is less than 16, the accessed are not aligned.*/
b.lo .Ltiny15
neg tmp2, src
ands tmp2, tmp2, #15/* Bytes to reach alignment. */
b.eq .LSrcAligned sub count, count, tmp2 /* * Copy the leading memory data from src to dst in an increasing * address order.By this way,the risk of overwriting the source * memory data is eliminated when the distance between src and * dst is less than 16. The memory accesses here are alignment.
*/
tbz tmp2, #0, 1f
ldrb1 tmp1w, src, #1
strb1 tmp1w, dst, #1
1:
tbz tmp2, #1, 2f
ldrh1 tmp1w, src, #2
strh1 tmp1w, dst, #2
2:
tbz tmp2, #2, 3f
ldr1 tmp1w, src, #4
str1 tmp1w, dst, #4
3:
tbz tmp2, #3, .LSrcAligned
ldr1 tmp1, src, #8
str1 tmp1, dst, #8
.LSrcAligned:
cmp count, #64
b.ge .Lcpy_over64 /* * Deal with small copies quickly by dropping straight into the * exit block.
*/
.Ltail63: /* * Copy up to 48 bytes of data. At this point we only need the * bottom 6 bits of count to be accurate.
*/
ands tmp1, count, #0x30
b.eq .Ltiny15
cmp tmp1w, #0x20
b.eq 1f
b.lt 2f
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
1:
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
2:
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
.Ltiny15: /* * Prefer to break one ldp/stp into several load/store to access * memory in an increasing address order,rather than to load/store 16 * bytes from (src-16) to (dst-16) and to backward the src to aligned * address,which way is used in original cortex memcpy. If keeping * the original memcpy process here, memmove need to satisfy the * precondition that src address is at least 16 bytes bigger than dst * address,otherwise some source data will be overwritten when memove * call memcpy directly. To make memmove simpler and decouple the * memcpy's dependency on memmove, withdrew the original process.
*/
tbz count, #3, 1f
ldr1 tmp1, src, #8
str1 tmp1, dst, #8
1:
tbz count, #2, 2f
ldr1 tmp1w, src, #4
str1 tmp1w, dst, #4
2:
tbz count, #1, 3f
ldrh1 tmp1w, src, #2
strh1 tmp1w, dst, #2
3:
tbz count, #0, .Lexitfunc
ldrb1 tmp1w, src, #1
strb1 tmp1w, dst, #1
b .Lexitfunc
.Lcpy_over64:
subs count, count, #128
b.ge .Lcpy_body_large /* * Less than 128 bytes to copy, so handle 64 here and then jump * to the tail.
*/
ldp1 A_l, A_h, src, #16
stp1 A_l, A_h, dst, #16
ldp1 B_l, B_h, src, #16
ldp1 C_l, C_h, src, #16
stp1 B_l, B_h, dst, #16
stp1 C_l, C_h, dst, #16
ldp1 D_l, D_h, src, #16
stp1 D_l, D_h, dst, #16
tst count, #0x3f
b.ne .Ltail63
b .Lexitfunc
/* * Critical loop. Start at a new cache line boundary. Assuming * 64 bytes per line this ensures the entire loop is in one line.
*/
.p2align L1_CACHE_SHIFT
.Lcpy_body_large: /* pre-get 64 bytes data. */
ldp1 A_l, A_h, src, #16
ldp1 B_l, B_h, src, #16
ldp1 C_l, C_h, src, #16
ldp1 D_l, D_h, src, #16
1: /* * interlace the load of next 64 bytes data block with store of the last * loaded 64 bytes data.
*/
stp1 A_l, A_h, dst, #16
ldp1 A_l, A_h, src, #16
stp1 B_l, B_h, dst, #16
ldp1 B_l, B_h, src, #16
stp1 C_l, C_h, dst, #16
ldp1 C_l, C_h, src, #16
stp1 D_l, D_h, dst, #16
ldp1 D_l, D_h, src, #16
subs count, count, #64
b.ge 1b
stp1 A_l, A_h, dst, #16
stp1 B_l, B_h, dst, #16
stp1 C_l, C_h, dst, #16
stp1 D_l, D_h, dst, #16
tst count, #0x3f
b.ne .Ltail63
.Lexitfunc:
Messung V0.5
¤ Dauer der Verarbeitung: 0.11 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.