/* * Use dcbz on the complete cache lines in the destination * to set them to zero. This requires that the destination * area is cacheable. -- paulus * * During early init, cache might not be active yet, so dcbz cannot be used. * We therefore skip the optimised bloc that uses dcbz. This jump is * replaced by a nop once cache is active. This is done in machine_init()
*/
_GLOBAL_KASAN(memset)
cmplwi 0,r5,4
blt 7f
rlwimi r4,r4,8,16,23
rlwimi r4,r4,16,0,15
stw r4,0(r3)
beqlr
andi. r0,r3,3
add r5,r0,r5
subf r6,r0,r3
cmplwi 0,r4,0 /* * Skip optimised bloc until cache is enabled. Will be replaced * by 'bne' during boot to use normal procedure if r4 is not zero
*/
5: b 2f
patch_site 5b, patch__memset_nocache
clrlwi r7,r6,32-LG_CACHELINE_BYTES
add r8,r7,r5
srwi r9,r8,LG_CACHELINE_BYTES
addic. r9,r9,-1 /* total number of complete cachelines */
ble 2f
xori r0,r7,CACHELINE_MASK & ~3
srwi. r0,r0,2
beq 3f
mtctr r0
4: stwu r4,4(r6)
bdnz 4b
3: mtctr r9
li r7,4
10: dcbz r7,r6
addi r6,r6,CACHELINE_BYTES
bdnz 10b
clrlwi r5,r8,32-LG_CACHELINE_BYTES
addi r5,r5,4
/* * This version uses dcbz on the complete cache lines in the * destination area to reduce memory traffic. This requires that * the destination area is cacheable. * We only use this version if the source and dest don't overlap. * -- paulus. * * During early init, cache might not be active yet, so dcbz cannot be used. * We therefore jump to generic_memcpy which doesn't use dcbz. This jump is * replaced by a nop once cache is active. This is done in machine_init()
*/
_GLOBAL_KASAN(memmove)
cmplw 0,r3,r4
bgt backwards_memcpy /* fall through */
_GLOBAL_KASAN(memcpy)
1: b generic_memcpy
patch_site 1b, patch__memcpy_nocache
add r7,r3,r5 /* test if the src & dst overlap */
add r8,r4,r5
cmplw 0,r4,r7
cmplw 1,r3,r8
crand 0,0,4 /* cr0.lt &= cr1.lt */
blt generic_memcpy /* if regions overlap */
addi r4,r4,-4
addi r6,r3,-4
neg r0,r3
andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
beq 58f
cmplw 0,r5,r0 /* is this more than total to do? */
blt 63f /* if not much to do */
andi. r8,r0,3 /* get it word-aligned first */
subf r5,r0,r5
mtctr r8
beq+ 61f
70: lbz r9,4(r4) /* do some bytes */
addi r4,r4,1
addi r6,r6,1
stb r9,3(r6)
bdnz 70b
61: srwi. r0,r0,2
mtctr r0
beq 58f
72: lwzu r9,4(r4) /* do some words */
stwu r9,4(r6)
bdnz 72b
_GLOBAL(__copy_tofrom_user)
addi r4,r4,-4
addi r6,r3,-4
neg r0,r3
andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
beq 58f
cmplw 0,r5,r0 /* is this more than total to do? */
blt 63f /* if not much to do */
andi. r8,r0,3 /* get it word-aligned first */
mtctr r8
beq+ 61f
70: lbz r9,4(r4) /* do some bytes */
71: stb r9,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 70b
61: subf r5,r0,r5
srwi. r0,r0,2
mtctr r0
beq 58f
72: lwzu r9,4(r4) /* do some words */
73: stwu r9,4(r6)
bdnz 72b
/* Here we decide how far ahead to prefetch the source */
li r3,4
cmpwi r0,1
li r7,0
ble 114f
li r7,1
#if MAX_COPY_PREFETCH > 1 /* Heuristically, for large transfers we prefetch MAX_COPY_PREFETCH cachelines ahead. For small transfers
we prefetch 1 cacheline ahead. */
cmpwi r0,MAX_COPY_PREFETCH
ble 112f
li r7,MAX_COPY_PREFETCH
112: mtctr r7
111: dcbt r3,r4
addi r3,r3,CACHELINE_BYTES
bdnz 111b
#else
dcbt r3,r4
addi r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */
114: subf r8,r7,r0
mr r0,r7
mtctr r8
53: dcbt r3,r4
54: dcbz r11,r6
EX_TABLE(54b,105f) /* the main body of the cacheline loop */
COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
COPY_16_BYTES_WITHEX(2)
COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
COPY_16_BYTES_WITHEX(4)
COPY_16_BYTES_WITHEX(5)
COPY_16_BYTES_WITHEX(6)
COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
bdnz 53b
cmpwi r0,0
li r3,4
li r7,0
bne 114b
/* read fault, initial single-byte copy */
100: li r9,0
b 90f /* write fault, initial single-byte copy */
101: li r9,1
90: subf r5,r8,r5
li r3,0
b 99f /* read fault, initial word copy */
102: li r9,0
b 91f /* write fault, initial word copy */
103: li r9,1
91: li r3,2
b 99f
/* * this stuff handles faults in the cacheline loop and branches to either * 104f (if in read part) or 105f (if in write part), after updating r5
*/
COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
COPY_16_BYTES_EXCODE(2)
COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
COPY_16_BYTES_EXCODE(4)
COPY_16_BYTES_EXCODE(5)
COPY_16_BYTES_EXCODE(6)
COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif
/* read fault in cacheline loop */
104: li r9,0
b 92f /* fault on dcbz (effectively a write fault) */ /* or write fault in cacheline loop */
105: li r9,1
92: li r3,LG_CACHELINE_BYTES
mfctr r8
add r0,r0,r8
b 106f /* read fault in final word loop */
108: li r9,0
b 93f /* write fault in final word loop */
109: li r9,1
93: andi. r5,r5,3
li r3,2
b 99f /* read fault in final byte loop */
110: li r9,0
b 94f /* write fault in final byte loop */
111: li r9,1
94: li r5,0
li r3,0 /* * At this stage the number of bytes not copied is * r5 + (ctr << r3), and r9 is 0 for read or 1 for write.
*/
99: mfctr r0
106: slw r3,r0,r3
add. r3,r3,r5
beq 120f /* shouldn't happen */
cmpwi 0,r9,0
bne 120f /* for a read fault, first try to continue the copy one byte at a time */
mtctr r3
130: lbz r0,4(r4)
131: stb r0,4(r6)
addi r4,r4,1
addi r6,r6,1
bdnz 130b /* then clear out the destination: r3 bytes starting at 4(r6) */
132: mfctr r3
120: blr
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.