/* * copy_user_nocache - Uncached memory copy with exception handling * * This copies from user space into kernel space, but the kernel * space accesses can take a machine check exception, so they too * need exception handling. * * Note: only 32-bit and 64-bit stores have non-temporal versions, * and we only use aligned versions. Any unaligned parts at the * start or end of the copy will be done using normal cached stores. * * Input: * rdi destination * rsi source * edx count * * Output: * rax uncopied bytes or 0 if successful.
*/
SYM_FUNC_START(__copy_user_nocache)
ANNOTATE_NOENDBR /* If destination is not 7-byte aligned, we'll have to align it */
testb $7,%dil
jne .Lalign
addq $64,%rsi
addq $64,%rdi sub $64,%edx
cmp $64,%edx
jae .Lunrolled
/* * First set of user mode loads have been done * without any stores, so if they fail, we can * just try the non-unrolled loop.
*/
_ASM_EXTABLE_UA(10b, .Lquadwords)
_ASM_EXTABLE_UA(11b, .Lquadwords)
_ASM_EXTABLE_UA(12b, .Lquadwords)
_ASM_EXTABLE_UA(13b, .Lquadwords)
/* * The second set of user mode loads have been * done with 32 bytes stored to the destination, * so we need to take that into account before * falling back to the unrolled loop.
*/
_ASM_EXTABLE_UA(30b, .Lfixup32)
_ASM_EXTABLE_UA(31b, .Lfixup32)
_ASM_EXTABLE_UA(32b, .Lfixup32)
_ASM_EXTABLE_UA(33b, .Lfixup32)
/* * An exception on a write means that we're * done, but we need to update the count * depending on where in the unrolled loop * we were.
*/
_ASM_EXTABLE_UA(20b, .Ldone0)
_ASM_EXTABLE_UA(21b, .Ldone8)
_ASM_EXTABLE_UA(22b, .Ldone16)
_ASM_EXTABLE_UA(23b, .Ldone24)
_ASM_EXTABLE_UA(40b, .Ldone32)
_ASM_EXTABLE_UA(41b, .Ldone40)
_ASM_EXTABLE_UA(42b, .Ldone48)
_ASM_EXTABLE_UA(43b, .Ldone56)
/* * If we fail on the last full quadword, we will * not try to do any byte-wise cached accesses. * We will try to do one more 4-byte uncached * one, though.
*/
_ASM_EXTABLE_UA(50b, .Llast4)
_ASM_EXTABLE_UA(51b, .Ldone0)
.Llong:
test $4,%dl
je .Lword
60: movl (%rsi),%eax
61: movnti %eax,(%rdi)
addq $4,%rsi
addq $4,%rdi sub $4,%edx
.Lword:
sfence
test $2,%dl
je .Lbyte
70: movw (%rsi),%ax
71: movw %ax,(%rdi)
addq $2,%rsi
addq $2,%rdi sub $2,%edx
.Lbyte:
test $1,%dl
je .Ldone
80: movb (%rsi),%al
81: movb %al,(%rdi)
dec %edx
.Ldone:
mov %edx,%eax
RET
/* * If we fail on the last four bytes, we won't * bother with any fixups. It's dead, Jim. Note * that there's no need for 'sfence' for any * of this, since the exception will have been * serializing.
*/
_ASM_EXTABLE_UA(60b, .Ldone)
_ASM_EXTABLE_UA(61b, .Ldone)
_ASM_EXTABLE_UA(70b, .Ldone)
_ASM_EXTABLE_UA(71b, .Ldone)
_ASM_EXTABLE_UA(80b, .Ldone)
_ASM_EXTABLE_UA(81b, .Ldone)
/* * This is the "head needs aliging" case when * the destination isn't 8-byte aligned. The * 4-byte case can be done uncached, but any * smaller alignment is done with regular stores.
*/
.Lalign:
test $1,%dil
je .Lalign_word
test %edx,%edx
je .Ldone
90: movb (%rsi),%al
91: movb %al,(%rdi)
inc %rsi
inc %rdi
dec %edx
.Lalign_word:
test $2,%dil
je .Lalign_long
cmp $2,%edx
jb .Lbyte
92: movw (%rsi),%ax
93: movw %ax,(%rdi)
addq $2,%rsi
addq $2,%rdi sub $2,%edx
.Lalign_long:
test $4,%dil
je .Lis_aligned
cmp $4,%edx
jb .Lword
94: movl (%rsi),%eax
95: movnti %eax,(%rdi)
addq $4,%rsi
addq $4,%rdi sub $4,%edx
jmp .Lis_aligned
/* * If we fail on the initial alignment accesses, * we're all done. Again, no point in trying to * do byte-by-byte probing if the 4-byte load * fails - we're not doing any uncached accesses * any more.
*/
_ASM_EXTABLE_UA(90b, .Ldone)
_ASM_EXTABLE_UA(91b, .Ldone)
_ASM_EXTABLE_UA(92b, .Ldone)
_ASM_EXTABLE_UA(93b, .Ldone)
_ASM_EXTABLE_UA(94b, .Ldone)
_ASM_EXTABLE_UA(95b, .Ldone)
/* * Exception table fixups for faults in the middle
*/
.Ldone56: sub $8,%edx
.Ldone48: sub $8,%edx
.Ldone40: sub $8,%edx
.Ldone32: sub $8,%edx
.Ldone24: sub $8,%edx
.Ldone16: sub $8,%edx
.Ldone8: sub $8,%edx
.Ldone0:
mov %edx,%eax
RET
.Lfixup32:
addq $32,%rsi
addq $32,%rdi sub $32,%edx
jmp .Lquadwords
.Llast4:
52: movl (%rsi),%eax
53: movnti %eax,(%rdi)
sfence sub $4,%edx
mov %edx,%eax
RET
_ASM_EXTABLE_UA(52b, .Ldone0)
_ASM_EXTABLE_UA(53b, .Ldone0)
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.