/* Source: csum-copy_64.S -- language: x86-64 assembly (GNU as, AT&T syntax) */

/*
* Copyright 2002, 2003 Andi Kleen, SuSE Labs.
*
* This file is subject to the terms and conditions of the GNU General Public
* License.  See the file COPYING in the main directory of this archive
* for more details. No warranty for anything given at all.
*/
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/asm.h>

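/*
* Checksum copy with exception handling.
* A fault while reading the source or writing the destination makes the
* function return 0 (see .Lfault). This routine itself neither writes an
* error pointer nor zeroes the destination.
*
* Input
* rdi  source
* rsi  destination
* edx  len (32bit)
*
* Output
* eax  32bit folded sum, seeded with -1; 0 if a fault occurred.
*
* Wrappers need to take care of valid exception sum and zeroing.
* They also should align source or destination to 8 bytes; the routine
* itself copes with an unaligned destination by copying leading bytes
* separately (see .Lunaligned).
*/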

/*
 * source - place immediately before an instruction that loads from the
 * source buffer (%rdi).
 *
 * Emits numeric label 10 at the current position and passes it, together
 * with .Lfault, to _ASM_EXTABLE_UA.  That macro is defined outside this
 * file; presumably it records an exception-table entry so that a fault on
 * the following load resumes at .Lfault -- confirm in <asm/asm.h>.
 */
.macro source
10:
_ASM_EXTABLE_UA(10b, .Lfault)
.endm

/*
 * dest - place immediately before an instruction that stores to the
 * destination buffer (%rsi).
 *
 * Emits numeric label 20 at the current position and passes it, together
 * with .Lfault, to _ASM_EXTABLE_UA.  That macro is defined outside this
 * file; presumably it records an exception-table entry so that a fault on
 * the following store resumes at .Lfault -- confirm in <asm/asm.h>.
 */
.macro dest
20:
_ASM_EXTABLE_UA(20b, .Lfault)
.endm

/*
 * csum_partial_copy_generic - copy %edx bytes from (%rdi) to (%rsi) and
 * accumulate a sum with end-around carry over the copied data.
 *
 * In:    rdi = source, rsi = destination, edx = length in bytes
 * Out:   eax = sum folded to 32 bits (seeded with 0xffffffff),
 *              or 0 if a load or store faulted
 * Saved: rbx, r12, r13, r14, r15 (spilled on entry, reloaded at .Lout)
 * Scratch: rcx, rdx, r8, r9, r10, r11; rdi and rsi are advanced
 *
 * Register roles:
 *   rax  running sum; 64 bits wide until .Lfold, 32 bits afterwards
 *   r9   constant zero; "adc %r9, ..." folds a pending carry into the sum
 *   rcx  remaining length; bit 63 is set by .Lodd when one leading byte
 *        was copied to reach an even destination address
 *   r10  snapshot of rcx taken at .Lhandle_tail (or of ecx at .Lshort)
 *
 * Flag invariant: between the adc instructions of one chain only
 * instructions that leave CF untouched (mov, lea, dec) are used.  Keep it
 * that way when touching the loops.
 */
SYM_FUNC_START(csum_partial_copy_generic)
	/* Spill the callee-saved registers used as temporaries below. */
	subq  $5*8, %rsp
	movq  %rbx, 0*8(%rsp)
	movq  %r12, 1*8(%rsp)
	movq  %r14, 2*8(%rsp)
	movq  %r13, 3*8(%rsp)
	movq  %r15, 4*8(%rsp)

	movl  $-1, %eax			/* seed: rax = 0x00000000ffffffff */
	xorl  %r9d, %r9d		/* r9 stays zero for the whole function */
	movl  %edx, %ecx		/* rcx = len, upper 32 bits cleared */
	cmpl  $8, %ecx
	jb    .Lshort			/* < 8 bytes: word and byte steps only */

	testb  $7, %sil
	jne   .Lunaligned		/* destination not 8-byte aligned */
.Laligned:
	movl  %ecx, %r12d		/* drops bit 63 of rcx, if it was set */

	shrq  $6, %r12			/* r12 = number of whole 64-byte blocks */
	jz	.Lhandle_tail       /* < 64 */

	clc				/* shrq left the last shifted-out bit in CF */

	/* main loop. copy in 64 byte blocks */
	/* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */
	/* r11: temp3, rdx: temp4, r12 loopcnt */
	/* r10: temp5, r15: temp6, r14 temp7, r13 temp8 */
	.p2align 4
.Lloop:
	source
	movq  (%rdi), %rbx
	source
	movq  8(%rdi), %r8
	source
	movq  16(%rdi), %r11
	source
	movq  24(%rdi), %rdx

	source
	movq  32(%rdi), %r10
	source
	movq  40(%rdi), %r15
	source
	movq  48(%rdi), %r14
	source
	movq  56(%rdi), %r13

30:
	/*
	 * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a
	 * potentially unmapped kernel address.
	 */
	_ASM_EXTABLE(30b, 2f)
	prefetcht0 5*64(%rdi)
2:
	/* CF carries over from the previous iteration (or is 0 on entry). */
	adcq  %rbx, %rax
	adcq  %r8, %rax
	adcq  %r11, %rax
	adcq  %rdx, %rax
	adcq  %r10, %rax
	adcq  %r15, %rax
	adcq  %r14, %rax
	adcq  %r13, %rax

	decl %r12d			/* sets ZF for the jnz below, keeps CF */

	dest
	movq %rbx, (%rsi)
	dest
	movq %r8, 8(%rsi)
	dest
	movq %r11, 16(%rsi)
	dest
	movq %rdx, 24(%rsi)

	dest
	movq %r10, 32(%rsi)
	dest
	movq %r15, 40(%rsi)
	dest
	movq %r14, 48(%rsi)
	dest
	movq %r13, 56(%rsi)

	/* lea instead of add: ZF from decl and CF from adcq must survive. */
	leaq 64(%rdi), %rdi
	leaq 64(%rsi), %rsi

	jnz	.Lloop

	adcq  %r9, %rax			/* add in the last carry */

	/* do last up to 56 bytes */
.Lhandle_tail:
	/* ecx: count, rcx.63: the end result needs to be rol8 */
	movq %rcx, %r10			/* andl below clears bit 63 of rcx */
	andl $63, %ecx			/* bytes left after the 64-byte blocks */
	shrl $3, %ecx			/* ... as a count of 8-byte words */
	jz	.Lfold
	clc				/* shrl left the last shifted-out bit in CF */
	.p2align 4
.Lloop_8:
	source
	movq (%rdi), %rbx
	adcq %rbx, %rax
	decl %ecx			/* sets ZF for the jnz below, keeps CF */
	dest
	movq %rbx, (%rsi)
	leaq 8(%rsi), %rsi /* preserve carry */
	leaq 8(%rdi), %rdi
	jnz	.Lloop_8
	adcq %r9, %rax	/* add in carry */

.Lfold:
	/* reduce checksum to 32bits */
	movl %eax, %ebx			/* ebx = low half */
	shrq $32, %rax			/* eax = high half */
	addl %ebx, %eax
	adcl %r9d, %eax			/* end-around carry */

	/* do last up to 6 bytes */
.Lhandle_7:
	movl %r10d, %ecx
	andl $7, %ecx
.L1:				/* .Lshort rejoins the common path here */
	shrl $1, %ecx			/* ecx = number of 16-bit words left */
	jz   .Lhandle_1
	xorl %ebx, %ebx			/* movw below writes only bx */
	clc				/* states the adc chain's precondition */
	.p2align 4
.Lloop_1:
	source
	movw (%rdi), %bx
	adcl %ebx, %eax
	decl %ecx			/* sets ZF for the jnz below, keeps CF */
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	leaq 2(%rsi), %rsi
	jnz	.Lloop_1
	adcl %r9d, %eax	/* add in carry */

	/* handle last odd byte */
.Lhandle_1:
	testb $1, %r10b
	jz    .Lende
	xorl  %ebx, %ebx		/* movb below writes only bl */
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	addl %ebx, %eax
	adcl %r9d, %eax		/* carry */

.Lende:
	testq %r10, %r10
	js  .Lwas_odd			/* bit 63 set: a leading byte was peeled */
.Lout:
	movq 0*8(%rsp), %rbx
	movq 1*8(%rsp), %r12
	movq 2*8(%rsp), %r14
	movq 3*8(%rsp), %r13
	movq 4*8(%rsp), %r15
	addq $5*8, %rsp
	RET

	/* len < 8: no alignment step, no quadword loops, no fold needed. */
.Lshort:
	movl %ecx, %r10d		/* zero-extends, so bit 63 of r10 is clear */
	jmp  .L1

	/*
	 * Copy 1, 2 and/or 4 leading bytes until the destination is 8-byte
	 * aligned.  The plain addq's cannot carry out of 64 bits: rax starts
	 * at 0xffffffff and at most 0xff00, 0xffff and 0xffffffff are added.
	 */
.Lunaligned:
	xorl %ebx, %ebx
	testb $1, %sil
	jne  .Lodd
1:	testb $2, %sil
	je   2f
	source
	movw (%rdi), %bx
	dest
	movw %bx, (%rsi)
	leaq 2(%rdi), %rdi
	subq $2, %rcx
	leaq 2(%rsi), %rsi
	addq %rbx, %rax
2:	testb $4, %sil
	je .Laligned
	source
	movl (%rdi), %ebx
	dest
	movl %ebx, (%rsi)
	leaq 4(%rdi), %rdi
	subq $4, %rcx
	leaq 4(%rsi), %rsi
	addq %rbx, %rax
	jmp .Laligned

.Lodd:
	source
	movb (%rdi), %bl
	dest
	movb %bl, (%rsi)
	leaq 1(%rdi), %rdi
	leaq 1(%rsi), %rsi
	/* decrement, set MSB */
	leaq -1(%rcx, %rcx), %rcx	/* rcx = 2*len - 1, low bit is 1 */
	rorq $1, %rcx			/* rcx = (len - 1) | 1 << 63 */
	shll $8, %ebx			/* byte goes in swapped; see .Lwas_odd */
	addq %rbx, %rax
	jmp 1b

	/* Data was summed one byte off: rotate the result back by 8 bits. */
.Lwas_odd:
	roll $8, %eax
	jmp .Lout

	/* Exception: just return 0 */
.Lfault:
	xorl %eax, %eax
	jmp  .Lout			/* still restore registers and stack */
SYM_FUNC_END(csum_partial_copy_generic)

¤ Dauer der Verarbeitung: 0.3 Sekunden ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.