Quelle memset_64.S Sprache: x86-64-Assembler (GNU as, AT&T-Syntax)

/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/export.h>
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

.section .noinstr.text, "ax"

/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
* simpler and shorter than the original function as well.
*
* rdi   destination
* rsi   value (char)
* rdx   count (bytes)
*
* rax   original destination
*
* The FSRS alternative should be done inline (avoiding the call and
* the disgusting return handling), but that would require some help
* from the compiler for better calling conventions.
*
* The 'rep stosb' itself is small enough to replace the call, but all
* the register moves blow up the code. And two of them are "needed"
* only for the return value, which is the same as the destination input
* and which the compiler could/should handle much better anyway.
*/
SYM_TYPED_FUNC_START(__memset)
	/*
	 * Patch site: the jump to the open-coded fallback is the default
	 * instruction; the replacement for X86_FEATURE_FSRS is empty, so on
	 * such CPUs execution falls through into the 'rep stosb' sequence.
	 * NOTE(review): patching semantics come from asm/alternative.h,
	 * which is not shown here -- confirm there.
	 */
	ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS

	movq	%rdx, %rcx		/* rcx = repeat count for 'rep' */
	movq	%rdi, %r9		/* keep dst: stosb modifies rdi */
	movb	%sil, %al		/* al = byte to store */
	rep stosb			/* store al to (%rdi), rcx times */
	movq	%r9, %rax		/* return value = original dst */
	RET
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)

/*
 * Make the routine available under the standard name too: memset is set
 * up as an alias of __memset and exported, mirroring
 * EXPORT_SYMBOL(__memset).
 * NOTE(review): the symbol binding produced by SYM_FUNC_ALIAS_MEMFUNC and
 * the purpose of SYM_PIC_ALIAS are defined in headers not shown here --
 * confirm there before reordering or removing any of these lines.
 */
SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
SYM_PIC_ALIAS(memset)
EXPORT_SYMBOL(memset)

/*
 * memset_orig - fill memory using explicit 8-byte and 1-byte stores
 * instead of 'rep stosb'.
 *
 * The only reference visible in this file is the "jmp memset_orig" inside
 * __memset's ALTERNATIVE, so the registers are those of __memset's entry:
 *
 * rdi   destination
 * rsi   value (only the low byte is used)
 * rdx   count (bytes)
 *
 * rax   original destination (on return)
 *
 * Writes rax, rcx, rdx, rdi, r9, r10 and the flags; r8 is written only on
 * the unaligned-destination path.
 */
SYM_FUNC_START_LOCAL(memset_orig)
/* r10 = original destination; copied back into rax at .Lende */
movq %rdi,%r10

/*
 * Replicate the fill byte into all eight bytes of rax: zero-extend it
 * into rcx, then multiply by 0x0101010101010101.
 */
movzbl %sil,%ecx
movabs $0x0101010101010101,%rax
imulq  %rcx,%rax

/* r9d = dst & 7; non-zero means dst is not 8-byte aligned */
movl  %edi,%r9d
andl  $7,%r9d
jnz  .Lbad_alignment
.Lafter_bad_alignment:

/* rcx = count / 64 = number of full 64-byte chunks; none -> tail */
movq  %rdx,%rcx
shrq  $6,%rcx
jz  .Lhandle_tail

.p2align 4
.Lloop_64:
/*
 * Eight qword stores per iteration. The decq comes first; the movq and
 * leaq instructions after it do not modify the flags, so the jnz at the
 * bottom still tests the result of the decq.
 */
decq  %rcx
movq  %rax,(%rdi)
movq  %rax,8(%rdi)
movq  %rax,16(%rdi)
movq  %rax,24(%rdi)
movq  %rax,32(%rdi)
movq  %rax,40(%rdi)
movq  %rax,48(%rdi)
movq  %rax,56(%rdi)
leaq  64(%rdi),%rdi
jnz    .Lloop_64

/*
 * Handle the tail in loops. The loops should be faster than
 * hard-to-predict jump tables.
 *
 * ecx = count & 56 (63 & ~7): the bytes left over after the 64-byte
 * chunks that still form whole qwords. Shifted right by 3 it becomes
 * the iteration count for .Lloop_8.
 */
.p2align 4
.Lhandle_tail:
movl %edx,%ecx
andl    $63&(~7),%ecx
jz   .Lhandle_7
shrl $3,%ecx
.p2align 4
.Lloop_8:
/* one qword per iteration; same dec-first / flag-preserving shape as above */
decl   %ecx
movq  %rax,(%rdi)
leaq  8(%rdi),%rdi
jnz    .Lloop_8

.Lhandle_7:
/* edx = count & 7: the trailing bytes, stored one at a time */
andl $7,%edx
jz      .Lende
.p2align 4
.Lloop_1:
decl    %edx
movb  %al,(%rdi)
leaq 1(%rdi),%rdi
jnz     .Lloop_1

.Lende:
/* return the original destination saved in r10 */
movq %r10,%rax
RET

/*
 * dst is not 8-byte aligned (r9 = dst & 7, non-zero).
 *
 * With 7 or fewer bytes to set (unsigned compare), go straight to the
 * byte loop. Otherwise write one qword at the unaligned address, then
 * advance dst by r8 = 8 - r9 so that it becomes 8-byte aligned, and take
 * the same r8 bytes off the count. The part of that first store lying
 * past the new dst is written again, with the same pattern, by the code
 * after .Lafter_bad_alignment.
 */
.Lbad_alignment:
cmpq $7,%rdx
jbe .Lhandle_7
movq %rax,(%rdi) /* unaligned store */
movq $8,%r8
subq %r9,%r8
addq %r8,%rdi
subq %r8,%rdx
jmp .Lafter_bad_alignment
/* .Lfinal is not referenced anywhere in the code shown here */
.Lfinal:
SYM_FUNC_END(memset_orig)

Messung V0.5

¤ Dauer der Verarbeitung: 0.12 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.