/* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under both the BSD-style license (found in the * LICENSE file in the root directory of this source tree) and the GPLv2 (found * in the COPYING file in the root directory of this source tree). * You may select, at your option, one of the above-listed licenses.
*/
#if defined(__aarch64__)
/*
 * Mark that this assembly supports BTI & PAC, because it is empty for aarch64.
 * See: https://github.com/facebook/zstd/issues/3841
 * See: https://gcc.godbolt.org/z/sqr5T4ffK
 * See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/
 * See: https://reviews.llvm.org/D62609
 *
 * The directives below emit a single note record into .note.gnu.property:
 *   - note header: name size, descriptor size, note type
 *   - name:        "GNU" plus its NUL terminator (4 bytes)
 *   - descriptor:  pr_type, pr_datasz, pr_data, then padding to 8 bytes
 * .pushsection/.popsection switch to the note section and back again, so
 * whatever section was current before this stanza is current after it.
 */
.pushsection .note.gnu.property, "a"
.p2align 3
.long 4 /* size of the name - "GNU\0" */
.long 0x10 /* size of descriptor: three 4-byte fields below plus 4 bytes of padding */
.long 0x5 /* NT_GNU_PROPERTY_TYPE_0 */
.asciz "GNU"
.long 0xc0000000 /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */
.long 4 /* pr_datasz - 4 bytes */
.long 3 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */
.p2align 3 /* pr_padding - bring everything to 8 byte alignment */
.popsection
#endif /* defined(__aarch64__) */
/*
 * NOTE(review): no #if matching the #endif below is visible in this file.
 * Presumably it closes an outer guard whose opening line is missing here;
 * confirm against the complete source before relying on this structure.
 */
#endif
#if ZSTD_ENABLE_ASM_X86_64_BMI2
/*
 * Calling convention:
 *
 * %rdi contains the first argument: HUF_DecompressAsmArgs*.
 * %rbp isn't maintained (no frame pointer).
 * %rsp contains the stack pointer that grows down.
 * No red-zone is assumed, only addresses >= %rsp are used.
 * All register contents are preserved.
 *
 * TODO: Support Windows calling convention.
 */
/*
 * Sets up register mappings for clarity.
 * op[], bits[], dtable & ip[0] each get their own register.
 * ip[1,2,3] & olimit alias var[].
 * %rax is a scratch register.
 */
.L_4X1_compute_olimit:
/*
 * Computes how many iterations we can do safely
 * %r15, %rax may be clobbered
 * rbx, rdx must be saved
 * op3 & ip0 mustn't be clobbered
 *
 * NOTE(review): %olimit and %ip1..%ip3 used below are not literal register
 * names; presumably they are preprocessor aliases for real registers, but
 * their definitions are not visible in this file - confirm. The same goes
 * for the FOR_EACH_STREAM*, GET_NEXT_DELT, DECODE_* and RELOAD_BITS macros.
 *
 * NOTE(review): the function entry label and the register pushes that the
 * pops at the end of this block would undo are not visible here either.
 */
/* Spill %rbx and %rdx to the two lowest stack slots. */
movq %rbx, 0(%rsp)
movq %rdx, 8(%rsp)
/*
 * Store clobbered variables on the stack
 *
 * Slot layout written here: 0 = ip1, 8 = ip2, 16 = ip3, 24 = olimit.
 * NOTE(review): slots 0 and 8 are the ones that received %rbx and %rdx just
 * above, and no reload of those registers is visible in between - confirm
 * against the complete source that the saved values are restored first.
 */
movq %olimit, 24(%rsp)
movq %ip1, 0(%rsp)
movq %ip2, 8(%rsp)
movq %ip3, 16(%rsp)
/* Call GET_NEXT_DELT for each stream */
FOR_EACH_STREAM(GET_NEXT_DELT)
/* Align the loop head to a 64-byte (2^6) boundary. */
.p2align 6
.L_4X1_loop_body:
/*
 * Decode 5 symbols in each of the 4 streams (20 total)
 * Must have called GET_NEXT_DELT for each stream
 *
 * The five macro invocations below carry symbol indices 0..4; the first
 * four use DECODE_AND_GET_NEXT and the last uses DECODE_FROM_DELT.
 */
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 0)
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 1)
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 2)
FOR_EACH_STREAM_WITH_INDEX(DECODE_AND_GET_NEXT, 3)
FOR_EACH_STREAM_WITH_INDEX(DECODE_FROM_DELT, 4)
/*
 * Load ip[1,2,3] from stack (var[] aliases them)
 * ip[] is needed for RELOAD_BITS
 * Each will be stored back to the stack after RELOAD
 *
 * These are the same slots (0, 8, 16) the ip values were stored to above.
 */
movq 0(%rsp), %ip1
movq 8(%rsp), %ip2
movq 16(%rsp), %ip3
/*
 * Reload each stream & fetch the next table entry
 * to prepare for the next iteration
 *
 * NOTE(review): only stream 0 is reloaded in the lines visible here, and no
 * branch back to .L_4X1_loop_body is visible - confirm against the complete
 * source.
 */
RELOAD_BITS(0)
GET_NEXT_DELT(0)
/*
 * Restore registers
 *
 * Fifteen pops, %r15 first and %rax last, followed by the return.
 */
pop %r15
pop %r14
pop %r13
pop %r12
pop %r11
pop %r10
pop %r9
pop %r8
pop %rdi
pop %rsi
pop %rbp
pop %rdx
pop %rcx
pop %rbx
pop %rax
ret
/*
 * HUF_decompress4X2_usingDTable_internal_fast_asm_loop
 *
 * Entry point, exposed under two labels that mark the same address.
 * NOTE(review): the underscore-prefixed label is presumably for object
 * formats that prepend '_' to C symbol names - confirm.
 *
 * Every general-purpose register except %rsp is pushed on entry and popped
 * in exact reverse order before the return.
 */
_HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
/* NOTE(review): ZSTD_CET_ENDBRANCH is a macro defined outside this file. */
ZSTD_CET_ENDBRANCH
/* Save all registers - even if they are callee saved for simplicity. */
push %rax
push %rbx
push %rcx
push %rdx
push %rbp
push %rsi
push %rdi
push %r8
push %r9
push %r10
push %r11
push %r12
push %r13
push %r14
push %r15
.L_4X2_compute_olimit:
/*
 * Computes how many iterations we can do safely
 * %r15, %rax may be clobbered
 * rdx must be saved
 * op[1,2,3,4] & ip0 mustn't be clobbered
 *
 * NOTE(review): %ip0 is not a literal register name; presumably it is a
 * preprocessor alias defined outside this file - confirm.
 */
/*
 * Spill %rdx to the lowest stack slot.
 * NOTE(review): as shown, no stack space is reserved between the pushes
 * above and this store, so 0(%rsp) is the slot holding the %r15 value just
 * pushed - confirm against the complete source.
 */
movq %rdx, 0(%rsp)
/* We can consume up to 7 input bytes each iteration. */
movq %ip0, %rax /* rax = ip0 */
/*
 * NOTE(review): with only the fifteen pushes above on the stack, offset 40
 * would address a saved register rather than a spilled ilowest value -
 * confirm the intended stack layout against the complete source.
 */
movq 40(%rsp), %rdx /* rdx = ilowest */
subq %rdx, %rax /* rax = ip0 - ilowest */
movq %rax, %r15 /* r15 = ip0 - ilowest */
/*
 * Restore registers
 *
 * Pops mirror the push sequence at entry, in reverse order.
 * NOTE(review): the first pop overwrites the %r15 value computed just
 * above; nothing that consumes that value is visible in this file.
 */
pop %r15
pop %r14
pop %r13
pop %r12
pop %r11
pop %r10
pop %r9
pop %r8
pop %rdi
pop %rsi
pop %rbp
pop %rdx
pop %rcx
pop %rbx
pop %rax
ret
#endif
/*
 * Measurement V0.5
 * Processing time: 0.15 seconds (preprocessed)
 *
 * The information on this web page was compiled carefully and to the best
 * of our knowledge. However, neither the completeness, nor the correctness,
 * nor the quality of the information provided is guaranteed.
 *
 * Note:
 * The syntax colouring and the measurement are still experimental.
 */