Quelle umulsidi3.S

Sprache: Sparc

/* SPDX-License-Identifier: GPL-2.0-or-later WITH GCC-exception-2.0 */
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32 || XCHAL_HAVE_MAC16
#define XCHAL_NO_MUL 0
#else
#define XCHAL_NO_MUL 1
#endif

ENTRY(__umulsidi3)

#ifdef __XTENSA_CALL0_ABI__
abi_entry(32)
s32i a12, sp, 16
s32i a13, sp, 20
s32i a14, sp, 24
s32i a15, sp, 28
#elif XCHAL_NO_MUL
/* This is not really a leaf function; allocate enough stack space
   to allow CALL12s to a helper function.  */
abi_entry(32)
#else
abi_entry_default
#endif

#ifdef __XTENSA_EB__
#define wh a2
#define wl a3
#else
#define wh a3
#define wl a2
#endif /* __XTENSA_EB__ */

/* This code is taken from the mulsf3 routine in ieee754-sf.S.
   See more comments there.  */

#if XCHAL_HAVE_MUL32_HIGH
mull a6, a2, a3
muluh wh, a2, a3
mov wl, a6

#else /* ! MUL32_HIGH */

#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
/* a0 and a8 will be clobbered by calling the multiply function
   but a8 is not used here and need not be saved.  */
s32i a0, sp, 0
#endif

#if XCHAL_HAVE_MUL16 || XCHAL_HAVE_MUL32

#define a2h a4
#define a3h a5

/* Get the high halves of the inputs into registers.  */
srli a2h, a2, 16
srli a3h, a3, 16

#define a2l a2
#define a3l a3

#if XCHAL_HAVE_MUL32 && !XCHAL_HAVE_MUL16
/* Clear the high halves of the inputs.  This does not matter
   for MUL16 because the high bits are ignored.  */
extui a2, a2, 0, 16
extui a3, a3, 0, 16
#endif
#endif /* MUL16 || MUL32 */

#if XCHAL_HAVE_MUL16

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
mul16u dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MUL32

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
mull dst, xreg ## xhalf, yreg ## yhalf

#elif XCHAL_HAVE_MAC16

/* The preprocessor insists on inserting a space when concatenating after
   a period in the definition of do_mul below.  These macros are a workaround
   using underscores instead of periods when doing the concatenation.  */
#define umul_aa_ll umul.aa.ll
#define umul_aa_lh umul.aa.lh
#define umul_aa_hl umul.aa.hl
#define umul_aa_hh umul.aa.hh

#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
umul_aa_ ## xhalf ## yhalf xreg, yreg; \
rsr dst, ACCLO

#else /* no multiply hardware */

#define set_arg_l(dst, src) \
extui dst, src, 0, 16
#define set_arg_h(dst, src) \
srli dst, src, 16

#ifdef __XTENSA_CALL0_ABI__
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a13, xreg); \
set_arg_ ## yhalf (a14, yreg); \
call0 .Lmul_mulsi3; \
mov dst, a12
#else
#define do_mul(dst, xreg, xhalf, yreg, yhalf) \
set_arg_ ## xhalf (a14, xreg); \
set_arg_ ## yhalf (a15, yreg); \
call12 .Lmul_mulsi3; \
mov dst, a14
#endif /* __XTENSA_CALL0_ABI__ */

#endif /* no multiply hardware */

/* Add pp1 and pp2 into a6 with carry-out in a9.  */
do_mul(a6, a2, l, a3, h) /* pp 1 */
do_mul(a11, a2, h, a3, l) /* pp 2 */
movi a9, 0
add a6, a6, a11
bgeu a6, a11, 1f
addi a9, a9, 1
1:
/* Shift the high half of a9/a6 into position in a9.  Note that
   this value can be safely incremented without any carry-outs.  */
ssai 16
src a9, a9, a6

/* Compute the low word into a6.  */
do_mul(a11, a2, l, a3, l) /* pp 0 */
sll a6, a6
add a6, a6, a11
bgeu a6, a11, 1f
addi a9, a9, 1
1:
/* Compute the high word into wh.  */
do_mul(wh, a2, h, a3, h) /* pp 3 */
add wh, wh, a9
mov wl, a6

#endif /* !MUL32_HIGH */

#if defined(__XTENSA_CALL0_ABI__) && XCHAL_NO_MUL
/* Restore the original return address.  */
l32i a0, sp, 0
#endif
#ifdef __XTENSA_CALL0_ABI__
l32i a12, sp, 16
l32i a13, sp, 20
l32i a14, sp, 24
l32i a15, sp, 28
abi_ret(32)
#else
abi_ret_default
#endif

#if XCHAL_NO_MUL

.macro do_addx2 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
addx2 \dst, \as, \at
#else
slli \tmp, \as, 1
add \dst, \tmp, \at
#endif
.endm

.macro do_addx4 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
addx4 \dst, \as, \at
#else
slli \tmp, \as, 2
add \dst, \tmp, \at
#endif
.endm

.macro do_addx8 dst, as, at, tmp
#if XCHAL_HAVE_ADDX
addx8 \dst, \as, \at
#else
slli \tmp, \as, 3
add \dst, \tmp, \at
#endif
.endm

/* For Xtensa processors with no multiply hardware, this simplified
   version of _mulsi3 is used for multiplying 16-bit chunks of
   the floating-point mantissas.  When using CALL0, this function
   uses a custom ABI: the inputs are passed in a13 and a14, the
   result is returned in a12, and a8 and a15 are clobbered.  */
.align 4
.Lmul_mulsi3:
abi_entry_default

.macro mul_mulsi3_body dst, src1, src2, tmp1, tmp2
movi \dst, 0
1: add \tmp1, \src2, \dst
extui \tmp2, \src1, 0, 1
movnez \dst, \tmp1, \tmp2

do_addx2 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 1, 1
movnez \dst, \tmp1, \tmp2

do_addx4 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 2, 1
movnez \dst, \tmp1, \tmp2

do_addx8 \tmp1, \src2, \dst, \tmp1
extui \tmp2, \src1, 3, 1
movnez \dst, \tmp1, \tmp2

srli \src1, \src1, 4
slli \src2, \src2, 4
bnez \src1, 1b
.endm

#ifdef __XTENSA_CALL0_ABI__
mul_mulsi3_body a12, a13, a14, a15, a8
#else
/* The result will be written into a2, so save that argument in a4.  */
mov a4, a2
mul_mulsi3_body a2, a4, a3, a5, a6
#endif
abi_ret_default
#endif /* XCHAL_NO_MUL */

ENDPROC(__umulsidi3)
EXPORT_SYMBOL(__umulsidi3)

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.12 Sekunden (vorverarbeitet am 2026-04-25) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.