Quelle mpi_sparc.c

Sprache: C

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Multiplication performance enhancements for sparc v8+vis CPUs. */

#include "mpi-priv.h"
#include <stddef.h>
#include <sys/systeminfo.h>
#include <strings.h>

/*
 * Multiply-accumulate primitives declared extern here; their implementation
 * is not visible in this file.
 *
 * Contract shared by both functions below:
 *   - vector y must be 8-byte aligned, and n must be even;
 *   - the return value is the carry out of the high-order word of the result;
 *   - the maximum n is 256.
 *
 * NOTE(review): the callers in this file zero-pad an odd-length input by one
 * digit but still pass the original (odd) length as n -- confirm that the
 * implementation rounds n up itself.
 */

/* vector x += vector y * scalar a; where y is of length n words. */
extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);

/* vector z = vector x + vector y * scalar a; where y is of length n words. */
extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
                        int n, mp_digit a);

/* v8 versions of these functions run on any Sparc v8 CPU. */

/*
 * MP_MUL_DxD(a, b, Phi, Plo): multiply digit a by digit b in an
 * unsigned long long, then store the product truncated to mp_digit in Plo
 * and the product shifted right by MP_DIGIT_BIT in Phi.
 *
 * This trick works on Sparc V8 CPUs with the Workshop compilers.
 * (Assumes unsigned long long is at least twice as wide as mp_digit --
 * TODO confirm for any new target.)
 *
 * Macro hygiene:
 *   - every argument is parenthesized, so expression operands such as
 *     "x + 1" are widened and multiplied as a whole;
 *   - the body is a do { } while (0) statement, so "MP_MUL_DxD(...);" is a
 *     single statement and is safe inside an unbraced if/else;
 *   - the temporary has a macro-specific name to avoid capturing a caller
 *     variable called "product";
 *   - a and b are each evaluated exactly once.
 */
#define MP_MUL_DxD(a, b, Phi, Plo)                                \
    do {                                                          \
        unsigned long long mp_mul_dxd_prod_ =                     \
            (unsigned long long)(a) * (b);                        \
        (Plo) = (mp_digit)mp_mul_dxd_prod_;                       \
        (Phi) = (mp_digit)(mp_mul_dxd_prod_ >> MP_DIGIT_BIT);     \
    } while (0)

/*
 * c = a * b
 *
 * Multiplies the a_len-digit vector a by the single digit b, writing one
 * result digit to c per input digit and then the final carry digit, so
 * a_len + 1 digits of c are stored in total.
 */
static void
v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    mp_digit carry = 0;

    for (; a_len != 0; --a_len) {
#if !defined(MP_NO_MP_WORD)
        /* Wide multiply, then fold in the carry from the previous digit. */
        mp_word acc = ((mp_word)b * *a++) + carry;

        *c++ = ACCUM(acc);
        carry = CARRYOUT(acc);
#else
        mp_digit digit = *a++;
        mp_digit lo, hi;

        MP_MUL_DxD(digit, b, hi, lo);

        /* Add the incoming carry; a wrapped sum bumps the high half. */
        lo += carry;
        if (lo < carry) {
            ++hi;
        }

        *c++ = lo;
        carry = hi;
#endif
    }

    /* The last carry becomes the most significant result digit. */
    *c = carry;
}

/*
 * c += a * b
 *
 * Adds the product of the a_len-digit vector a and the single digit b into
 * the first a_len digits of c. The final carry is stored to (not added
 * into) the digit of c that follows them.
 */
static void
v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    mp_digit carry = 0;

    for (; a_len != 0; --a_len) {
#if !defined(MP_NO_MP_WORD)
        /* Wide multiply plus the existing digit of c plus the carry. */
        mp_word acc = ((mp_word)b * *a++) + *c + carry;

        *c++ = ACCUM(acc);
        carry = CARRYOUT(acc);
#else
        mp_digit digit = *a++;
        mp_digit addend = *c;
        mp_digit lo, hi;

        MP_MUL_DxD(digit, b, hi, lo);

        /* Add the incoming carry; a wrapped sum bumps the high half. */
        lo += carry;
        if (lo < carry) {
            ++hi;
        }

        /* Add the existing digit of c, again tracking wrap-around. */
        lo += addend;
        if (lo < addend) {
            ++hi;
        }

        *c++ = lo;
        carry = hi;
#endif
    }

    /* Overwrite the next digit of c with the final carry. */
    *c = carry;
}

/*
 * c += a * b, with carry propagation.
 *
 * Presently, this is only used by the Montgomery arithmetic code.
 *
 * Adds the product of the a_len-digit vector a and the single digit b into
 * the first a_len digits of c, then keeps adding the remaining carry into
 * the following digits of c until no carry is left. The caller must make
 * sure c has enough digits for the carry to die out; this function performs
 * no bounds check.
 */
static void
v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    mp_digit carry = 0;

    for (; a_len != 0; --a_len) {
#if !defined(MP_NO_MP_WORD)
        /* Wide multiply plus the existing digit of c plus the carry. */
        mp_word acc = ((mp_word)b * *a++) + *c + carry;

        *c++ = ACCUM(acc);
        carry = CARRYOUT(acc);
#else
        mp_digit digit = *a++;
        mp_digit addend = *c;
        mp_digit lo, hi;

        MP_MUL_DxD(digit, b, hi, lo);

        /* Add the incoming carry; a wrapped sum bumps the high half. */
        lo += carry;
        if (lo < carry) {
            ++hi;
        }

        /* Add the existing digit of c, again tracking wrap-around. */
        lo += addend;
        if (lo < addend) {
            ++hi;
        }

        *c++ = lo;
        carry = hi;
#endif
    }

    /* Ripple whatever carry is left through the higher digits of c. */
    while (carry != 0) {
#if !defined(MP_NO_MP_WORD)
        mp_word acc = (mp_word)*c + carry;

        *c++ = ACCUM(acc);
        carry = CARRYOUT(acc);
#else
        mp_digit prev = *c;
        mp_digit sum = prev + carry;

        *c++ = sum;
        carry = (sum < prev); /* 1 if the addition wrapped, else 0 */
#endif
    }
}

/* These functions run only on v8plus+vis or v9+vis CPUs. */

/*
 * c = a * b
 *
 * For a_len <= 256 the work is done by mul_add_inp() on a zeroed c; longer
 * inputs are handed to v8_mpv_mul_d(). a_len + 1 digits of c are written.
 */
void
s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    /* Scratch copy of a: presumably 256 digits + zero pad + alignment slack. */
    mp_digit scratch[258];
    mp_digit carry;

    if (a_len > 256) {
        /* Too long for mul_add_inp(); use the generic routine. */
        v8_mpv_mul_d(a, a_len, b, c);
        return;
    }

    /*
     * Work from a private copy when a aliases c (c is cleared below), when
     * a is not 8-byte aligned, or when a_len is odd.
     */
    if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
        mp_digit *aligned = scratch;

        /*
         * NOTE(review): stepping one digit forward only yields an 8-byte
         * aligned address if mp_digit is 4 bytes wide -- confirm.
         */
        if (((ptrdiff_t)scratch & 0x7) != 0) {
            aligned = scratch + 1;
        }

        memcpy(aligned, a, a_len * sizeof(*a));
        if (a_len & 1) {
            /* Zero-pad so the copy holds an even number of digits. */
            aligned[a_len] = 0;
        }
        a = aligned;
    }

    /* Clear the destination so the in-place accumulate yields a * b. */
    s_mp_setz(c, a_len + 1);
    carry = mul_add_inp(c, a, a_len, b);
    c[a_len] = carry;
}

/*
 * c += a * b, where a is a_len words long.
 *
 * For a_len <= 256 the work is done by mul_add_inp(), and the carry it
 * returns is stored in c[a_len]; longer inputs are handed to
 * v8_mpv_mul_d_add().
 */
void
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    /* Scratch copy of a: presumably 256 digits + zero pad + alignment slack. */
    mp_digit scratch[258];
    mp_digit carry;

    if (a_len > 256) {
        /* Too long for mul_add_inp(); use the generic routine. */
        v8_mpv_mul_d_add(a, a_len, b, c);
        return;
    }

    /* Work from a private copy when a is misaligned or a_len is odd. */
    if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
        mp_digit *aligned = scratch;

        /*
         * NOTE(review): stepping one digit forward only yields an 8-byte
         * aligned address if mp_digit is 4 bytes wide -- confirm.
         */
        if (((ptrdiff_t)scratch & 0x7) != 0) {
            aligned = scratch + 1;
        }

        memcpy(aligned, a, a_len * sizeof(*a));
        if (a_len & 1) {
            /* Zero-pad so the copy holds an even number of digits. */
            aligned[a_len] = 0;
        }
        a = aligned;
    }

    carry = mul_add_inp(c, a, a_len, b);
    /* The carry overwrites (is not added into) the digit after the sum. */
    c[a_len] = carry;
}

/*
 * c += a * b, where a is a_len words long, with carry propagation.
 *
 * For a_len <= 256 the work is done by mul_add_inp(), and the carry it
 * returns is then added into c[a_len] and onward until no carry is left;
 * longer inputs are handed to v8_mpv_mul_d_add_prop(). The caller must make
 * sure c has enough digits for the carry to die out; no bounds check is
 * performed here.
 */
void
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
{
    /* Scratch copy of a: presumably 256 digits + zero pad + alignment slack. */
    mp_digit scratch[258];
    mp_digit carry;

    if (a_len > 256) {
        /* Too long for mul_add_inp(); use the generic routine. */
        v8_mpv_mul_d_add_prop(a, a_len, b, c);
        return;
    }

    /* Work from a private copy when a is misaligned or a_len is odd. */
    if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
        mp_digit *aligned = scratch;

        /*
         * NOTE(review): stepping one digit forward only yields an 8-byte
         * aligned address if mp_digit is 4 bytes wide -- confirm.
         */
        if (((ptrdiff_t)scratch & 0x7) != 0) {
            aligned = scratch + 1;
        }

        memcpy(aligned, a, a_len * sizeof(*a));
        if (a_len & 1) {
            /* Zero-pad so the copy holds an even number of digits. */
            aligned[a_len] = 0;
        }
        a = aligned;
    }

    carry = mul_add_inp(c, a, a_len, b);

    /* Ripple the returned carry through the digits above the sum. */
    if (carry != 0) {
        mp_digit *hi = c + a_len;

        while (carry != 0) {
            mp_digit sum = carry + *hi;

            *hi++ = sum;
            carry = (sum < carry); /* 1 if the addition wrapped, else 0 */
        }
    }
}

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.24 Sekunden (vorverarbeitet am 2026-04-28) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.