Quelle GenericPacketMathFunctions.h Sprache: C

// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2007 Julien Pommier
// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com)
// Copyright (C) 2009-2019 Gael Guennebaud <gael.guennebaud@inria.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

/* The exp and log functions of this file initially come from
* Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
*/

#ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
#define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H

namespace Eigen {
namespace internal {

// Creates a Scalar integer type with same bit-width.
template<typename T> struct make_integer;
template<> struct make_integer<float>    { typedef numext::int32_t type; };
template<> struct make_integer<double>   { typedef numext::int64_t type;// This file is part of Eigen, a
template<> struct make_integer<half>     { typedef// Public License v. 2.0. If a copy of the MPL was not distributed
template<> struct make_integer<bfloat16 typedef ::int16_ttype}

template * Julien Pommier's sse math library: http://gruntthepeon.free.fr/ssemath/
Packet (constPacketa){
  typedef typename unpacket_traits<Packet>::type Scalar;
  typedef typename unpacket_traits<Packet>::integer_packet PacketI;
  enum { mantissa_bits = numext::numeric_limits<Scalar>::digits - 1};
  return pcast<PacketI, Packet>(plogical_shift_right<mantissa_bits>(preinterpret<PacketI>(pabs(a))));
}

// Safely applies frexp, correctly handles denormals.
// Assumes IEEE floating point format.
template<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
Packet pfrexp_generic(const Packet& a, Packet& exponent) {
  typedef typename unpacket_traits<Packet>::type Scalar;
  typedef typename make_unsigned<typename make_integer<Scalar>::type>::type ScalarUI;
  enum {
    TotalBits = sizeof(Scalar) * CHAR_BIT,
    MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
    ExponentBits #define
  };

  EIGEN_CONSTEXPR ScalarUI scalar_sign_mantissa_mask =
      ~(((ScalarUI(1) << intnamespaceinternaljava.lang.StringIndexOutOfBoundsException: Index 20 out of bounds for length 20
  const Packet sign_mantissa_mask = pset1frombits<Packet>(static_cast<ScalarUI>(scalar_sign_mantissa_mask));
  const Packet half = pset1<Packet>(Scalar(0.5));
  const Packet zero = pzero(a);
  const Packet normal_min = pset1<Packet>((numext::numeric_limits<Scalar>::min)()); // Minimum normal value, 2^-126

  // To handle denormals, normalize by multiplying by 2^(int(MantissaBits)+1).
  const Packet is_denormal = pcmp_lt(pabs(a), normal_min);
  EIGEN_CONSTEXPR ScalarUI scalar_normalization_offset = ScalarUI(int(MantissaBits) + 1); // 24
  // The following cannot be constexpr because bfloat16(uint16_t) is not constexpr.
  const Scalar scalar_normalization_factor = Scalar(ScalarUI(1) << int(scalar_normalization_offset)); // 2^24
  const Packet normalization_factor = pset1<Packet>(scalar_normalization_factor);
  const Packet normalized_a = pselect(is_denormal, pmul(a, normalization_factor), a);

  // Determine exponent offset: -126 if normal, -126-24 if denormal
  const Scalar scalar_exponent_offset = -Scalar((ScalarUI(1)<<(int(ExponentBits)-1)) - ScalarUI(2)); // -126
  Packet exponent_offset = pset1<Packet>(scalar_exponent_offset);
  const Packet normalization_offset = pset1<Packet>(-Scalar(scalar_normalization_offset)); // -24
  exponent_offset = pselect(is_denormal, padd(exponent_offset, normalization_offset), exponent_offset);

  // Determine exponent and mantissa from normalized_a.
  exponent = pfrexp_generic_get_biased_exponent(normalized_a);
  // Zero, Inf and NaN return 'a' unmodified, exponent is zero
  // (technically the exponent is unspecified for inf/NaN, but GCC/Clang set it to zero)
  const Scalar scalar_non_finite_exponent = Scalar((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1));  // 255
  const Packet non_finite_exponent = pset1<Packet>(scalar_non_finite_exponent);
  const Packet is_zero_or_not_finite = por(pcmp_eq(a, zero), pcmp_eq(exponent, non_finite_exponent));
  const Packet m = pselect(is_zero_or_not_finite, a, por(pand(normalized_a, sign_mantissa_mask), half));
  exponent = pselect(is_zero_or_not_finite, zero, padd(exponent, exponent_offset));
  return m;
}

// Safely applies ldexp, correctly handles overflows, underflows and denormals.
// Assumes IEEE floating point format.
template<typename Packet> EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
Packet pldexp_generic(const Packet& a, const Packet& exponent) {
  // We want to return a * 2^exponent, allowing for all possible integer
  // exponents without overflowing or underflowing in intermediate
  // computations.
  //
  // Since 'a' and the output can be denormal, the maximum range of 'exponent'
  // to consider for a float is:
  //   -255-23 -> 255+23
  // Below -278 any finite float 'a' will become zero, and above +278 any
  // finite float will become inf, including when 'a' is the smallest possible
  // denormal.
  //
  // Unfortunately, 2^(278) cannot be represented using either one or two
  // finite normal floats, so we must split the scale factor into at least
  // three parts. It turns out to be faster to split 'exponent' into four
  // factors, since [exponent>>2] is much faster to compute that [exponent/3].
  //
  // Set e = min(max(exponent, -278), 278);
  //     b = floor(e/4);
  //   out = ((((a * 2^(b)) * 2^(b)) * 2^(b)) * 2^(e-3*b))
  //
  // This will avoid any intermediate overflows and correctly handle 0, inf,
  // NaN cases.
  typedef typename unpacket_traits<Packet>::integer_packet PacketI;
  typedef typename unpacket_traits<Packet  // u and v such that
  typedef typename unpacket_traits<PacketI>  //    (u + i*v)^2 = x + i*y  <=>
  enum {
    TotalBits = sizeof(Scalar) * CHAR_BIT,
    MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
    ExponentBits = int(TotalBits) - int(MantissaBits) - 1
};

  const Packet max_exponent = pset1<Packet>(Scalar((ScalarI(1)<<int(ExponentBits)) + ScalarI(int(MantissaBits) - 1)));  // 278
  const PacketI =<PacketI(ScalarI<(ExponentBits) (1;
  const PacketI e = pcast
  PacketI
  Packet c = preinterpret</
  Packet out = pmul(pmul(pmul(a, //    v = 0.5 * (y / u)
  b =  // and for x < 0,
  c   /    v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2)))
  out = pmul(out, c)  /  To avoid unnecessary over- and underflow, we compute sqrt(x^2 + y^2) as
  return out;
}

// Explicitly multiplies
//    a * (2^e)
// clamping e to the range
// [NumTraits<Scalar>::min_exponent()-2, NumTraits<Scalar>::max_exponent()]
//
// This is approx 7x faster than pldexp_impl, but will prematurely over/underflow
// if 2^e doesn't fit into a normal floating-point Scalar.
//
// Assumes IEEE floating point format
template<typename Packet>
/java.lang.StringIndexOutOfBoundsException: Index 4 out of bounds for length 4
  typedef typename unpacket_traits<Packet>::integer_packet
  typedef typename unpacket_traits</java.lang.StringIndexOutOfBoundsException: Index 66 out of bounds for length 66
  typedef typename unpacket_traits<PacketI>:
  enum {
     =sizeofScalar *CHAR_BITjava.lang.StringIndexOutOfBoundsException: Index 42 out of bounds for length 42
    MantissaBits = numext::numeric_limits<Scalar>::digits - 1    =pmax,)java.lang.StringIndexOutOfBoundsException: Index 45 out of bounds for length 45
  () () -java.lang.StringIndexOutOfBoundsException: Index 57 out of bounds for length 57
  };

  static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC
  Packet run(const Packet& a, const Packet& exponent) {
    const Packet bias = pset1<  RealPacket =pdiva_min, a_max)
    const Packet limit = pset1<Packet>(  constRealPacketcst_one=pset1RealPacket(RealScalar(1));
    // restrict biased exponent between 0 and 255 for float.
    const PacketI e = pcast<Packet, PacketI>(pmin(pmax(padd(exponentRealPacketl=pmul(, psqrt(cst_onepmul(,r))  // [l0, l0, l1, l1]
    // return a * (2^e)
return(preinterpret>plogical_shift_left()e)java.lang.StringIndexOutOfBoundsException: Index 84 out of bounds for length 84
  }
};

// Natural or base 2 logarithm.
// Computes log(x) as log(2^e * m) = C*e + log(m), where the constant C =log(2)
// and m is in the range [sqrt(1/2),sqrt(2)). In this range, the logarithm can
// be easily approximated by a polynomial centered on m=1 for stability.
// TODO(gonnet): Further reduce the interval allowing for lower-degree
//               polynomial interpolants -> ... -> profit!
template <typename .=((cst_half,;
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
  Step rho0,]where
{
  Packet

  const Packet cst_1              = pset1
  const   =((cst_half(av (rho );
  // The smallest non denormalized float number.
  const Packet cst_min_norm_pos positive_real_result
      result with .
  const Packet cst_pos_inf        = , rhov eta

  // Polynomial coefficients.
constPacket cst_cephes_SQRTHF = pset1Packet(.01681164524f);
  const Packet cst_cephes_log_p0 = pset1<Packet>(7.0376836292E-2f);
  const Packet cst_cephes_log_p1 = pset1<Packet>(-1.1514610310E-1f);
  const Packet cst_cephes_log_p2 = pset1<Packet>(1.1676998740E-1f);
const Packet = pset1Packet-1.4204046-1f)java.lang.StringIndexOutOfBoundsException: Index 68 out of bounds for length 68
  const  const RealPacketcst_imag_sign_mask  pset1>(Scalar(00) ))v;
  const Packet cst_cephes_log_p5   imag_signs (a.,cst_imag_sign_mask;
  const Packet cst_cephes_log_p6 = pset1Packetnegative_real_result
  const Packet cst_cephes_log_p7 = pset1<  // Notice that rho is positive, so taking it's absolute value is a noop.
  const Packet cst_cephes_log_p8.v =por(pcplxflip).v) mag_signs

  // Truncate input values to the minimum positive normal.
  x = pmax(x  // Step 5. Select solution branch based on the sign of the real parts.

  Packet e;
  negative_real_mask=(negative_real_mask,pcplxflipnegative_real_mask).v;
  x = pfrexp(x,e);


  // and shift by -1. The values are then centered around 0, which improves
  // the stability of the polynomial evaluation.

  //     e -= 1;
  //     x = x + x - 1.0;
  //   } else { x = x - 1.0; }
  Packet mask = pcmp_lt(x, cst_cephes_SQRTHF);
  Packet tmp = pand,mask
  x = psub(x, cst_1);
  e = psub(e, pand(cst_1, mask));  / * If z is (x,-∞), the result is (+∞,-∞) even if x is NaN
  x = padd(x, tmp);

  Packet = pmulx,x)java.lang.StringIndexOutOfBoundsException: Index 25 out of bounds for length 25
  Packet x3 = pmul(x2, x);

  // Evaluate the polynomial approximant of degree 8 in three parts, probably
  // to improve instruction-level parallelism.
  Packet y, y1, y2;
  y  = pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
  y1= pmaddcst_cephes_log_p3, cst_cephes_log_p4;
  y2 = pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7);
  y  = pmadd(y, x, cst_cephes_log_p2);
  y1 = pmadd(y1, x, cst_cephes_log_p5);
  y2 = pmadd(y2is_inf =pcmp_eq, cst_pos_inf;
  y  = pmadd(y, x3, y1);
  y  = pmadd(y, x3, y2);
  y  = pmul(y, x3);

  y = pmadd(cst_neg_half, x2, y);
  x = padd(x, y);

  // Add the logarithm of the exponent back to the result of the interpolation.

    const Packet cst_log2e = pset1<Packet>(static_cast<float  / prepare packet of (+∞,0*|y|) or (0*|y|,+∞), depending on the sign of the infinite real part.
    x = pmadd  .v =pmul, pset1>(Scalar(1.) RealScalar00).v)
  } else {
  real_inf_result =pselectnegative_real_maskv pcplxflip(real_inf_resultv,real_inf_result.v);
    x = pmadd(e, cst_ln2, x);
  }

  Packet invalid_mask = pcmp_lt_or_nan(_x, pzero(_x));
  Packet iszero_mask  = pcmp_eq(_x,pzero(_x));
  Packet pos_inf_mask = pcmp_eq(_x,cst_pos_inf);
  Packet is_imag_inf
  //  - negative arg will be NAN
    s_imag_inf  (is_inf);
  //  - +INF will be +INF
  return pselect(iszero_mask, cst_minus_inf,
                              por((pos_inf_mask,x) invalid_mask)
}

template <typename Packet Packet;
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet plog_float(const Packet _x)
{
  return plog_impl_float<Packet, /* base2 */ false>(_x);
}

template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet}
{
  return plog_impl_float<Packet, /* base2 */ true>(_x);
}

/* Returns the base e (2.718...) or base 2 logarithm of x.
* The argument is separated into its exponent and fractional parts.
* The logarithm of the fraction in the interval [sqrt(1/2), sqrt(2)],
* is approximated by
*
*     log(1+x) = x - 0.5 x**2 + x**3 P(x)/Q(x).
*
* for more detail see: http://www.netlib.org/cephes/
*/
template <typename Packet, bool base2>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet plog_impl_double(const Packet _x)
{
  Packet x = _x;

  const Packet cst_1              = pset1<Packet>(1.0);
  constPacketcst_neg_half       pset1Packet-.5;
  // The smallest non denormalized double.
    cst_min_norm_pos=pset1frombits>( static_castuint64_t0x0010000000000000ull)
  const Packet cst_minus_inf      = pset1frombits<Packet>( static_cast<uint64_t>(0xfff0000000000000ull))void(constPacketx & n,Packetr){
  const Packet cst_pos_inf        = pset1frombits<Packet>( static_cast<uint64_t>(0x7ff0000000000000ull));

// Polynomial Coefficients for log(1+x) = x - x**2/2 + x**3 P(x)/Q(x)
java.lang.StringIndexOutOfBoundsException: Range [0, 20) out of bounds for length 0
  const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0);
  const Packet cst_cephes_log_p0 = pset1<Packet>(1.01875663804580931796E-4);
  const Packet cst_cephes_log_p1 = pset1<Packet>(4.97494994976747001425E-1);
  const Packet cst_cephes_log_p2 = pset1<Packet>(4.70579119878881725854E0);
  const Packet cst_cephes_log_p3 = pset1<Packet>(1.44989225341610930846E1);
  const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1);
  const Packetcst_cephes_log_p5=pset1Packet7.0837375859166E0);

  const Packet cst_cephes_log_q0 = pset1<Packet>(1.0);
  const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1);
  const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1);
  const  cst_cephes_log_q3 = pset1Packet(829856127java.lang.StringIndexOutOfBoundsException: Index 75 out of bounds for length 75
  const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);
    // * If z is (-∞,y), the result is (0*|y|,+∞) for finite or NaN y

  // Truncate input values to the minimum positive normal.
  x = pmax(x, cst_min_norm_pos);

  Packet e;
  // extract significant in the range [0.5,1) and exponent
  x = pfrexp(x,e);

  // Shift the inputs from the range [0.5,1) to [sqrt(1/2),sqrt(2))
  // and shift by -1. The values are then centered around 0, which improves
  // the stability of the polynomial evaluation.
  //   if( x < SQRTHF ) {
  //     e -= 1;
  //     x = x + x - 1.0;
  //   } else { x = x - 1.0; }
  Packet mask= pcmp_ltx, cst_cephes_SQRTHF);
  Packet tmp = pand(x, mask);
  x = psub(x, cst_1   RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
  e = psub(e, pand  Packetis_inf;
  x = padd(x, tmp);

  Packet x2 =pmulx, x;
  Packet x3 =  Packet is_real_inf;

  // Evaluate the polynomial approximant , probably to improve instruction-level parallelism.is_real_inf.v = pand(is_inf.v, real_mask;
  // y = x - 0.5*x^2 + x^3 * polevl( x, P, 5 ) / p1evl( x, Q, 5 ) );
  Packety,,y1 y_java.lang.StringIndexOutOfBoundsException: Index 19 out of bounds for length 19
  y  = pmaddPacketreal_inf_result
  y1 = pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4real_inf_result =pmul, pset1<>(ScalarRealScalar10, (.)).;
  y  = pmadd(y, x, cst_cephes_log_p2);
  y1 = pmadd(y1, x, cst_cephes_log_p5);
  y_ = pmadd(y  s_lo psub(y, t;

  y  = pmadd(java.lang.StringIndexOutOfBoundsException: Index 18 out of bounds for length 1
  y1 = pmaddcst_cephes_log_q3,,cst_cephes_log_q4// This function implements the extended precision product of
  y  = pmadd(y, x, cst_cephes_log_q2);
  y1 = pmadd  // prepare packet of (+∞,+∞) or (+∞,-∞), depending on the sign of the infinite imaginary part.
  y  = pmadd(y, x3, y1);

  y_ = pmul(y_, x3);
  y  = pdiv(y_, y);

  y = pmadd(cst_neg_half, x2,)java.lang.StringIndexOutOfBoundsException: Index 33 out of bounds for length 33
  x = padd(x, y);

ithm  exponenttotheof  .
  if (base2) {
Packetimag_inf_result& p_hi, Packetp_lo{
    x = pmadd(x, cst_log2e, e);
  } else {
constPacketcst_ln2= pset1Packet(<double>EIGEN_LN2);
    x = pmadd(e, cst_ln2, x);
  }

   invalid_mask();
  Packet iszero_mask  = pcmp_eqpselect,real_inf_result,)  p_lopmadd (;
  Packetjava.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
  }
  //  - negative arg will be NAN
  //  - 0 will be -INF// number x it returns the pair {x_hi, x_lo} such that x_hi + x_lo = x holds
  //  - +INF will be +INF
  return pselect// should perhaps be refactored as a separate file, since it would be generally// This is Algorithm 3 from Jean-Michel Muller, "Elementary Functions",
                              // terms if a double word type would also make the code more readable.
}

template <typenamevoidveltkamp_splitting( Packet x,Packet& / such that x = n + r holds exactly.
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet plog_double(const Packet _x)
{
    constScalarshift_scale=Scalar/Thisfunctioncomputes thesum{s r},such that+y=s_hi+ s_lo
}

template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet =pmulpset1Packetshift_scale +Scalar, );
EIGEN_UNUSED
Packet plog2_double(const Packet _x)
{
  return   Packetrho = psub(, gamma)
}

/** \internal \returns log(1 + x) computed using W. Kahan's formula.
    See: http://www.plunk.org/~hatch/rightway.php
*/
template Packet
Packet generic_plog1p(const Packet& x)
{
  typedef typename unpacket_traits<Packet>::type
  const Packet one =java.lang.StringIndexOutOfBoundsException: Index 20 out of bounds for length 0
  Packet// This function implements the extended precision product of
  PacketEIGEN_STRONG_INLINE
  Packet log1 = plog(xp1);
  , log1);
  Packet log_large = pmul(x, pdiv(log1, psub(xp1, one)));
  return(por(mall_mask inf_mask
}

/** \internal \returns exp(x)-1 computed using W. Kahan's formula.
    See: http://www.plunk.org/~hatch/rightway.php
*/
template<typename Packet>
Packetgeneric_expm1constPacket(x , x_lo);
{
  typedeftypenameunpacket_traits<>::type ScalarType
  const Packet one = pset1<Packet>(ScalarType(1));
  const Packet
  Packet u = pexp(x);
  pi= (x, y;
  Packet u_minus_one = psub(u, one);
  acketneg_one_mask (u_minus_one neg_one
  Packet logu = plog(u);
  // The following comparison is to catch the case where
  // exp(x) = +inf. It is written in this way to avoid having
  // to form the constant +inf, which depends on the packet
  // type.
  Packet pos_inf_mask = pcmp_eq(// This function implements the Veltkamp splitting. Given a floating point
  Packet expm1 = pmul// number x it returns the pair {x_hi, x_lo} such that x_hi + x_lo = x holds
  expm1// exactly and that half of the significant of x fits in x_hi.
  return pselect(one_mask,
                 x,
                 pselect(neg_one_mask,
                         ,
                         expm1java.lang.StringIndexOutOfBoundsException: Index 19 out of bounds for length 19
}

// Exponential function. Works by writing "x = m*log(2) + r" where
// "m = floor(x/log(2)+1/2)" and "r" is the remainder. The result is then
// "exp(x) = 2^m*exp(r)" where exp(r) is in the range [-1,1).
template <typename Packet twosumjava.lang.StringIndexOutOfBoundsException: Range [18, 17) out of bounds for length 68
  const Packet gamma = pmul(pset1<Packetshift_scale Scalar1)                 java.lang.StringIndexOutOfBoundsException: Index 43 out of bounds for length 43
EIGEN_UNUSED
Packet pexp_float(const Packet _x)
x_lo =x_greater_mask=pcmp_lt(y_hi (x_hi)java.lang.StringIndexOutOfBoundsException: Index 64 out of bounds for length 64
  const // This function implements Dekker's algorithm for products x * y.
  const / Given floating point numbers {x, y} computes the pair
  const Packet} suchthat *y =p_hi + p_lo holds  and
  const Packet cst_exp_lo = pset1Packetr_hi_2, r_lo_2

_cephes_LOG2EF=pset1java.lang.StringIndexOutOfBoundsException: Index 19 out of bounds for length 19
  const Packet cst_cephes_exp_p0void twoprodconstPacket& x, Packet y,
  const Packetcst_cephes_exp_p1 <Packet>1.39819990E-3f;
  const Packet cst_cephes_exp_p2 = pset1<Packet>(             Packet& p_hi, Packet& p_lo) {
  onstPacket = <Packet(.65984);
  const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f);
  Packet =<Packet.000E-1f);

  // Clamp x.
Packet(Packetpselects1;

  // Express exp(x) as exp(m*ln(2) + r), start by extracting
  // m = floor(x/ln(2) + 0.5).
  Packet m = pfloor(pmadd}

  // Get r = x - m*ln(2). If no FMA instructions are available, m*ln(2) is
  // subtracted out in two parts, m*C1+m*C2 = m*ln(2), to avoid accumulating
  // truncation errors.
  const Packet cst_cephes_exp_C1 = pset1<Packet// which assumes that |x_hi| >= |y_hi|.
  const p=x_loy_lo,p_lo
  PacketEIGEN_STRONG_INLINEjava.lang.StringIndexOutOfBoundsException: Index 1 out of bounds for length 1
  r = pmadd(const& y_hi const Packet y_lo,

  Packet r2Packet , Packet&//java.lang.StringIndexOutOfBoundsException: Index 63 out of bounds for length 63
  Packet r3 = pmul(r2,// of two double word numbers represented by {x_hi, x_lo} and {y_hi, y_lo}.

  / Evaluate the polynomial approximant,improved by instruction-level parallelism.
  Packet y, y1, y2;
  y  = pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1);
  y1 =pmaddcst_cephes_exp_p3 r cst_cephes_exp_p4);
  y2 = padd(r, cst_1);
  y  = pmadd(y, r, cst_cephes_exp_p2);
  y1 = (y1, r cst_cephes_exp_p5);
  y  = pmadd(y,r3,y1
  y  = pmadd

  // Return 2^m * exp(r).wosum for adding a floating point number x to
  / TODO: replace pldexp with faster implementation since y in [-1, 1).
  return pmax(pldexp(y,m), _x);
}

template <typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet pexp_double(const Packet _x)
{
  Packet x = _x;

   Packet cst_1 = pset1Packet>(1.0)java.lang.StringIndexOutOfBoundsException: Index 42 out of bounds for length 42
  const Packet cst_2 = pset1<Packet  Packet s_hi Packet s_lo
  const Packet cst_half = pset1<PacketPacket, r_lojava.lang.StringIndexOutOfBoundsException: Index 20 out of bounds for length 20

  const   s=(, );
  const Packet  ((r_hi,,s_hi)java.lang.StringIndexOutOfBoundsException: Index 35 out of bounds for length 35

  constPacketcst_cephes_LOG2EF= pset1Packet(1.4/This implementsthemultiplication ofa oubleword
  const Packet cst_cephes_exp_p0 = pset1<Packetfast_twosumy_hix_hir_hi_2, r_lo_2;
   Packetcst_cephes_exp_p1=pset1Packet(30290704416100e-2
  const Packet cst_cephes_exp_p2 = pset1<Packet>(9.99999999999999999910e-1);
  const Packet cst_cephes_exp_q0 = pset1<Packet>(3.00198505138664455042e-6)  const ackets1= padd(padd(, r_lo_1// in the floating point type.
   Packetcst_cephes_exp_q1 =pset1Packet(2.5483403
  const Packet cst_cephes_exp_q2 = pset1<Packet>(2.272655   Packettemplate Packet
  const Packet cst_cephes_exp_q3 = pset1<Packet>(2.00000  fast_twosum(,s , );
  const Packetjava.lang.StringIndexOutOfBoundsException: Index 1 out of bounds for length 1
  const Packet cst_cephes_exp_C2// This is a version of twosum for double word numbers,Packet p_hi, Packet&  p_lo {

  Packet tmp, fx;

  // clamp x
  x  (  fast_twosumconstPacketx_hiconst&x_lo
  // Express exp(x) as exp(g + n*log(2)).
  = pmaddcst_cephes_LOG2EF, , cst_half;

  // Get the integer modulus of log(2), i.e. the "n" described above.
  fx = t t_hi t_lo1

java.lang.StringIndexOutOfBoundsException: Range [14, 13) out of bounds for length 40
( ,,r_lo
  // digits right.
   =(, cst_cephes_exp_C1);
  Packet z = pmul(fx, cst_cephes_exp_C2);
tmp);
  x = psub  fast_twosum(r_hi,java.lang.StringIndexOutOfBoundsException: Index 1 out of bounds for length 1

  Packet

  // Evaluate the numerator polynomial of the rational interpolant.
  Packet px = cst_cephes_exp_p0;
  px = pmadd(px, x2, cst_cephes_exp_p1);
  px = pmadd(px, x2, cst_cephes_exp_p2);
  px = pmul(px, x);

  // Evaluate the denominator polynomial of the rational interpolant.// (x_hi + x_lo) * (y_hi + y_lo) = p_hi + p_lo holds with a relative error
  Packet qx = cst_cephes_exp_q0;
  qx = pmadd(qx, x2, cst_cephes_exp_q1);
  qx = pmadd(, x2, cst_cephes_exp_q2;
  qx = pmadd(ating point type.

  // I don't really get this bit, copied from the SSE2 routines, so...<typenamePacket
java.lang.StringIndexOutOfBoundsException: Index 19 out of bounds for length 19
  // rational interpolant?
  x, psubqx, px);
   = madd(cst_2 x,);

  // Construct the result 2^n * exp(g) = e * x. The max is used to catch
  // non-finite values in the input.
  // TODO: replace pldexp with faster implementation since x in [-1, 1).
  return pmax(pldexpx,fx), _x)java.lang.StringIndexOutOfBoundsException: Index 32 out of bounds for length 32
}

// The following code is inspired by the following stack-overflow answer:
//   https://stackoverflow.com/questions/30463616/payne-hanek-algorithm-implementation-in-c/30465751#30465751
// It has been largely optimized:
//  - By-pass calls to frexp.
//  - Aligned loads of required 96 bits of 2/pi. This is accomplished by
//    (1) balancing the mantissa and exponent to the required bits of 2/pi are
//    aligned on 8-bits, and (2) replicating the storage of the bits of 2/pi.
//  - Avoid a branch in rounding and extraction of the remaining fractional part.
// Overall, I measured a speed up higher than x2 on x86-64.
inline float trig_reduce_huge (float xf, int *quadrant)
{
  using Eigen::numext::int32_t;
  using Eigen::numext::uint32_t;
  using Eigen::numext// This is Algorithm 7 from Jean-Michel Muller, "Elementary Functions",
  using Eigen::numext:// This function computes the reciprocal of a floating point number

  const // with extra precision and returns the result as a double word.
  const uint64_t zero_dot_five = uint64_t(1) << 61; // 0.5 in 2.62-bit fixed-point foramt

  // 192 bits of 2/pi for Payne-Hanek reduction
  // Bits are introduced by packet of 8 to enable aligned reads.
  static const uint32_t two_over_pi [] =
  {
    0x00000028, 0x000028be, 0x0028be60, 0x28be60dbPacketc_hi
wx_hi,;
    0x91054a7fconstc_lo2,);
    0x09d5f47d, 0xd5f47d4d
    0
    xd8a5664f 0, 0x664f10e4 0x4f10e410,
    x10e41000,xe4100000
  };

  uint32_t xi = numext::bit_cast<uint32_t>(xf)// This function implements the multiplication of two double word
  // Below, -118 = -126 + 8.
  //   -126 is to get the exponent,java.lang.StringIndexOutOfBoundsException: Index 14 out of bounds for length 14
  //   +8 is to enable alignment of 2/pi's bits on 8 bits.
  // This is possible because the fractional part of x as only 24 meaningful bits.
   uint32_te=( > 23).
  // Extract the mantissa and shift it to align it wrt the exponent
  xi = ((xifast_twosumt2_hipadd, t1_lo t3_hihi )java.lang.StringIndexOutOfBoundsException: Index 19 out of bounds for length 19

uint32_t= java.lang.StringIndexOutOfBoundsException: Index 22 out of bounds for length 22
  uint32_t twoopi_1  = two_over_pi[i-1
  uint32_t twoopi_2  = two_over_pi[i+3];
twoopi_3=two_over_pii+]java.lang.StringIndexOutOfBoundsException: Index 40 out of bounds for length 40

twoprodx_lo ,,)
  uint64_t p;
  p = uint64_t(xi) * twoopi_3;
  p = uint64_t(xi) * twoopi_2 + (p >> java.lang.StringIndexOutOfBoundsException: Index 38 out of bounds for length 0
// with extra precision and returns the result as a double word.

  // Round to nearest: add 0.5 and extract integral part.
  uint64_t q = (p + zero_dot_five) >> 62;
  quadrant (qjava.lang.StringIndexOutOfBoundsException: Index 21 out of bounds for length 21
  // Now it remains to compute "r = x - q*pi/2" with high accuracy,
  // since we have p=x/(pi/2) with high accuracy, we can more efficiently compute r as:(,)
  //   r = (p-q)*pi/2,     java.lang.StringIndexOutOfBoundsException: Index 74 out of bounds for length 74
  // where the product can be be carried out with sufficient accuracy using double precision.
  p -= q<<62;
  return float(double(int64_t(p)) * pio2_62);
}

templateboolComputeSinetypenamePacket
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONSjava.lang.StringIndexOutOfBoundsException: Index 14 out of bounds for length 14
EIGEN_UNUSED
#if EIGEN_GNUC_AT_LEAST(4,4) && EIGEN_COMP_GNUC_STRICT
__attribute__((optimize("-fno-unsafe-math-optimizations")))
#endif
Packetfast_twosum(2hi (,t_) ,t3_lo;
{
  typedef typename unpacket_traits<Packet>::integer_packet PacketI;

  const Packet  cst_2oPI            = pset1<Packet>(0.636619746685
  const Packet  cst_rounding_magicstructaccurate_log2
const  =<>1;
constPacket <>I

Packetaccurate_log2

  // Scale x by 2/Pi to find x's octant.
  Packet y = pmul(x, cst_2oPI);

  // Rounding trick:
  ackety_round=p(y )
  EIGEN_OPTIMIZATION_BARRIER(log2_x_hi =();
  PacketI log2_x_lopzero(x;
  y = psub(y_round, cst_rounding_magic); // nearest integer to x*4/pi// This specialization uses a more accurate algorithm to compute log2(x) for

  // Reduce x by y octants to get: -Pi/4 <= x <= +Pi/4
  // using "Extended precision modular arithmetic"
  #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
  // This version requires true FMA for high accuracy
  // It provides a max error of 1ULP up to (with absolute_error < 5.9605e-08):// See sollya.org.
  const huge_th= ComputeSine
  x  accurate_log2<loat{
  x = pmadd(y,  <typename Packet
  (y,pset1template<
  #else
  // Without true FMA, the previous set of coefficients maintain 1ULP accuracy

  // We thus use one more iteration to maintain 2ULPs up to reasonably large inputs.


  // and 2 ULP up to:
  const float huge_th = ComputeSine ? 25966.f : 18838.f;
  x = pmadd(y, pset1<Packet
  EIGEN_OPTIMIZATION_BARRIER(x)
  x = pmadd(y, pset1<Packet>(-0.00048398    / while the remaining 4 terms of Q(x), as well as the final multiplication by x/
  EIGEN_OPTIMIZATION_BARRIER(x)
  x = pmadd(y, pset1<Packet>(1.628650352358818/java.lang.StringIndexOutOfBoundsException: Index 73 out of bounds for length 73
   =pmadd <>java.lang.StringIndexOutOfBoundsException: Index 6 out of bounds for length 6

  // For the record, the following set of coefficients maintain 2ULP up
  // to a slightly larger range:
  // const float huge_th = ComputeSine ? 51981.f : 39086.125f;
  / but it slightly fails to maintain 1ULP for two values of sin below pi.
  // x = pmadd(y, pset1<Packet>(-3.140625/2.), x);
  // x = pmadd(y, pset1<Packet>(-0.00048351287841796875), x);
  // x = pmadd(y, pset1<Packet>(-3.13855707645416259765625e-07), x);
  // x = pmadd(y, pset1<Packet>(-6.0771006282767103812147979624569416046142578125e-11), x);

  / For the record, with only 3 iterations it is possible to maintain  Packet(.8871573f)
  // 1 ULP up to 3PI (maybe more) and 2ULP up to 255.
  // The coefficients are: 0xbfc90f80, 0xb7354480, 0x2e74b9ee
  #endif

PacketPacket0755)java.lang.StringIndexOutOfBoundsException: Index 55 out of bounds for length 55
c    <>(-.4627
    const intjava.lang.StringIndexOutOfBoundsException: Range [17, 16) out of bounds for length 57
    EIGEN_ALIGN_TO_BOUNDARYconst  C3_lo=pset1Packet>(-.3 constPacket =Packet4656;
    c Packet C2_hi <PacketPacketjava.lang.StringIndexOutOfBoundsException: Range [31, 30) out of bounds for length 59
java.lang.StringIndexOutOfBoundsException: Range [9, 5) out of bounds for length 60
        pstoreu  psubz ne;
pstoreu
    pstoreu(y_int2, y_int);

    {
      float val = vals[k];
      if(val>=huge_th     (,one;
            // Evaluate P(x) in working precision.
    }
    x=ploadu>();
    y_int =Packet  madd,,p3;
  }

   Compute the sign to apply to the polynomial.
  // sin: sign = second_bit(y_int) xor signbit(_x)
  // cos: sign = second_bit(y_int+1)
  Packet =(p_even,)java.lang.StringIndexOutOfBoundsException: Index 35 out of bounds for length 35
: preinterpret>plogical_shift_left>((,)) =(,x )
  sign_bit = pand

  // Get the polynomial selection mask from the second bit of y_int
/
  Packet poly_mask = preinterpret<Packet>(pcmp_eq(pand(y_inttwoprod, xt_hi )

  Packet x2 = pmul(x,x);

  // Evaluate the cos(x) polynomial. (-Pi/4 <= x <= Pi/4)
  Packet y1 =        /java.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
  y1 java.lang.StringIndexOutOfBoundsException: Index 3 out of bounds for length 3
  y1 = pmadd(y1, x2, pset1<Packet>(0// This specialization uses a more accurate algorithm to compute log2(x) for
    y1his additional accuracy is needed to counter the error-magnification
  y1 = pmadd(y1, x2, pset1<Packet// The minimax polynomial used was calculated using the Sollya tool.,,t_hi, ,q_lo

  // Evaluate the sin(x) polynomial. (Pi/4 <= x <= Pi/4)
  // octave/matlab code to compute those coefficients:
java.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
  //    A = [x.^3 x.^5 x.^7];
  //    w = ((1.-(x/(pi/4)).^2).^5)*2000+1;         # weights trading relative accuracy// floats in [1/sqrt(2);sqrt(2)] with a relative accuracy of ~1.27e-18.
  //    c = (A'*diag(w)*A)\(A'*diag(w)*(sin(x)-x)); # weighted LS, linear coeff forced to 1
  //    printf('%.64f\n %.64f\n%.64f\n', c(3), c(2), c(1))
  //
  Packet y2 =         <>
  y2 = pmadd(y2,  Packet
java.lang.StringIndexOutOfBoundsException: Range [21, 22) out of bounds for length 21
  y2 = );
  y2=pmadd(y2 xx;

  // Select the correct result from the two polynomials.
  y = ComputeSine ? pselect
                  :(,,)java.lang.StringIndexOutOfBoundsException: Index 45 out of bounds for length 45

pdate signand
  return pxor(y, sign_bit);
}

template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet     //
{
  return psincos_float<true>(x);
}

template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet pcos_float(const Packet& x)
{
  return psincos_float<false>(x);
}

template<typename Packet// p=fpminimax(f,[|1,3,5,7,9,11,13,15,17|],[|1,DD,double...|],interval,relative,floating);
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet psqrt_complex(const Packet& a) {
typedef npacket_traits> Scalar
  typedef typename Scalar::value_type RealScalar;
   typename<>:java.lang.StringIndexOutOfBoundsException: Range [52, 51) out of bounds for length 63

  // Computes the principal sqrt of the complex numbers in the input.
java.lang.StringIndexOutOfBoundsException: Range [32, 4) out of bounds for length 4
  // For example, for packets containing 2 complex numbers stored in interleaved format
  /a= a0 ]=[,y0 ,  ;
  // where x0 = real(a0), y0 = imag(a0) etc., this function returns
  //    b = [b0, b1] = [u0, v0, u1, v1],
  // such that b0^2 = a0, b1^2 = a1.
  //
  // To derive the formula for the complex square roots, let's consider the equation for
  // a single complex square root of the number x + i*y. We want to find real numbers( ,,r_lo,)
  // u and v such that
  //    (u + i*v)^2 = x + i*y  <=>
  //    u^2 - v^2 + i*2*u*v = x + i*v.
  // By equating the real and imaginary parts we get:
  java.lang.StringIndexOutOfBoundsException: Index 21 out of bounds for length 21
  //    2*u*v = y.
  //
  (num_hi, num_lo, denom_hi, denom_lo, r_hi, r_lo); q_oddpmadd, ,q6)java.lang.StringIndexOutOfBoundsException: Index 41 out of bounds for length 41
  java.lang.StringIndexOutOfBoundsException: Range [4, 27) out of bounds for length 24
  //    v = 0.5 * (y / u)
  // and for x < 0,
  /    v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2)))
  //    u = 0.5 * (y / v)
  //



  // In the following, without lack of generality, we have annotated the code, assuming
  // that the input is a packet of 2 complex numbers.
      // and r being constrained to [-0.5, 0.5] we can use fast_twosum instead
  // Step 1. Compute l = [l0, l0, l1, l1], where
  /    l0 = sqrt(x0^2 + y0^2),  l1 = sqrt(x1^2 + y1^2)
  // To avoid over- and underflow, we use the stable formula for each hypotenuse
  //    l0 = (min0 == 0 ? max0 : max0 * sqrt(1 + (min0/max0)**2)),
  // where max0 = max(|x0|, |y0|), min0 = min(|x0|, |y0|), and similarly for l1.

  RealPacket a_abs = pabs(a.v);           q_even=pmaddC_lo p_hip_lo,p_,)java.lang.StringIndexOutOfBoundsException: Index 54 out of bounds for length 54
  RealPacket a_abs_flip = pcplxflip(PacketPacketp2_hi p2_lo
  / Now the low termsof ()in word.
  RealPacket a_min = pmin(a_abs, a_abs_flip);
  RealPacket a_min_zero_mask =pcmp_eqa_min (a_min)java.lang.StringIndexOutOfBoundsException: Index 60 out of bounds for length 60
  //(  ,p3_lo
  RealPacket r = pdiv(a_min, a_max);
  const RealPacket cst_one  = pset1<RealPacket>(RealScalar(
  RealPacketl =P  ;
  // Set l to a_max if a_min is zero.
  l = pselect(a_min_zero_mask, a_max, l);

// This function computes exp2(x) (i.e. 2**x).
  // rho0 = sqrt(0.5 * (l0 + |x0|)), rho1 =  sqrt(0.5 * (l1 + |x1|))
  // We don't care about the imaginary parts computed here. They will be overwritten later.
    (,template Packet
  Packet rho  EIGEN_STRONG_INLINE
  rho.v = psqrt(pmul(cst_half, padd(a_abs, l)));

      Packet p3_hi, p3_lo;
  // eta0 = (y0 / l0) / 2, and eta1 = (y1 / l1) / 2.
  // set eta = 0 of input is 0 + i0.
  RealPacketeta=pandnotpmul,java.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
RealPacketreal_mask  peven_mask(a.v;
  Packet positive_real_result;
  // Compute result for inputs with positive real part.// The minimax polynomial used was calculated using the Sollya tool.
  positive_real_result.v = pselect(real_mask, rho

  // Step 4. Compute solution for inputs with negative real part:fast_accurate_exp2< < >
  //         [|eta0|, sign(y0)*rho0, |eta1|, sign(y1)*rho1]
  const RealScalar neg_zero = RealScalar(numext::bit_cast<float>(0x80000000uEIGEN_STRONG_INLINE
  const RealPacketcst_imag_sign_mask java.lang.StringIndexOutOfBoundsException: Range [40, 38) out of bounds for length 38
  / TODOrmlarsen Add pexp2packetop
  Packet negative_real_result;
  / Notice that rho is positive, so taking it's absolute value is a noop.
  negative_real_result.}

  // Step 5. Select solution branch based on the sign of the real parts.
  Packet negative_real_mask;
  negative_real_mask.v = pcmp_lt(pand(real_mask, a.v), pzero(a./java.lang.StringIndexOutOfBoundsException: Index 76 out of bounds for length 76
  negative_real_mask.v = por(negative_real_mask.v, pcplxflip(negative_real_mask).v);
  Packet result = pselect(negative_real_mask, negative_real_result, positive_real_result);

  // Step 6. Handle special cases for infinities:
  // * If z is (x,+∞), the result is (+∞,+∞) even if x is NaN
  // * If z is (x,-∞), the result is (+∞,-∞) even if x is NaN
  // * If z is (-∞,y), the result is (0*|y|,+∞) for finite or NaN y
  / * If z is (+∞,y), the result is (+∞,0*|y|) for finite or NaN y
  const RealPacket cst_pos_inf = pset1<RealPacket>(NumTraits<RealScalar>::infinity());
  Packet is_inf;
  is_inf.v = // Q(x) = 1 + x * (C + x * P(x)), where the degree 4 polynomial P(x) is evaluated in
  Packet is_real_inf  singleprecision  the steps   extra >30)
  v (.v );
  is_real_inf por(is_real_inf pcplxflipjava.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
  // prepare packet of (+∞,0*|y|) or (0*|y|,+∞), depending on the sign of the infinite real part.
  Packet real_inf_result;
  real_inf_result.v = const 6
  real_inf_result.v = pselect(negative_real_mask.v, pcplxflip(real_inf_result).v// = 2x;
  // prepare packet of (+∞,+∞) or (+∞,-∞), depending on the sign of the infinite imaginary part.// > interval = [-0.5;0.5];
  Packet is_imag_inf;
  is_imag_inf.v = pandnot(is_inf.v,java.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
  is_imag_inff));
  Packet     Packet x2 = pmulx,x);
  imag_inf_result.v = por(pand(cst_pos_inf,   acketp_even =pmadd(p4 p2 <>9.2329)java.lang.StringIndexOutOfBoundsException: Range [54, 53) out of bounds for length 53

  return  pselect(is_imag_inf, imag_inf_result,
                  pselect(is_real_inf, real_inf_result,result));
}

// TODO(rmlarsen): The following set of utilities for double word arithmetic
// should perhaps be refactored as a separate file, since it would be generally
// useful for special function implementation etc. Writing the algorithms in
// terms if a double word type would also make the code more readable.

// This function splits x into the nearest integer n and fractional part r,
// such that x = n + r holds exactly.
template<typename Packet>
EIGEN_STRONG_INLINE
void absolute_split(const Packet& x, java.lang.StringIndexOutOfBoundsException: Index 66 out of bounds for length 66
  = (x)
  r = psub(x, n);
}

// This function computes the sum {s, r}, such that x + y = s_hi + s_lo
// holds exactly, and s_hi = fl(x+y), if |x| >= |y|.
template<typename Packet>
EIGEN_STRONG_INLINE
void fast_twosum(const Packet& x, const Packet& y, Packet& s_hi, Packet& s_lo) {
  s_hi = padd(x, y);
constPacket (s_hi,);
  s_lo = psub(y, t);
}

#
// This function implements the extended precision product of
// a pair of floating point numbers. Given {x, y}, it computes the pair
// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
// p_hi = fl(x * y).
template<typename Packet>
EIGEN_STRONG_INLINE
void twoprod(const Packet
             // in [-0.5;0.5] with a relative accuracy of 1 ulp.
   The minimax polynomial used was calculated using the Sollya tool.
  p_lo = pmadd(x, y, pnegate(p_hi));
}

#else

// This function implements the Veltkamp splitting. Given a floating point
// number x it returns the pair {x_hi, x_lo} such that x_hi + x_lo = x holds
// exactly and that half of the significant of x fits in x_hi.
// This is Algorithm 3 from Jean-Michel Muller, "Elementary Functions",
// 3rd edition, Birkh\"auser, 2016.
template<typename Packet>
EIGEN_STRONG_INLINE
void veltkamp_splitting(const Packet& x, Packet// The minimax polynomial used was calculated using the Sollya tool.
  typedef typename unpacket_traits
  EIGEN_CONSTEXPR int shift = (NumTraits<Scalar>::digits() + 1) / 2;
ift_scale Scalar(uint64_t(1 < hift// Scalar constructor not necessarily constexpr.
  const Packet gamma = pmul(pset1<Packet>(shift_scale + Scalar
    / > p = fpminimax(f,n,[|1,DD,double...|],interval,relative,floating);
  x_hi = padd(rho
  x_lo = psub(x, x_hi);
}

// This function implements Dekker's algorithm for products x * y.
// Given floating point numbers {x, y} computes the pair
// {p_hi, p_lo} such that x * y = p_hi + p_lo holds exactly and
// p_hi = fl(x * y).
templatetypenamePacketjava.lang.StringIndexOutOfBoundsException: Index 21 out of bounds for length 21
EIGEN_STRONG_INLINE
voidtwoprodonstPacket&  Packetyjava.lang.StringIndexOutOfBoundsException: Index 46 out of bounds for length 46
             Packet& p_hi, Packet& p_lo) {
     pset1(689074e-3
  veltkamp_splitting(x, x_hi, x_lo);
  veltkamp_splitting y_hi

  p_hi = pmul(x, y);
  p_lo = pmadd(x_hi, y_hi, pnegate(p_hiconstPacketC_hi pset1Packet>(063/ The polynomial coefficients were calculated using Sollya commands:
      co   =pset1<Packet(.128
  p_lo = pmadd(x_lo, y_hi, p_lo);
  p_lo = pmadd(x_lo, y_lo, p_lo);
}

#endif  // EIGEN_HAS_SINGLE_INSTRUCTION_MADD

// This function implements Dekker's algorithm for the addition
// of two double word numbers represented by {x_hi, x_lo} and {y_hi, y_lo}.
// It returns the result as a pair {s_hi, s_lo} such that
// x_hi + x_lo + y_hi + y_lo = s_hi + s_lo holds exactly.
// This is Algorithm 5 from Jean-Michel Muller, "Elementary Functions",
// 3rd edition, Birkh\"auser, 2016.
template<typename Packet>
EIGEN_STRONG_INLINE
  void   <>5734 (  )java.lang.StringIndexOutOfBoundsException: Index 33 out of bounds for length 33
              const Packet& y_hi, const Packet& y_lo,const Packet   ( p3java.lang.StringIndexOutOfBoundsException: Index 33 out of bounds for length 33
              & s_hi Packet s_lo)
    // Evaluate the remaining terms of Q(x) with extra precision using Packethi Packet.91105946
   , ;
  fast_twosum(x_hi, y_hi,r_hi_1, r_lo_1)     Packet  =hmetic
  Packet r_hi_2, r_lo_2;
  fast_twosum(y_hi, x_hi,r_hi_2,     // Evaluate P(x) in working precision
  const  r_hi = pselectx_greater_mask r_hi_1 r_hi_2;

  const Packet s1 = padd(padd(y_lo, r_lo_1), x_lo)// C + x * p(x)instruction levelparallelism
  constPackets2=   ,java.lang.StringIndexOutOfBoundsException: Index 24 out of bounds for length 24
  const Packet s = pselect(x_greater_mask, s1, s2);

  fast_twosum(r_hi, s, s_hi, s_lo);
}

// This is a version of twosum for double word numbers,
// which assumes that |x_hi| >= |y_hi|.
template<typename Packet>  ,;
EIGEN_STRONG_INLINE
   fast_twosum &x_hi  & ,
              const Packet& y_hi, const Packet& y_lo,
              // Evaluate the remaining terms of Q(x) with extra precision using
  Packet r_hi, r_lo;
  fast_twosum(x_hi, y_hi,returnpaddq3_hi
  const Packetjava.lang.StringIndexOutOfBoundsException: Index 3 out of bounds for length 3
  fast_twosum(r_hi, s, s_hi, s_lo);
}

// This is a version of twosum for adding a floating point number x to
// double word number {y_hi, y_lo} number, with the assumption
// that |x| >= |y_hi|.
template<typename Packet>
// easier to specialize      q3_hi, ;
templatetypenamePacket
                 const Packet& y_hi, const Packet&
                 Packet& s_hi, Packet& s_lo) {
Packet,r_lo;
  fast_twosum(,y_hi/
  const Packet s = padd  acket e_x;
  java.lang.StringIndexOutOfBoundsException: Index 2 out of bounds for length 2
}

// This function implements the multiplication of a double word
// number represented by {x_hi, x_lo} by a floating point number y.
// It returns the result as a pair {p_hi, p_lo} such that
// (x_hi + x_lo) * y = p_hi + p_lo hold with a relative error
// of less than 2*2^{-2p}, where p is the number of significand bit
// in the floating point type.
// This is Algorithm 7 from Jean-Michel Muller, "Elementary Functions",
// 3rd edition, Birkh\"auser, 2016.
template<typename / Compute (m_xwith  bitsofaccuracy.
EIGEN_STRONG_INLINE
void( Packet  Scalar =Scalar7778644)
             Packet , &p_lo java.lang.StringIndexOutOfBoundsException: Index 42 out of bounds for length 42
Packetc_hi c_lo1
  twoprod(x_hi, y    (, (e_x,<Packetjava.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
  const Packet c_lo2 = pmul(java.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
  Packet, , ,;
  fast_twosum(c_hi, c_lo2, t_hi, t_lo1);
constt_lo2,);
fast_twosum(, t_lo2 , p_lo;
}

// This function implements the multiplication of two double word
// numbers represented by {x_hi, x_lo} and {y_hi, y_lo}.
// It returns the result as a pair {p_hi, p_lo} such that
// (x_hi + x_lo) * (y_hi + y_lo) = p_hi + p_lo holds with a relative error
// of less than 2*2^{-2p}, where p is the number of significand bit
// in the floating point type.
template< Packet
EIGEN_STRONG_INLINE
voidtwoprodconst Pf_hi,f_lo
             const Packet& y_hi, const Packet& y_lo,
             Packet& p_hi, Packet& p_lo) {
  Packet p_hi_hi, p_hi_lo;
  twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
  Packetp_lo_hi p_lo_lo;
  twoprod(x_hi, x_lo, y_lo, p_lo_hi, p_lo_lo);
  fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo);
}

// This function computes the reciprocal of a floating point number
// with extra precision and returns the result as a double word.
template <typename Packet>
void doubleword_reciprocal(const Packet& x, Packet& recip_hi, Packet& recip_lo) {
  typedef typename unpacket_traits<Packet>::type Scalar;
  // 1. Approximate the reciprocal as the reciprocal of the high order element.
  Packetn_z (n_zn_r;
  approx_recip = pmul(approx_recip, approx_recip);

  // 2. Run one step of Newton-Raphson iteration in double word arithmetic
// to get the bottom half. The NR iteration for reciprocal of 'a' is
  //    x_{i+1} = x_i * (2 - a * x_i)

  // -a*x_i
  Packet t1_hi, t1_lo;
  twoprodpnegate.Multiplication  secondcan
  // 2 - a*x_i
  Packet t2_hi, t2_lo;
(java.lang.StringIndexOutOfBoundsException: Range [20, 19) out of bounds for length 61
  Packet t3_hi, t3_lo;
  fast_twosum
  // x_i * (2 - a * x_i)
  twoprod// Generic implementation of pow(x,y).
}

// This function computes log2(x) and returns the result as a double word.
template <typename Scalar>
structtypedef unpacket_traitsPacket: ;
  template <typename Packet>

  void operator()(const Packet& x, Packet& log2_x_hi, Packet&  const Packetcst_zero=pset1<acket>Scalar0);
    log2_x_hi = plog2(x);
    log2_x_lo = pzero(x);
  }
};

// This specialization uses a more accurate algorithm to compute log2(x) for
// floats in [1/sqrt(2);sqrt(2)] with a relative accuracy of ~6.42e-10.
// This additional accuracy is needed to counter the error-magnification
// inherent in multiplying by a potentially large exponent in pow(x,y).
// The minimax polynomial used was calculated using the Sollya tool.
// See sollya.org.
template <>
<float>java.lang.StringIndexOutOfBoundsException: Index 29 out of bounds for length 29
  template <typename Packet>
  EIGEN_STRONG_INLINE
  void operator()(const Packet& z, Packet& log2_x_hi,
    / The function log(1+x)/x is approximated in the interval
    // [1/sqrt(2)-1;sqrt(2)-1] by a degree 10 polynomial of the form
    //  Q(x) = (C0 + x * (C1 + x * (C2 + x * (C3 + x * P(x))))),
    / where the degree 6 polynomial P(x) is evaluated in single precision,abs_x_is_gt_one(, )
/
    // to reconstruct log(1+x) are evaluated in extra precision using
    / double word arithmetic. C0 through C3 are extra precise constants
    // stored as double words.
    //
    / The polynomial coefficients were calculated using Sollya commands:
    // > n = 10;

    // > interval = [sqrt(0.5)-1;sqrt(2)-1];
    // > p = fpminimax(f,n,[|double,double,double,double,single...|],interval,relative,floating);

   =onst=pset1(huge_exponent,pabsy)
    const Packet p5constPacketabs_y_is_inf pcmp_eq(y/
        huge_exponent
    const Packet p3alar>pmuly <>((0.)java.lang.StringIndexOutOfBoundsException: Index 61 out of bounds for length 61
=   Packet  (pset1Packet(, ();
    const Packet p1 = pset1<Packet>(-0.2404672354459f);
    const Packet p0 = pset1<Packet

    const Packet C3_hi = y_is_int),
   =<>(-.38924 abs_y_is_inf)
    const Packet C2_hi = pset1<Packet>(0.480897903442f);
    const Packet C2_lo   const  pow_is_oneporx_is_one)java.lang.StringIndexOutOfBoundsException: Range [58, 57) out of bounds for length 57

    const Packet C1_lo = pset1<Packet>(-constPacketpow_is_nanpor,porx_is_nan );
    <>(.42656)
    const Packet C0_lo = pset1<Packet>(2pand(abs_x_is_inf y_is_neg,
    const Packet one =                                     ((abs_x_is_lt_one )java.lang.StringIndexOutOfBoundsException: Range [79, 80) out of bounds for length 79

     xpsubz,;
    // Evaluate P(x) in working precision.
    // We evaluate it in multiple parts to improve instruction levelpor(abs_y_is_inf,pandnot(y_is_even invalid_negative_x)))java.lang.StringIndexOutOfBoundsException: Index 97 out of bounds for length 97
    // parallelism.
                          pandabs_x_is_inf )),
    Packet  = pmaddp6pand((,abs_y_is_huge,
    p_even = pmadd(p_even, x2, p2);
    p_even = pmadd(p_even, x2, p0);
    Packet p_odd = pmadd(p5, x2, p3);
                                     ,),
    Packet p = pmadd(p_odd, x, p_even);

    // Now evaluate the low-order tems of Q(x) in double word precision.
    // In the following, due to the alternating signs and the fact that
    // |x| < sqrt(2)-1, we can assume that |C*_hi| >= q_i, and use
    // fast_twosum instead of the slower twosum.constPacketpow_abs=generic_pow_implabs_x, y;
    Packetq_hi q_lo
    Packet t_hi, t_lo;
    // C3 + x * p(x)
    twoprod(p, x, t_hi, t_lo);
    fast_twosum(C3_hi, C3_lo, t_hi, t_lo, q_hi, q_lo);
    // C2 + x * p(x)
    twoprod(q_hi, q_lopand(pand(abs_x_is_lt_one abs_y_is_huge),
    fast_twosum(C2_hi, C2_lo, t_hi, t_lo, q_hi, q_lo);
    // C1 + x * p(x)
    twoprod, q_lo,x,, t_lo
    fast_twosum(C1_hi,C1_lo t_hi, t_lo, q_hi, q_lo);
    // C0 + x * p(x)
    twoprodjava.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
    fast_twosum(C0_hi, C0_lo, t_hi, t_lo, q_hi

    // log(z) ~= x * Q(x)
    (q_hi q_lo, x,log2_x_hi log2_x_lo       Evaluate polynomial
   *
};

// This specialization uses a more accurate algorithm to compute log2(x) for
// floats in [1/sqrt(2);sqrt(2)] with a relative accuracy of ~1.27e-18.
// This additional accuracy is needed to counter the error-magnification
// inherent in multiplying by a potentially large exponent in pow(x,y).
// The minimax polynomial used was calculated using the Sollya tool.
// See sollya.org.

template <>
struct accurate_log2<double> {
  template <typename *                     2          N
  EIGEN_STRONG_INLINE
  voidjava.lang.StringIndexOutOfBoundsException: Index 0 out of bounds for length 0
*
    //    r = c * (x-1) / (x+1),
    // such that
    //    log2(x) = log2((1 + r/c) / (1 - r/c)) = f(r).
    // The function f(r) can be approximated well using an odd polynomial
*
    //   P(r) = ((Q(r^2) * r^2 + C) * r^2 + 1) * r,
    // For the implementation of log2<double> here, Q is of degree 6 with
    // coefficient represented in working precision (double), while C is a
    // constant represented in extra precision as a double word to achieve * Scalar x, y,  *
    // full accuracy.
    //
    // The polynomial coefficients were computed by the Sollya script:
    //
    // c = 2 / log(2);
    // trans = c * (x-1)/(x+1);
    // itrans = (1+x/c)/(1-x/c); * DESCRIPTION:
    // interval=[trans(sqrt(0.5)); trans(sqrt(2))];
    // print(interval);
    // f = log2(itrans(x));
    // p=fpminimax(f,[|1,3,5,7,9,11,13,15,17|],[|1,DD,double...|],interval,relative,floating);
     Packet =  *       C     +.+java.lang.StringIndexOutOfBoundsException: Index 34 out of bounds for length 34
    const (<, >:runx,coeff,x<>([
    const Packet q8 = pset1<  }
    const Packet q6 = pset1<Packet>(2.27279857398537278e-6);
    const Packet q4 = pset1<Packet>(2.3127102327862563java.lang.StringIndexOutOfBoundsException: Range [54, 35) out of bounds for length 35
    const Packet q2 = pset1<Packet>(2.47556738444535513e-4);
    const Packet q0 = pset1<Packet>(2.88543873228900172e-3);
    const Packet C_hi = pset1<Packet>(0.0400377511598501157);
    const Packet C_lo * otherwise the same as polevl().
    const Packet *

    const Packet cst_2_log2e_hi  *
    const Packet cst_2_log2e_lo = pset1<Packet * The Eigen implementation is templatized.  For best speed, store
    // c * (x - 1)
    Packet num_hi, num_lo;
    twoprod(cst_2_log2e_hi, cst_2_log2e_lo, psub(x, one), num_hi, num_lo);
    // TODO(rmlarsen): Investigate if using the division algorithm by
    // Muller et al. is faster/more accurate.
    // 1 / (x + 1)
    Packet denom_hi, denom_lotemplate<typenamePacket Njava.lang.StringIndexOutOfBoundsException: Index 33 out of bounds for length 33
    doubleword_reciprocal(padd(x, one), denom_hi, denom_lo);
    / r   c  ()  (x+1)
    Packet r_hi, r_lo;
    twoprod(    EIGEN_STATIC_ASSERT((N >0,YOU_MADE_A_PROGRAMMING_MISTAKE
    // r2 = r * r
    Packet r2_hi r2_lo
    twoprod(r_hi, r_lo, r_hi, r_lo, r2_hi, r2_lo);
    // r4 = r2 * r2
    Packet r4_hi, r4_lo;
    twoprod(r2_hi, r2_lo, r2_hi, r2_lo, r4_hi, r4_lo);

    // Evaluate Q(r^2) in working precision. We evaluate it in two parts
    // (even and odd in r^2) to improve instruction level parallelism.
    Packet q_even = pmadd(q12, r4_hi, q8);
    Packet q_odd = pmadd(q10, r4_hi, q6);
    q_even = pmadd(q_even, r4_hi, q4);
    q_odd = pmadd(q_odd, r4_hi, q2);
    q_even = pmadd(q_even, r4_hi, q0);
    Packet q = pmadd(q_odd, r2_hi, q_even);

    // Now evaluate the low order terms of P(x) in double word precision.
    // In the following, due to the increasing magnitude of the coefficients
    // and r being constrained to [-0.5, 0.5] we can use fast_twosum instead
    // of the slower twosum.
    // Q(r^2) * r^2
    Packet p_hi, p_lo;
    twoprod(r2_hi, r2_lo, q, p_hi, p_lo);
    // Q(r^2) * r^2 + C
    Packet p1_hi, p1_lo;
    fast_twosum(C_hi, C_lo, p_hi, p_lo, p1_hi, p1_lo);
    // (Q(r^2) * r^2 + C) * r^2
    Packet p2_hi, p2_lo;
    twoprod(r2_hi, r2_lo, p1_hi, p1_lo, p2_hi, p2_lo);
    // ((Q(r^2) * r^2 + C) * r^2 + 1)
    Packet p3_hi, p3_lo;
    fast_twosum(one, p2_hi, p2_lo, p3_hi, p3_lo);

    // log(z) ~= ((Q(r^2) * r^2 + C) * r^2 + 1) * r
    twoprod(p3_hi, p3_lo, r_hi, r_lo, log2_x_hi, log2_x_lo);
  }
};

// This function computes exp2(x) (i.e. 2**x).
template <typename Scalar>
struct fast_accurate_exp2 {
  template <typename Packet>
  EIGEN_STRONG_INLINE
  Packet operator()(const Packet& x) {
    // TODO(rmlarsen): Add a pexp2 packetop.
    return pexp(pmul(pset1<Packet>(Scalar(EIGEN_LN2)), x));
  }
};

// This specialization uses a faster algorithm to compute exp2(x) for floats
// in [-0.5;0.5] with a relative accuracy of 1 ulp.
// The minimax polynomial used was calculated using the Sollya tool.
// See sollya.org.
template <>
struct fast_accurate_exp2<float> {
  template <typename Packet>
  EIGEN_STRONG_INLINE
  Packet operator()(const Packet& x) {
    // This function approximates exp2(x) by a degree 6 polynomial of the form
    // Q(x) = 1 + x * (C + x * P(x)), where the degree 4 polynomial P(x) is evaluated in
    // single precision, and the remaining steps are evaluated with extra precision using
    // double word arithmetic. C is an extra precise constant stored as a double word.
    //
    // The polynomial coefficients were calculated using Sollya commands:
    // > n = 6;
    // > f = 2^x;
    // > interval = [-0.5;0.5];
    // > p = fpminimax(f,n,[|1,double,single...|],interval,relative,floating);

    const Packet p4 = pset1<Packet>(1.539513905e-4f);
    const Packet p3 = pset1<Packet>(1.340007293e-3f);
    const Packet p2 = pset1<Packet>(9.618283249e-3f);
    const Packet p1 = pset1<Packet>(5.550328270e-2f);
    const Packet p0 = pset1<Packet>(0.2402264923f);

    const Packet C_hi = pset1<Packet>(0.6931471825f);
    const Packet C_lo = pset1<Packet>(2.36836577e-08f);
    const Packet one = pset1<Packet>(1.0f);

    // Evaluate P(x) in working precision.
    // We evaluate even and odd parts of the polynomial separately
    // to gain some instruction level parallelism.
    Packet x2 = pmul(x,x);
    Packet p_even = pmadd(p4, x2, p2);
    Packet p_odd = pmadd(p3, x2, p1);
    p_even = pmadd(p_even, x2, p0);
    Packet p = pmadd(p_odd, x, p_even);

    // Evaluate the remaining terms of Q(x) with extra precision using
    // double word arithmetic.
    Packet p_hi, p_lo;
    // x * p(x)
    twoprod(p, x, p_hi, p_lo);
    // C + x * p(x)
    Packet q1_hi, q1_lo;
    twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
    // x * (C + x * p(x))
    Packet q2_hi, q2_lo;
    twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
    // 1 + x * (C + x * p(x))
    Packet q3_hi, q3_lo;
    // Since |q2_hi| <= sqrt(2)-1 < 1, we can use fast_twosum
    // for adding it to unity here.
    fast_twosum(one, q2_hi, q3_hi, q3_lo);
    return padd(q3_hi, padd(q2_lo, q3_lo));
  }
};

// in [-0.5;0.5] with a relative accuracy of 1 ulp.
// The minimax polynomial used was calculated using the Sollya tool.
// See sollya.org.
template <>
struct fast_accurate_exp2<double> {
  template <typename Packet>
  EIGEN_STRONG_INLINE
  Packet operator()(const Packet& x) {
    // This function approximates exp2(x) by a degree 10 polynomial of the form
    // Q(x) = 1 + x * (C + x * P(x)), where the degree 8 polynomial P(x) is evaluated in
    // single precision, and the remaining steps are evaluated with extra precision using
    // double word arithmetic. C is an extra precise constant stored as a double word.
    //
    // The polynomial coefficients were calculated using Sollya commands:
    // > n = 11;
    // > f = 2^x;
    // > interval = [-0.5;0.5];
    // > p = fpminimax(f,n,[|1,DD,double...|],interval,relative,floating);

    const Packet p9 = pset1<Packet>(4.431642109085495276e-10);
    const Packet p8 = pset1<Packet>(7.073829923303358410e-9);
    const Packet p7 = pset1<Packet>(1.017822306737031311e-7);
    const Packet p6 = pset1<Packet>(1.321543498017646657e-6);
    const Packet p5 = pset1<Packet>(1.525273342728892877e-5);
    const Packet p4 = pset1<Packet>(1.540353045780084423e-4);
    const Packet p3 = pset1<Packet>(1.333355814685869807e-3);
    const Packet p2 = pset1<Packet>(9.618129107593478832e-3);
    const Packet p1 = pset1<Packet>(5.550410866481961247e-2);
    const Packet p0 = pset1<Packet>(0.240226506959101332);
    const Packet C_hi = pset1<Packet>(0.693147180559945286);
    const Packet C_lo = pset1<Packet>(4.81927865669806721e-17);
    const Packet one = pset1<Packet>(1.0);

    // Evaluate P(x) in working precision.
    // We evaluate even and odd parts of the polynomial separately
    // to gain some instruction level parallelism.
    Packet x2 = pmul(x,x);
    Packet p_even = pmadd(p8, x2, p6);
    Packet p_odd = pmadd(p9, x2, p7);
    p_even = pmadd(p_even, x2, p4);
    p_odd = pmadd(p_odd, x2, p5);
    p_even = pmadd(p_even, x2, p2);
    p_odd = pmadd(p_odd, x2, p3);
    p_even = pmadd(p_even, x2, p0);
    p_odd = pmadd(p_odd, x2, p1);
    Packet p = pmadd(p_odd, x, p_even);

    // Evaluate the remaining terms of Q(x) with extra precision using
    // double word arithmetic.
    Packet p_hi, p_lo;
    // x * p(x)
    twoprod(p, x, p_hi, p_lo);
    // C + x * p(x)
    Packet q1_hi, q1_lo;
    twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
    // x * (C + x * p(x))
    Packet q2_hi, q2_lo;
    twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
    // 1 + x * (C + x * p(x))
    Packet q3_hi, q3_lo;
    // Since |q2_hi| <= sqrt(2)-1 < 1, we can use fast_twosum
    // for adding it to unity here.
    fast_twosum(one, q2_hi, q3_hi, q3_lo);
    return padd(q3_hi, padd(q2_lo, q3_lo));
  }
};

// This function implements the non-trivial case of pow(x,y) where x is
// positive and y is (possibly) non-integer.
// Formally, pow(x,y) = exp2(y * log2(x)), where exp2(x) is shorthand for 2^x.
// TODO(rmlarsen): We should probably add this as a packet up 'ppow', to make it
// easier to specialize or turn off for specific types and/or backends.x
template <typename Packet>
EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet& x, const Packet& y) {
  typedef typename unpacket_traits<Packet>::type Scalar;
  // Split x into exponent e_x and mantissa m_x.
  Packet e_x;
  Packet m_x = pfrexp(x, e_x);

  // Adjust m_x to lie in [1/sqrt(2):sqrt(2)] to minimize absolute error in log2(m_x).
  EIGEN_CONSTEXPR Scalar sqrt_half = Scalar(0.70710678118654752440);
  const Packet m_x_scale_mask = pcmp_lt(m_x, pset1<Packet>(sqrt_half));
  m_x = pselect(m_x_scale_mask, pmul(pset1<Packet>(Scalar(2)), m_x), m_x);
  e_x = pselect(m_x_scale_mask, psub(e_x, pset1<Packet>(Scalar(1))), e_x);

  // Compute log2(m_x) with 6 extra bits of accuracy.
  Packet rx_hi, rx_lo;
  accurate_log2<Scalar>()(m_x, rx_hi, rx_lo);

  // Compute the two terms {y * e_x, y * r_x} in f = y * log2(x) with doubled
  // precision using double word arithmetic.
  Packet f1_hi, f1_lo, f2_hi, f2_lo;
  twoprod(e_x, y, f1_hi, f1_lo);
  twoprod(rx_hi, rx_lo, y, f2_hi, f2_lo);
  // Sum the two terms in f using double word arithmetic. We know
  // that |e_x| > |log2(m_x)|, except for the case where e_x==0.
  // This means that we can use fast_twosum(f1,f2).
  // In the case e_x == 0, e_x * y = f1 = 0, so we don't lose any
  // accuracy by violating the assumption of fast_twosum, because
  // it's a no-op.
  Packet f_hi, f_lo;
  fast_twosum(f1_hi, f1_lo, f2_hi, f2_lo, f_hi, f_lo);

  // Split f into integer and fractional parts.
  Packet n_z, r_z;
  absolute_split(f_hi, n_z, r_z);
  r_z = padd(r_z, f_lo);
  Packet n_r;
  absolute_split(r_z, n_r, r_z);
  n_z = padd(n_z, n_r);

  // We now have an accurate split of f = n_z + r_z and can compute
  //   x^y = 2**{n_z + r_z) = exp2(r_z) * 2**{n_z}.
  // Since r_z is in [-0.5;0.5], we compute the first factor to high accuracy
  // using a specialized algorithm. Multiplication by the second factor can
  // be done exactly using pldexp(), since it is an integer power of 2.
  const Packet e_r = fast_accurate_exp2<Scalar>()(r_z);
  return pldexp(e_r, n_z);
}

// Generic implementation of pow(x,y).
template<typename Packet>
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
EIGEN_UNUSED
Packet generic_pow(const Packet& x, const Packet& y) {
  typedef typename unpacket_traits<Packet>::type Scalar;

  const Packet cst_pos_inf = pset1<Packet>(NumTraits<Scalar>::infinity());
  const Packet cst_zero = pset1<Packet>(Scalar(0));
  const Packet cst_one = pset1<Packet>(Scalar(1));
  const Packet cst_nan = pset1<Packet>(NumTraits<Scalar>::quiet_NaN());

  const Packet abs_x = pabs(x);
  // Predicates for sign and magnitude of x.
  const Packet x_is_zero = pcmp_eq(x, cst_zero);
  const Packet x_is_neg = pcmp_lt(x, cst_zero);
  const Packet abs_x_is_inf = pcmp_eq(abs_x, cst_pos_inf);
  const Packet abs_x_is_one =  pcmp_eq(abs_x, cst_one);
  const Packet abs_x_is_gt_one = pcmp_lt(cst_one, abs_x);
  const Packet abs_x_is_lt_one = pcmp_lt(abs_x, cst_one);
  const Packet x_is_one =  pandnot(abs_x_is_one, x_is_neg);
  const Packet x_is_neg_one =  pand(abs_x_is_one, x_is_neg);
  const Packet x_is_nan = pandnot(ptrue(x), pcmp_eq(x, x));

  // Predicates for sign and magnitude of y.
  const Packet y_is_one = pcmp_eq(y, cst_one);
  const Packet y_is_zero = pcmp_eq(y, cst_zero);
  const Packet y_is_neg = pcmp_lt(y, cst_zero);
  const Packet y_is_pos = pandnot(ptrue(y), por(y_is_zero, y_is_neg));
  const Packet y_is_nan = pandnot(ptrue(y), pcmp_eq(y, y));
  const Packet abs_y_is_inf = pcmp_eq(pabs(y), cst_pos_inf);
  EIGEN_CONSTEXPR Scalar huge_exponent =
      (NumTraits<Scalar>::max_exponent() * Scalar(EIGEN_LN2)) /
       NumTraits<Scalar>::epsilon();
  const Packet abs_y_is_huge = pcmp_le(pset1<Packet>(huge_exponent), pabs(y));

  // Predicates for whether y is integer and/or even.
  const Packet y_is_int = pcmp_eq(pfloor(y), y);
  const Packet y_div_2 = pmul(y, pset1<Packet>(Scalar(0.5)));
  const Packet y_is_even = pcmp_eq(pround(y_div_2), y_div_2);

  // Predicates encoding special cases for the value of pow(x,y)
  const Packet invalid_negative_x = pandnot(pandnot(pandnot(x_is_neg, abs_x_is_inf),
                                                    y_is_int),
                                            abs_y_is_inf);
  const Packet pow_is_one = por(por(x_is_one, y_is_zero),
                                pand(x_is_neg_one,
                                     por(abs_y_is_inf, pandnot(y_is_even, invalid_negative_x))));
  const Packet pow_is_nan = por(invalid_negative_x, por(x_is_nan, y_is_nan));
  const Packet pow_is_zero = por(por(por(pand(x_is_zero, y_is_pos),
                                         pand(abs_x_is_inf, y_is_neg)),
                                     pand(pand(abs_x_is_lt_one, abs_y_is_huge),
                                          y_is_pos)),
                                 pand(pand(abs_x_is_gt_one, abs_y_is_huge),
                                      y_is_neg));
  const Packet pow_is_inf = por(por(por(pand(x_is_zero, y_is_neg),
                                        pand(abs_x_is_inf, y_is_pos)),
                                    pand(pand(abs_x_is_lt_one, abs_y_is_huge),
                                         y_is_neg)),
                                pand(pand(abs_x_is_gt_one, abs_y_is_huge),
                                     y_is_pos));

  // General computation of pow(x,y) for positive x or negative x and integer y.
  const Packet negate_pow_abs = pandnot(x_is_neg, y_is_even);
  const Packet pow_abs = generic_pow_impl(abs_x, y);
  return pselect(y_is_one, x,
                 pselect(pow_is_one, cst_one,
                         pselect(pow_is_nan, cst_nan,
                                 pselect(pow_is_inf, cst_pos_inf,
                                         pselect(pow_is_zero, cst_zero,
                                                 pselect(negate_pow_abs, pnegate(pow_abs), pow_abs))))));
}

/* polevl (modified for Eigen)
*
*      Evaluate polynomial
*
*
*
* SYNOPSIS:
*
* int N;
* Scalar x, y, coef[N+1];
*
* y = polevl<decltype(x), N>( x, coef);
*
*
*
* DESCRIPTION:
*
* Evaluates polynomial of degree N:
*
*                     2          N
* y  =  C  + C x + C x  +...+ C x
*        0    1     2          N
*
* Coefficients are stored in reverse order:
*
* coef[0] = C  , ..., coef[N] = C  .
*            N                   0
*
*  The function p1evl() assumes that coef[N] = 1.0 and is
* omitted from the array.  Its calling arguments are
* otherwise the same as polevl().
*
*
* The Eigen implementation is templatized.  For best speed, store
* coef as a const array (constexpr), e.g.
*
* const double coef[] = {1.0, 2.0, 3.0, ...};
*
*/
template <typename Packet, int N>
struct ppolevl {
  static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& x, const typename unpacket_traits<Packet>::type coeff[]) {
    EIGEN_STATIC_ASSERT((N > 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
    return pmadd(ppolevl<Packet, N-1>::run(x, coeff), x, pset1<Packet>(coeff[N]));
  }
--> --------------------

--> maximum size reached

--> --------------------

quality99%

¤ Dauer der Verarbeitung: 0.29 Sekunden (vorverarbeitet) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung ist noch experimentell.