// This file is part of Eigen, a lightweight C++ template library // for linear algebra. // // Copyright (C) 2010 Gael Guennebaud <gael.guennebaud@inria.fr> // Copyright (C) 2010 Konstantinos Margaritis <markos@freevec.org> // // This Source Code Form is subject to the terms of the Mozilla // Public License v. 2.0. If a copy of the MPL was not distributed // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// Complex addition is component-wise, so both overloads forward the wrapped
// float packet `.v` to the matching real-valued padd and re-wrap the result.
template<> EIGEN_STRONG_INLINE Packet1cf padd<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
{
  const Packet2f sum = padd<Packet2f>(a.v, b.v);
  return Packet1cf(sum);
}

template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f sum = padd<Packet4f>(a.v, b.v);
  return Packet2cf(sum);
}
// Complex subtraction is component-wise, so both overloads forward the wrapped
// float packet `.v` to the matching real-valued psub and re-wrap the result.
template<> EIGEN_STRONG_INLINE Packet1cf psub<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
{
  const Packet2f diff = psub<Packet2f>(a.v, b.v);
  return Packet1cf(diff);
}

template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4f diff = psub<Packet4f>(a.v, b.v);
  return Packet2cf(diff);
}
// Negation of a complex packet: negate the wrapped float packet `.v` with the
// real-valued pnegate and re-wrap the result.
template<> EIGEN_STRONG_INLINE Packet1cf pnegate(const Packet1cf& a)
{
  const Packet2f negated = pnegate<Packet2f>(a.v);
  return Packet1cf(negated);
}

template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a)
{
  const Packet4f negated = pnegate<Packet4f>(a.v);
  return Packet2cf(negated);
}
// Complex conjugate: XOR the raw 32-bit lanes of the packet with the
// conjugation mask and view the result as floats again.
// NOTE(review): the mask presumably has only the sign bit of each imaginary
// lane set -- confirm against the definition of p2ui_CONJ_XOR().
template<> EIGEN_STRONG_INLINE Packet1cf pconj(const Packet1cf& a)
{
  const Packet2ui raw_bits = vreinterpret_u32_f32(a.v);
  const Packet2ui flipped = veor_u32(raw_bits, p2ui_CONJ_XOR());
  return Packet1cf(vreinterpret_f32_u32(flipped));
}

// Same as above for two complex values.
// NOTE(review): mask layout assumed as for p2ui_CONJ_XOR() -- confirm against
// the definition of p4ui_CONJ_XOR().
template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
{
  const Packet4ui raw_bits = vreinterpretq_u32_f32(a.v);
  const Packet4ui flipped = veorq_u32(raw_bits, p4ui_CONJ_XOR());
  return Packet2cf(vreinterpretq_f32_u32(flipped));
}
// Complex multiplication of one complex<float>.
//
// The steps below build
//   v1 = | a_re*b_re | a_re*b_im |
//   v2 = | a_im*b_re | a_im*b_im |
// then XOR v2 with the conjugation mask, swap its two lanes and add it to v1.
// NOTE(review): this yields | a_re*b_re - a_im*b_im | a_re*b_im + a_im*b_re |
// provided p2ui_CONJ_XOR() flips the sign of the imaginary lane only -- confirm
// against its definition.
template<> EIGEN_STRONG_INLINE Packet1cf pmul<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
{
  Packet2f v1, v2;

  // Get the real values of a | a1_re | a1_re |
  v1 = vdup_lane_f32(a.v, 0);
  // Get the imag values of a | a1_im | a1_im |
  v2 = vdup_lane_f32(a.v, 1);
  // Multiply the real a with b
  v1 = vmul_f32(v1, b.v);
  // Multiply the imag a with b
  v2 = vmul_f32(v2, b.v);
  // Conjugate v2
  v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR()));
  // Swap real/imag elements in v2.
  v2 = vrev64_f32(v2);
  // Add and return the result
  return Packet1cf(vadd_f32(v1, v2));
}

// Complex multiplication of two complex<float> values; same scheme as the
// Packet1cf overload, applied independently to each (re, im) pair.
template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  Packet4f v1, v2;

  // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
  v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
  // Get the imag values of a | a1_im | a1_im | a2_im | a2_im |
  v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
  // Multiply the real a with b
  v1 = vmulq_f32(v1, b.v);
  // Multiply the imag a with b
  v2 = vmulq_f32(v2, b.v);
  // Conjugate v2
  v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR()));
  // Swap real/imag elements in v2.
  v2 = vrev64q_f32(v2);
  // Add and return the result
  return Packet2cf(vaddq_f32(v1, v2));
}
// Per-complex equality mask: every lane of a complex value is set to the AND
// of the real-part and imaginary-part comparison masks, so a complex entry
// compares equal only when both of its components compare equal.
template<> EIGEN_STRONG_INLINE Packet1cf pcmp_eq(const Packet1cf& a, const Packet1cf& b)
{
  // Compare real and imaginary parts of a and b to get the mask vector:
  // [re(a[0])==re(b[0]), im(a[0])==im(b[0])]
  Packet2f eq = pcmp_eq<Packet2f>(a.v, b.v);
  // Swap real/imag elements in the mask to get:
  // [im(a[0])==im(b[0]), re(a[0])==re(b[0])]
  Packet2f eq_swapped = vrev64_f32(eq);
  // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
  return Packet1cf(pand<Packet2f>(eq, eq_swapped));
}

// Same as above for two complex values; each (re, im) pair is handled
// independently.
template<> EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf& a, const Packet2cf& b)
{
  // Compare real and imaginary parts of a and b to get the mask vector:
  // [re(a[0])==re(b[0]), im(a[0])==im(b[0]), re(a[1])==re(b[1]), im(a[1])==im(b[1])]
  Packet4f eq = pcmp_eq<Packet4f>(a.v, b.v);
  // Swap real/imag elements in the mask to get:
  // [im(a[0])==im(b[0]), re(a[0])==re(b[0]), im(a[1])==im(b[1]), re(a[1])==re(b[1])]
  Packet4f eq_swapped = vrev64q_f32(eq);
  // Return re(a)==re(b) && im(a)==im(b) by computing bitwise AND of eq and eq_swapped
  return Packet2cf(pand<Packet4f>(eq, eq_swapped));
}
// Bitwise AND of two complex packets, performed on the raw 32-bit lanes and
// then viewed as floats again.
template<> EIGEN_STRONG_INLINE Packet1cf pand<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
{
  const Packet2ui lhs_bits = vreinterpret_u32_f32(a.v);
  const Packet2ui rhs_bits = vreinterpret_u32_f32(b.v);
  return Packet1cf(vreinterpret_f32_u32(vand_u32(lhs_bits, rhs_bits)));
}

template<> EIGEN_STRONG_INLINE Packet2cf pand<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui lhs_bits = vreinterpretq_u32_f32(a.v);
  const Packet4ui rhs_bits = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vandq_u32(lhs_bits, rhs_bits)));
}
// Bitwise OR of two complex packets, performed on the raw 32-bit lanes and
// then viewed as floats again.
template<> EIGEN_STRONG_INLINE Packet1cf por<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
{
  const Packet2ui lhs_bits = vreinterpret_u32_f32(a.v);
  const Packet2ui rhs_bits = vreinterpret_u32_f32(b.v);
  return Packet1cf(vreinterpret_f32_u32(vorr_u32(lhs_bits, rhs_bits)));
}

template<> EIGEN_STRONG_INLINE Packet2cf por<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui lhs_bits = vreinterpretq_u32_f32(a.v);
  const Packet4ui rhs_bits = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(lhs_bits, rhs_bits)));
}
// Bitwise XOR of two complex packets, performed on the raw 32-bit lanes and
// then viewed as floats again.
template<> EIGEN_STRONG_INLINE Packet1cf pxor<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
{
  const Packet2ui lhs_bits = vreinterpret_u32_f32(a.v);
  const Packet2ui rhs_bits = vreinterpret_u32_f32(b.v);
  return Packet1cf(vreinterpret_f32_u32(veor_u32(lhs_bits, rhs_bits)));
}

template<> EIGEN_STRONG_INLINE Packet2cf pxor<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui lhs_bits = vreinterpretq_u32_f32(a.v);
  const Packet4ui rhs_bits = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(veorq_u32(lhs_bits, rhs_bits)));
}
// Bit-clear of two complex packets: the raw 32-bit lanes of `a` and `b` are fed
// to the NEON BIC intrinsic (first operand AND NOT second operand), and the
// result is viewed as floats again.
template<> EIGEN_STRONG_INLINE Packet1cf pandnot<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
{
  const Packet2ui lhs_bits = vreinterpret_u32_f32(a.v);
  const Packet2ui rhs_bits = vreinterpret_u32_f32(b.v);
  return Packet1cf(vreinterpret_f32_u32(vbic_u32(lhs_bits, rhs_bits)));
}

template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{
  const Packet4ui lhs_bits = vreinterpretq_u32_f32(a.v);
  const Packet4ui rhs_bits = vreinterpretq_u32_f32(b.v);
  return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(lhs_bits, rhs_bits)));
}
a1 = vget_low_f32(a.v);
a2 = vget_high_f32(a.v); // Get the real values of a | a1_re | a1_re | a2_re | a2_re |
v1 = vdup_lane_f32(a1, 0); // Get the real values of a | a1_im | a1_im | a2_im | a2_im |
v2 = vdup_lane_f32(a1, 1); // Multiply the real a with b
v1 = vmul_f32(v1, a2); // Multiply the imag a with b
v2 = vmul_f32(v2, a2); // Conjugate v2
v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR())); // Swap real/imag elements in v2.
v2 = vrev64_f32(v2); // Add v1, v2
prod = vadd_f32(v1, v2);
// Complex division of one complex<float>: a / b = (a * conj(b)) / |b|^2.
template<> EIGEN_STRONG_INLINE Packet1cf pdiv<Packet1cf>(const Packet1cf& a, const Packet1cf& b)
{
  // TODO optimize it for NEON
  // Numerator: a * conj(b).
  const Packet1cf numerator = pmul(a, pconj(b));

  // Denominator: | b_re^2 | b_im^2 | plus its lane-swapped copy, which puts
  // b_re^2 + b_im^2 (the squared norm of b) in both lanes.
  const Packet2f squares = vmul_f32(b.v, b.v);
  const Packet2f squares_swapped = vrev64_f32(squares);
  const Packet2f norm = vadd_f32(squares, squares_swapped);

  return Packet1cf(pdiv<Packet2f>(numerator.v, norm));
}
template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
{ // TODO optimize it for NEON
Packet2cf res = pmul(a,pconj(b));
Packet4f s, rev_s;
// this computes the norm
s = vmulq_f32(b.v, b.v);
rev_s = vrev64q_f32(s);
// Builds a Packet1cd holding the single complex<double> `from`.
template<> EIGEN_STRONG_INLINE Packet1cd pset1<Packet1cd>(const std::complex<double>& from)
{
  /* An unaligned load really is required here :(
     (presumably because `from` carries no alignment guarantee). */
  const std::complex<double>* src = &from;
  return ploadu<Packet1cd>(src);
}
// Complex<double> addition is component-wise: forward the wrapped double
// packet `.v` to padd<Packet2d> and re-wrap the result.
template<> EIGEN_STRONG_INLINE Packet1cd padd<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const Packet2d sum = padd<Packet2d>(a.v, b.v);
  return Packet1cd(sum);
}
// Complex<double> subtraction is component-wise: forward the wrapped double
// packet `.v` to psub<Packet2d> and re-wrap the result.
template<> EIGEN_STRONG_INLINE Packet1cd psub<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const Packet2d diff = psub<Packet2d>(a.v, b.v);
  return Packet1cd(diff);
}
// Negation of a complex<double> packet: negate the wrapped double packet `.v`
// with pnegate<Packet2d> and re-wrap the result.
template<> EIGEN_STRONG_INLINE Packet1cd pnegate(const Packet1cd& a)
{
  const Packet2d negated = pnegate<Packet2d>(a.v);
  return Packet1cd(negated);
}
// Complex multiplication of one complex<double>.
//
// The steps below build
//   v1 = | a_re*b_re | a_re*b_im |
//   v2 = | a_im*b_re | a_im*b_im |
// then XOR v2 with the conjugation mask, reverse its two lanes and add it to v1.
// NOTE(review): this yields | a_re*b_re - a_im*b_im | a_re*b_im + a_im*b_re |
// provided p2ul_CONJ_XOR flips the sign of the imaginary lane only -- confirm
// against its definition.
template<> EIGEN_STRONG_INLINE Packet1cd pmul<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  Packet2d v1, v2;

  // Get the real values of a
  v1 = vdupq_lane_f64(vget_low_f64(a.v), 0);
  // Get the imag values of a
  v2 = vdupq_lane_f64(vget_high_f64(a.v), 0);
  // Multiply the real a with b
  v1 = vmulq_f64(v1, b.v);
  // Multiply the imag a with b
  v2 = vmulq_f64(v2, b.v);
  // Conjugate v2
  v2 = vreinterpretq_f64_u64(veorq_u64(vreinterpretq_u64_f64(v2), p2ul_CONJ_XOR));
  // Swap real/imag elements in v2.
  v2 = preverse<Packet2d>(v2);
  // Add and return the result
  return Packet1cd(vaddq_f64(v1, v2));
}
// Equality mask for one complex<double>: both 64-bit lanes of the result hold
// the AND of the real-part and imaginary-part comparison masks, so the value
// compares equal only when both components compare equal.
template<> EIGEN_STRONG_INLINE Packet1cd pcmp_eq(const Packet1cd& a, const Packet1cd& b)
{
  // Compare real and imaginary parts of a and b to get the mask vector:
  // [re(a)==re(b), im(a)==im(b)]
  Packet2d eq = pcmp_eq<Packet2d>(a.v, b.v);
  // Swap real/imag elements in the mask to get:
  // [im(a)==im(b), re(a)==re(b)]
  // The two 64-bit lanes themselves must trade places. Reversing the 32-bit
  // halves inside each 64-bit lane (vrev64q_u32) leaves every lane where it is
  // and would make the AND below a no-op.
  Packet2d eq_swapped = preverse<Packet2d>(eq);
  // Return re(a)==re(b) & im(a)==im(b) by computing bitwise AND of eq and eq_swapped
  return Packet1cd(pand<Packet2d>(eq, eq_swapped));
}
// Bitwise AND of two complex<double> packets, performed on the raw 64-bit
// lanes and then viewed as doubles again.
template<> EIGEN_STRONG_INLINE Packet1cd pand<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const Packet2d anded = vreinterpretq_f64_u64(
      vandq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v)));
  return Packet1cd(anded);
}
// Bitwise OR of two complex<double> packets, performed on the raw 64-bit
// lanes and then viewed as doubles again.
template<> EIGEN_STRONG_INLINE Packet1cd por<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const Packet2d ored = vreinterpretq_f64_u64(
      vorrq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v)));
  return Packet1cd(ored);
}
// Bitwise XOR of two complex<double> packets, performed on the raw 64-bit
// lanes and then viewed as doubles again.
template<> EIGEN_STRONG_INLINE Packet1cd pxor<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const Packet2d xored = vreinterpretq_f64_u64(
      veorq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v)));
  return Packet1cd(xored);
}
// Bit-clear of two complex<double> packets: the raw 64-bit lanes of `a` and
// `b` are fed to the NEON BIC intrinsic (first operand AND NOT second operand),
// and the result is viewed as doubles again.
template<> EIGEN_STRONG_INLINE Packet1cd pandnot<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{
  const Packet2d cleared = vreinterpretq_f64_u64(
      vbicq_u64(vreinterpretq_u64_f64(a.v), vreinterpretq_u64_f64(b.v)));
  return Packet1cd(cleared);
}
template<> EIGEN_STRONG_INLINE Packet1cd pdiv<Packet1cd>(const Packet1cd& a, const Packet1cd& b)
{ // TODO optimize it for NEON
Packet1cd res = pmul(a,pconj(b));
Packet2d s = pmul<Packet2d>(b.v, b.v);
Packet2d rev_s = preverse<Packet2d>(s);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.