/*************************************************************************** * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * * Martin Renou * * Copyright (c) QuantStack * * Copyright (c) Serge Guelton * * * * Distributed under the terms of the BSD 3-Clause License. * * * * The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/
// bitwise_and
template <class A>
XSIMD_INLINE batch<float, A> bitwise_and(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512dq>) noexcept
{
    // AVX512DQ adds bitwise AND directly on the floating-point domain,
    // so no cast through the integer domain is needed.
    return _mm512_and_ps(self, other);
}

template <class A>
XSIMD_INLINE batch<double, A> bitwise_and(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512dq>) noexcept
{
    return _mm512_and_pd(self, other);
}
// bitwise_andnot
template <class A>
XSIMD_INLINE batch<float, A> bitwise_andnot(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512dq>) noexcept
{
    // _mm512_andnot_ps computes (~arg0) & arg1, so the operands are
    // deliberately swapped to yield self & ~other.
    return _mm512_andnot_ps(other, self);
}

template <class A>
XSIMD_INLINE batch<double, A> bitwise_andnot(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512dq>) noexcept
{
    // Same operand swap as the float overload: (~other) & self.
    return _mm512_andnot_pd(other, self);
}
// bitwise_not
// FIX: these overloads were tagged requires_arch<avx512f>, but
// _mm512_xor_ps / _mm512_xor_pd are AVX512DQ intrinsics (Intel lists them
// under the AVX512DQ CPUID flag). Registering them at the AVX512F level
// would select an unavailable intrinsic on plain AVX512F targets and
// clash with the AVX512F-level definitions; every other overload in this
// file is tagged avx512dq, so the tag is made consistent here.
template <class A>
XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& self, requires_arch<avx512dq>) noexcept
{
    // XOR with an all-ones mask flips every bit of the register.
    return _mm512_xor_ps(self, _mm512_castsi512_ps(_mm512_set1_epi32(-1)));
}

template <class A>
XSIMD_INLINE batch<double, A> bitwise_not(batch<double, A> const& self, requires_arch<avx512dq>) noexcept
{
    // set1_epi32(-1) still produces an all-ones 512-bit pattern, which is
    // what matters for a bitwise complement of doubles.
    return _mm512_xor_pd(self, _mm512_castsi512_pd(_mm512_set1_epi32(-1)));
}
// bitwise_or
template <class A>
XSIMD_INLINE batch<float, A> bitwise_or(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512dq>) noexcept
{
    // Native floating-point-domain OR provided by AVX512DQ.
    return _mm512_or_ps(self, other);
}

template <class A>
XSIMD_INLINE batch<double, A> bitwise_or(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512dq>) noexcept
{
    return _mm512_or_pd(self, other);
}
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx512dq>) noexcept
{
    // batch_bool stores its lanes in an integral mask word (`data`), so a
    // plain scalar OR combines the two predicates.
    using register_type = typename batch_bool<T, A>::register_type;
    return register_type(self.data | other.data);
}
// bitwise_xor
template <class A>
XSIMD_INLINE batch<float, A> bitwise_xor(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512dq>) noexcept
{
    // Native floating-point-domain XOR provided by AVX512DQ.
    return _mm512_xor_ps(self, other);
}

template <class A>
XSIMD_INLINE batch<double, A> bitwise_xor(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512dq>) noexcept
{
    return _mm512_xor_pd(self, other);
}
// NOTE(review): this haddp implementation appears truncated/garbled by text
// extraction and cannot compile as-is: the `#define` below was folded into a
// `//` comment, and because line splicing happens before comment removal, the
// trailing `\` continuations pull the entire XSIMD_AVX512_HADDP_STEP1 macro
// body into that comment. The function's opening brace is also never closed
// in this chunk. Restore this block from the upstream xsimd source rather
// than editing it in place.
// haddp template <class A>
XSIMD_INLINE batch<float, A> haddp(batch<float, A> const* row, requires_arch<avx512dq>) noexcept
{ // The following folds over the vector once: // tmp1 = [a0..8, b0..8] // tmp2 = [a8..f, b8..f] #define XSIMD_AVX512_HADDP_STEP1(I, a, b) \
batch<float, avx512f> res##I; \
{ \ auto tmp1 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(1, 0, 1, 0)); \ auto tmp2 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(3, 2, 3, 2)); \
res##I = _mm512_add_ps(tmp1, tmp2); \
}
// mul
template <class A>
XSIMD_INLINE batch<uint64_t, A> mul(batch<uint64_t, A> const& self, batch<uint64_t, A> const& other, requires_arch<avx512dq>) noexcept
{
    // AVX512DQ supplies a native low-64-bit lane-wise multiply.
    return _mm512_mullo_epi64(self, other);
}
template <class A>
XSIMD_INLINE batch<int64_t, A> mul(batch<int64_t, A> const& self, batch<int64_t, A> const& other, requires_arch<avx512dq>) noexcept
{
    // The low 64 bits of the product are identical for signed and
    // unsigned operands, so the same intrinsic serves both overloads.
    return _mm512_mullo_epi64(self, other);
}
// nearbyint_as_int
template <class A>
XSIMD_INLINE batch<int64_t, A> nearbyint_as_int(batch<double, A> const& self,
                                                requires_arch<avx512dq>) noexcept
{
    // _mm512_cvtpd_epi64 rounds using the current MXCSR rounding mode
    // (round-to-nearest-even by default), matching nearbyint semantics.
    return _mm512_cvtpd_epi64(self);
}
// convert namespace detail
{ template <class A>
XSIMD_INLINE batch<double, A> fast_cast(batch<int64_t, A> const& x, batch<double, A> const&, requires_arch<avx512dq>) noexcept
{ return _mm512_cvtepi64_pd(self);
}
template <class A>
XSIMD_INLINE batch<int64_t, A> fast_cast(batch<double, A> const& self, batch<int64_t, A> const&, requires_arch<avx512dq>) noexcept
{ return _mm512_cvttpd_epi64(self);
}
}
}
}
#endif
/* NOTE(review): the trailing text below is not C++ — it is residue from an
   HTML/text extraction tool (a German site footer: "Measurement V0.5",
   processing-time readout, and a no-warranty disclaimer). As bare text after
   #endif it breaks compilation; it is preserved here inside a comment and
   should simply be deleted from the real source file.

Messung V0.5
Dauer der Verarbeitung: 0.14 Sekunden
(vorverarbeitet)

Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
*/