/*************************************************************************** * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * * Martin Renou * * Copyright (c) QuantStack * * Copyright (c) Serge Guelton * * * * Distributed under the terms of the BSD 3-Clause License. * * * * The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/
// fwd
// Forward declaration of the generic insert kernel: returns a copy of `self`
// with lane `I` replaced by `val`.
template <class A, class T, size_t I>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<generic>) noexcept;
// abs
template <class A>
XSIMD_INLINE batch<float, A> abs(batch<float, A> const& self, requires_arch<avx>) noexcept
{
    // Clear the sign bit of every lane: |x| = x & ~(1 << 31).
    __m256 sign_mask = _mm256_set1_ps(-0.f); // -0.f = 1 << 31
    return _mm256_andnot_ps(sign_mask, self);
}
template <class A>
XSIMD_INLINE batch<double, A> abs(batch<double, A> const& self, requires_arch<avx>) noexcept
{
    // Use a double literal so the mask is exactly the 64-bit sign bit.
    __m256d sign_mask = _mm256_set1_pd(-0.); // -0. = 1 << 63
    return _mm256_andnot_pd(sign_mask, self);
}
// add
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> add(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>) noexcept
{
    // AVX (pre-AVX2) has no 256-bit integer arithmetic: split into two
    // SSE halves, add each, and merge.
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return add(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
                              self, other);
}
template <class A>
XSIMD_INLINE batch<float, A> add(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_add_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> add(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_add_pd(self, other);
}
// all
template <class A>
XSIMD_INLINE bool all(batch_bool<float, A> const& self, requires_arch<avx>) noexcept
{
    // testc sets CF iff (~self & ones) == 0, i.e. every lane of self is set.
    return _mm256_testc_ps(self, batch_bool<float, A>(true)) != 0;
}
template <class A>
XSIMD_INLINE bool all(batch_bool<double, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_testc_pd(self, batch_bool<double, A>(true)) != 0;
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE bool all(batch_bool<T, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_testc_si256(self, batch_bool<T, A>(true)) != 0;
}
// any
template <class A>
XSIMD_INLINE bool any(batch_bool<float, A> const& self, requires_arch<avx>) noexcept
{
    // testz returns 1 iff (self & self) == 0, i.e. no lane is set.
    return !_mm256_testz_ps(self, self);
}
template <class A>
XSIMD_INLINE bool any(batch_bool<double, A> const& self, requires_arch<avx>) noexcept
{
    return !_mm256_testz_pd(self, self);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE bool any(batch_bool<T, A> const& self, requires_arch<avx>) noexcept
{
    return !_mm256_testz_si256(self, self);
}
// batch_bool_cast
// Reinterpret a mask of T_in lanes as a mask of T_out lanes (same width);
// only the raw register bits are moved, no lane-wise conversion happens.
template <class A, class T_out, class T_in>
XSIMD_INLINE batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& self, batch_bool<T_out, A> const&, requires_arch<avx>) noexcept
{
    return { bitwise_cast<T_out>(batch<T_in, A>(self.data)).data };
}
// bitwise_and
template <class A>
XSIMD_INLINE batch<float, A> bitwise_and(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_and_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_and(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_and_pd(self, other);
}

template <class A>
XSIMD_INLINE batch_bool<float, A> bitwise_and(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_and_ps(self, other);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> bitwise_and(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_and_pd(self, other);
}

template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>) noexcept
{
    // No 256-bit integer logic on plain AVX: process both SSE halves.
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return bitwise_and(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
                              self, other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> bitwise_and(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return bitwise_and(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
                              self, other);
}
// bitwise_andnot
// Note the operand order: xsimd's bitwise_andnot(a, b) == a & ~b, while the
// intrinsic _mm256_andnot_*(x, y) computes ~x & y — hence the swap below.
template <class A>
XSIMD_INLINE batch<float, A> bitwise_andnot(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_andnot_ps(other, self);
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_andnot(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_andnot_pd(other, self);
}

template <class A>
XSIMD_INLINE batch_bool<float, A> bitwise_andnot(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_andnot_ps(other, self);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> bitwise_andnot(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_andnot_pd(other, self);
}

template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_andnot(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return bitwise_andnot(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
                              self, other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return bitwise_andnot(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
                              self, other);
}
// bitwise_lshift
// Shift every lane left by `other` bits, delegating to the SSE kernel per half.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_lshift(batch<T, A> const& self, int32_t other, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s, int32_t o) noexcept
                              { return bitwise_lshift(batch<T, sse4_2>(s), o, sse4_2 {}); },
                              self, other);
}
// bitwise_not
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& self, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s) noexcept
                              { return bitwise_not(batch<T, sse4_2>(s), sse4_2 {}); },
                              self);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> bitwise_not(batch_bool<T, A> const& self, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s) noexcept
                              { return bitwise_not(batch_bool<T, sse4_2>(s), sse4_2 {}); },
                              self);
}
// bitwise_or
template <class A>
XSIMD_INLINE batch<float, A> bitwise_or(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_or_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_or(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_or_pd(self, other);
}
template <class A>
XSIMD_INLINE batch_bool<float, A> bitwise_or(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_or_ps(self, other);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> bitwise_or(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_or_pd(self, other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>) noexcept
{
    // No 256-bit integer logic on plain AVX: process both SSE halves.
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return bitwise_or(batch<T, sse4_2>(s), batch<T, sse4_2>(o)); },
                              self, other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return bitwise_or(batch_bool<T, sse4_2>(s), batch_bool<T, sse4_2>(o)); },
                              self, other);
}
// bitwise_rshift
// Shift every lane right by `other` bits (arithmetic/logical per the SSE
// kernel's rules for signed/unsigned T), one SSE half at a time.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_rshift(batch<T, A> const& self, int32_t other, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s, int32_t o) noexcept
                              { return bitwise_rshift(batch<T, sse4_2>(s), o, sse4_2 {}); },
                              self, other);
}
// bitwise_xor
template <class A>
XSIMD_INLINE batch<float, A> bitwise_xor(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_xor_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_xor(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_xor_pd(self, other);
}
template <class A>
XSIMD_INLINE batch_bool<float, A> bitwise_xor(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_xor_ps(self, other);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> bitwise_xor(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_xor_pd(self, other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_xor(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return bitwise_xor(batch<T, sse4_2>(s), batch<T, sse4_2>(o), sse4_2 {}); },
                              self, other);
}
// NOTE(review): this batch_bool overload returns batch<T, A> while the and/or
// counterparts return batch_bool<T, A> — confirm whether the asymmetry is
// intentional before changing the signature.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_xor(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return bitwise_xor(batch_bool<T, sse4_2>(s), batch_bool<T, sse4_2>(o), sse4_2 {}); },
                              self, other);
}
// bitwise_cast
// All overloads reinterpret the 256-bit register without moving any data.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<float, A> bitwise_cast(batch<T, A> const& self, batch<float, A> const&, requires_arch<avx>) noexcept
{
    return _mm256_castsi256_ps(self);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<double, A> bitwise_cast(batch<T, A> const& self, batch<double, A> const&, requires_arch<avx>) noexcept
{
    return _mm256_castsi256_pd(self);
}
template <class A, class T, class Tp, class = typename std::enable_if<std::is_integral<typename std::common_type<T, Tp>::type>::value, void>::type>
XSIMD_INLINE batch<Tp, A> bitwise_cast(batch<T, A> const& self, batch<Tp, A> const&, requires_arch<avx>) noexcept
{
    // Integer-to-integer: both share the __m256i register type.
    return batch<Tp, A>(self.data);
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_cast(batch<float, A> const& self, batch<double, A> const&, requires_arch<avx>) noexcept
{
    return _mm256_castps_pd(self);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_cast(batch<float, A> const& self, batch<T, A> const&, requires_arch<avx>) noexcept
{
    return _mm256_castps_si256(self);
}
template <class A>
XSIMD_INLINE batch<float, A> bitwise_cast(batch<double, A> const& self, batch<float, A> const&, requires_arch<avx>) noexcept
{
    return _mm256_castpd_ps(self);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_cast(batch<double, A> const& self, batch<T, A> const&, requires_arch<avx>) noexcept
{
    return _mm256_castpd_si256(self);
}
// bitwise_not
// XOR against an all-ones register flips every bit.
template <class A>
XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_xor_ps(self, _mm256_castsi256_ps(_mm256_set1_epi32(-1)));
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_not(batch<double, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_xor_pd(self, _mm256_castsi256_pd(_mm256_set1_epi32(-1)));
}
template <class A>
XSIMD_INLINE batch_bool<float, A> bitwise_not(batch_bool<float, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_xor_ps(self, _mm256_castsi256_ps(_mm256_set1_epi32(-1)));
}
template <class A>
XSIMD_INLINE batch_bool<double, A> bitwise_not(batch_bool<double, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_xor_pd(self, _mm256_castsi256_pd(_mm256_set1_epi32(-1)));
}
// ceil
template <class A>
XSIMD_INLINE batch<float, A> ceil(batch<float, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_ceil_ps(self);
}
template <class A>
XSIMD_INLINE batch<double, A> ceil(batch<double, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_ceil_pd(self);
}
namespace detail
{
    // On clang, _mm256_extractf128_ps is built upon build_shufflevector
    // which require index parameter to be a constant
    template <int index, class B>
    XSIMD_INLINE B get_half_complex_f(const B& real, const B& imag) noexcept
    {
        // Interleave the selected 128-bit half of `real` and `imag` into
        // (r0, i0, r1, i1, ...) lane order.
        __m128 tmp0 = _mm256_extractf128_ps(real, index);
        __m128 tmp1 = _mm256_extractf128_ps(imag, index);
        __m128 tmp2 = _mm_unpackhi_ps(tmp0, tmp1);
        tmp0 = _mm_unpacklo_ps(tmp0, tmp1);
        __m256 res = real;
        res = _mm256_insertf128_ps(res, tmp0, 0);
        res = _mm256_insertf128_ps(res, tmp2, 1);
        return res;
    }
    template <int index, class B>
    XSIMD_INLINE B get_half_complex_d(const B& real, const B& imag) noexcept
    {
        __m128d tmp0 = _mm256_extractf128_pd(real, index);
        __m128d tmp1 = _mm256_extractf128_pd(imag, index);
        __m128d tmp2 = _mm_unpackhi_pd(tmp0, tmp1);
        tmp0 = _mm_unpacklo_pd(tmp0, tmp1);
        __m256d res = real;
        res = _mm256_insertf128_pd(res, tmp0, 0);
        res = _mm256_insertf128_pd(res, tmp2, 1);
        return res;
    }

    // complex_low: interleaved (real, imag) pairs from the low half.
    template <class A>
    XSIMD_INLINE batch<float, A> complex_low(batch<std::complex<float>, A> const& self, requires_arch<avx>) noexcept
    {
        return get_half_complex_f<0>(self.real(), self.imag());
    }
    template <class A>
    XSIMD_INLINE batch<double, A> complex_low(batch<std::complex<double>, A> const& self, requires_arch<avx>) noexcept
    {
        return get_half_complex_d<0>(self.real(), self.imag());
    }

    // complex_high: interleaved (real, imag) pairs from the high half.
    template <class A>
    XSIMD_INLINE batch<float, A> complex_high(batch<std::complex<float>, A> const& self, requires_arch<avx>) noexcept
    {
        return get_half_complex_f<1>(self.real(), self.imag());
    }
    template <class A>
    XSIMD_INLINE batch<double, A> complex_high(batch<std::complex<double>, A> const& self, requires_arch<avx>) noexcept
    {
        return get_half_complex_d<1>(self.real(), self.imag());
    }
}
// fast_cast namespace detail
{ template <class A>
XSIMD_INLINE batch<float, A> fast_cast(batch<int32_t, A> const& self, batch<float, A> const&, requires_arch<avx>) noexcept
{ return _mm256_cvtepi32_ps(self);
}
template <class A>
XSIMD_INLINE batch<int32_t, A> fast_cast(batch<float, A> const& self, batch<int32_t, A> const&, requires_arch<avx>) noexcept
{ return _mm256_cvttps_epi32(self);
}
}
// decr_if
// Decrement lanes where the mask is true: true lanes of a batch_bool are
// all-ones, i.e. -1 as an integer, so adding the mask subtracts 1.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> decr_if(batch<T, A> const& self, batch_bool<T, A> const& mask, requires_arch<avx>) noexcept
{
    return self + batch<T, A>(mask.data);
}
// div
template <class A>
XSIMD_INLINE batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_div_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> div(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_div_pd(self, other);
}
// eq
template <class A>
XSIMD_INLINE batch_bool<float, A> eq(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    // Ordered quiet compare: NaN lanes compare false.
    return _mm256_cmp_ps(self, other, _CMP_EQ_OQ);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> eq(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_cmp_pd(self, other, _CMP_EQ_OQ);
}
template <class A>
XSIMD_INLINE batch_bool<float, A> eq(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<avx>) noexcept
{
    // Mask equality expressed as the complement of inequality.
    return ~(self != other);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> eq(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<avx>) noexcept
{
    return ~(self != other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>) noexcept
{
    return detail::fwd_to_sse([](__m128i s, __m128i o) noexcept
                              { return eq(batch<T, sse4_2>(s), batch<T, sse4_2>(o), sse4_2 {}); },
                              self, other);
}

template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx>) noexcept
{
    return ~(self != other);
}
// floor
template <class A>
XSIMD_INLINE batch<float, A> floor(batch<float, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_floor_ps(self);
}
template <class A>
XSIMD_INLINE batch<double, A> floor(batch<double, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_floor_pd(self);
}
// isnan
// Unordered self-compare: a lane is unordered with itself iff it is NaN.
template <class A>
XSIMD_INLINE batch_bool<float, A> isnan(batch<float, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_cmp_ps(self, self, _CMP_UNORD_Q);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> isnan(batch<double, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_cmp_pd(self, self, _CMP_UNORD_Q);
}
// le
template <class A>
XSIMD_INLINE batch_bool<float, A> le(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    // Ordered quiet compare: NaN lanes compare false.
    return _mm256_cmp_ps(self, other, _CMP_LE_OQ);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> le(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_cmp_pd(self, other, _CMP_LE_OQ);
}
// load_aligned
// `mem` must be aligned on the 256-bit (32-byte) boundary required by the
// aligned load instructions.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<avx>) noexcept
{
    return _mm256_load_si256((__m256i const*)mem);
}
template <class A>
XSIMD_INLINE batch<float, A> load_aligned(float const* mem, convert<float>, requires_arch<avx>) noexcept
{
    return _mm256_load_ps(mem);
}
template <class A>
XSIMD_INLINE batch<double, A> load_aligned(double const* mem, convert<double>, requires_arch<avx>) noexcept
{
    return _mm256_load_pd(mem);
}
template <class A>
XSIMD_INLINE uint64_t mask(batch_bool<double, A> const& self, requires_arch<avx>) noexcept
{
    // One bit per double lane, taken from each lane's most significant bit.
    int const bitmask = _mm256_movemask_pd(self);
    return static_cast<uint64_t>(bitmask);
}
// max
template <class A>
XSIMD_INLINE batch<float, A> max(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_max_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> max(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_max_pd(self, other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> max(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>) noexcept
{
    // No 256-bit integer max on plain AVX: fall back to compare + select.
    return select(self > other, self, other);
}
// min
template <class A>
XSIMD_INLINE batch<float, A> min(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_min_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> min(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_min_pd(self, other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> min(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>) noexcept
{
    // No 256-bit integer min on plain AVX: fall back to compare + select.
    return select(self <= other, self, other);
}
// mul
template <class A>
XSIMD_INLINE batch<float, A> mul(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_mul_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> mul(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_mul_pd(self, other);
}
// nearbyint
template <class A>
XSIMD_INLINE batch<float, A> nearbyint(batch<float, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_round_ps(self, _MM_FROUND_TO_NEAREST_INT);
}
template <class A>
XSIMD_INLINE batch<double, A> nearbyint(batch<double, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_round_pd(self, _MM_FROUND_TO_NEAREST_INT);
}
// nearbyint_as_int
template <class A>
XSIMD_INLINE batch<int32_t, A> nearbyint_as_int(batch<float, A> const& self,
                                                requires_arch<avx>) noexcept
{
    // Rounds according to the current MXCSR rounding mode
    // (round-to-nearest-even by default).
    return _mm256_cvtps_epi32(self);
}
// neg
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> neg(batch<T, A> const& self, requires_arch<avx>) noexcept
{
    return 0 - self;
}
// The float overload was missing XSIMD_INLINE/noexcept, unlike every sibling.
template <class A>
XSIMD_INLINE batch<float, A> neg(batch<float, A> const& self, requires_arch<avx>) noexcept
{
    // Flip only the sign bit.
    return _mm256_xor_ps(self, _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
}
template <class A>
XSIMD_INLINE batch<double, A> neg(batch<double, A> const& self, requires_arch<avx>) noexcept
{
    return _mm256_xor_pd(self, _mm256_castsi256_pd(_mm256_set1_epi64x(0x8000000000000000)));
}
// neq
template <class A>
XSIMD_INLINE batch_bool<float, A> neq(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx>) noexcept
{
    // Unordered quiet compare: NaN lanes compare true (NaN != anything).
    return _mm256_cmp_ps(self, other, _CMP_NEQ_UQ);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> neq(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_cmp_pd(self, other, _CMP_NEQ_UQ);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx>) noexcept
{
    return ~(self == other);
}
template <class A>
XSIMD_INLINE batch_bool<float, A> neq(batch_bool<float, A> const& self, batch_bool<float, A> const& other, requires_arch<avx>) noexcept
{
    // Masks are all-ones or all-zeros, so XOR is exactly lane inequality.
    return _mm256_xor_ps(self, other);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> neq(batch_bool<double, A> const& self, batch_bool<double, A> const& other, requires_arch<avx>) noexcept
{
    return _mm256_xor_pd(self, other);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> neq(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx>) noexcept
{
    // Route through the float domain: plain AVX lacks 256-bit integer XOR.
    return _mm256_castps_si256(_mm256_xor_ps(_mm256_castsi256_ps(self.data), _mm256_castsi256_ps(other.data)));
}
// reciprocal
template <class A>
XSIMD_INLINE batch<float, A> reciprocal(batch<float, A> const& self,
                                        kernel::requires_arch<avx>) noexcept
{
    // Fast approximate 1/x (roughly 12 bits of precision), not IEEE division.
    return _mm256_rcp_ps(self);
}
__m128i res_low = select(batch_bool<T, sse4_2>(cond_low), batch<T, sse4_2>(true_low), batch<T, sse4_2>(false_low), sse4_2 {});
__m128i res_hi = select(batch_bool<T, sse4_2>(cond_hi), batch<T, sse4_2>(true_hi), batch<T, sse4_2>(false_hi), sse4_2 {}); return detail::merge_sse(res_low, res_hi);
} template <class A, class T, bool... Values, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
// Select with a compile-time boolean mask: materializes the constant mask
// into a runtime batch_bool, then defers to the dynamic select kernel.
// NOTE(review): this AVX kernel dispatches with an explicit `avx2 {}` tag —
// confirm the targeted overload is valid when only AVX is available.
XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, Values...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<avx>) noexcept
{ return select(batch_bool<T, A> { Values... }, true_br, false_br, avx2 {});
}
template <class A, bool... Values>
XSIMD_INLINE batch<float, A> select(batch_bool_constant<float, A, Values...> const&, batch<float, A> const& true_br, batch<float, A> const& false_br, requires_arch<avx>) noexcept
{
    // The boolean pack collapses into an immediate blend mask: bit i set
    // picks lane i from true_br, clear picks it from false_br.
    constexpr auto blend_mask = batch_bool_constant<float, A, Values...>::mask();
    return _mm256_blend_ps(false_br, true_br, blend_mask);
}
template <class A, bool... Values>
XSIMD_INLINE batch<double, A> select(batch_bool_constant<double, A, Values...> const&, batch<double, A> const& true_br, batch<double, A> const& false_br, requires_arch<avx>) noexcept
{
    // Compile-time mask becomes the immediate operand of vblendpd.
    constexpr auto blend_mask = batch_bool_constant<double, A, Values...>::mask();
    return _mm256_blend_pd(false_br, true_br, blend_mask);
}
// set
template <class A, class... Values>
XSIMD_INLINE batch<float, A> set(batch<float, A> const&, requires_arch<avx>, Values... values) noexcept
{
    static_assert(sizeof...(Values) == batch<float, A>::size, "consistent init");
    return _mm256_setr_ps(values...);
}
template <class A, class... Values>
XSIMD_INLINE batch<double, A> set(batch<double, A> const&, requires_arch<avx>, Values... values) noexcept
{
    static_assert(sizeof...(Values) == batch<double, A>::size, "consistent init");
    return _mm256_setr_pd(values...);
}
// 4 x 64-bit integer lanes.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<avx>, T v0, T v1, T v2, T v3) noexcept
{
    return _mm256_setr_epi64x(v0, v1, v2, v3);
}
// 8 x 32-bit integer lanes.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<avx>, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7) noexcept
{
    return _mm256_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7);
}
// 16 x 16-bit integer lanes.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<avx>, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15) noexcept
{
    return _mm256_setr_epi16(v0, v1, v2, v3, v4, v5, v6, v7,
                             v8, v9, v10, v11, v12, v13, v14, v15);
}
// 32 x 8-bit integer lanes.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<avx>, T v0, T v1, T v2, T v3, T v4, T v5, T v6, T v7, T v8, T v9, T v10, T v11, T v12, T v13, T v14, T v15,
                             T v16, T v17, T v18, T v19, T v20, T v21, T v22, T v23, T v24, T v25, T v26, T v27, T v28, T v29, T v30, T v31) noexcept
{
    return _mm256_setr_epi8(v0, v1, v2, v3, v4, v5, v6, v7,
                            v8, v9, v10, v11, v12, v13, v14, v15,
                            v16, v17, v18, v19, v20, v21, v22, v23,
                            v24, v25, v26, v27, v28, v29, v30, v31);
}
// Boolean set: true lanes become all-ones (-1), false lanes all-zeros.
// The static_assert mirrors the float/double overloads for consistent
// arity checking across all batch_bool set kernels.
template <class A, class T, class... Values, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> set(batch_bool<T, A> const&, requires_arch<avx>, Values... values) noexcept
{
    static_assert(sizeof...(Values) == batch_bool<T, A>::size, "consistent init");
    return set(batch<T, A>(), A {}, static_cast<T>(values ? -1LL : 0LL)...).data;
}
template <class A, class... Values>
XSIMD_INLINE batch_bool<float, A> set(batch_bool<float, A> const&, requires_arch<avx>, Values... values) noexcept
{
    static_assert(sizeof...(Values) == batch_bool<float, A>::size, "consistent init");
    return _mm256_castsi256_ps(set(batch<int32_t, A>(), A {}, static_cast<int32_t>(values ? -1LL : 0LL)...).data);
}
template <class A, class... Values>
XSIMD_INLINE batch_bool<double, A> set(batch_bool<double, A> const&, requires_arch<avx>, Values... values) noexcept
{
    static_assert(sizeof...(Values) == batch_bool<double, A>::size, "consistent init");
    return _mm256_castsi256_pd(set(batch<int64_t, A>(), A {}, static_cast<int64_t>(values ? -1LL : 0LL)...).data);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.