/*************************************************************************** * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * * Martin Renou * * Copyright (c) QuantStack * * Copyright (c) Serge Guelton * * * * Distributed under the terms of the BSD 3-Clause License. * * * * The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/
// add
// Integral add: AVX512F only provides 32/64-bit integer adds; 8/16-bit
// elements are processed as two AVX2 half-registers via fwd_to_avx.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> add(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
                                  { return add(batch<T, avx2>(s), batch<T, avx2>(o)); },
                                  self, other);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
                                  { return add(batch<T, avx2>(s), batch<T, avx2>(o)); },
                                  self, other);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        return _mm512_add_epi32(self, other);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
    {
        return _mm512_add_epi64(self, other);
    }
    else
    {
        assert(false && "unsupported arch/op combination");
        return {};
    }
}
template <class A>
XSIMD_INLINE batch<float, A> add(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_add_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> add(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_add_pd(self, other);
}
// all
// True iff every lane of the mask register is set (register is all-ones).
template <class A, class T>
XSIMD_INLINE bool all(batch_bool<T, A> const& self, requires_arch<avx512f>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    return self.data == register_type(-1);
}
// any
// True iff at least one lane of the mask register is set.
template <class A, class T>
XSIMD_INLINE bool any(batch_bool<T, A> const& self, requires_arch<avx512f>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    return self.data != register_type(0);
}
// batch_bool_cast
// AVX512 masks are plain bitmasks, so casting between element types of the
// same width is a no-op on the underlying register.
template <class A, class T_out, class T_in>
XSIMD_INLINE batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& self, batch_bool<T_out, A> const&, requires_arch<avx512f>) noexcept
{
    return self.data;
}
// bitwise_and
template <class A>
XSIMD_INLINE batch<float, A> bitwise_and(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
// MSVC provides _mm512_and_ps without AVX512DQ; elsewhere go through the
// integer domain, which only requires AVX512F.
#if defined(_MSC_VER)
    return _mm512_and_ps(self, other);
#else
    return _mm512_castsi512_ps(_mm512_and_si512(_mm512_castps_si512(self), _mm512_castps_si512(other)));
#endif
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_and(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_pd(_mm512_and_si512(_mm512_castpd_si512(self), _mm512_castpd_si512(other)));
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_and(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_and_si512(self, other);
}
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> bitwise_and(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx512f>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    return register_type(self.data & other.data);
}
// bitwise_andnot
// xsimd's bitwise_andnot(x, y) computes x & ~y; the intrinsic computes
// ~first & second, hence the swapped argument order below.
template <class A>
XSIMD_INLINE batch<float, A> bitwise_andnot(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_ps(_mm512_andnot_si512(_mm512_castps_si512(other), _mm512_castps_si512(self)));
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_andnot(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_pd(_mm512_andnot_si512(_mm512_castpd_si512(other), _mm512_castpd_si512(self)));
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_andnot(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_andnot_si512(other, self);
}
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> bitwise_andnot(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx512f>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    return register_type(self.data & ~other.data);
}
// bitwise_not
// Implemented as XOR with all-ones (no dedicated NOT instruction).
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_not(batch<T, A> const& self, requires_arch<avx512f>) noexcept
{
    return _mm512_xor_si512(self, _mm512_set1_epi32(-1));
}
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> bitwise_not(batch_bool<T, A> const& self, requires_arch<avx512f>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    return register_type(~self.data);
}
template <class A>
XSIMD_INLINE batch<float, A> bitwise_not(batch<float, A> const& self, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_ps(_mm512_xor_si512(_mm512_castps_si512(self), _mm512_set1_epi32(-1)));
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_not(batch<double, A> const& self, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_pd(_mm512_xor_si512(_mm512_castpd_si512(self), _mm512_set1_epi32(-1)));
}
// bitwise_or
// Float/double go through the integer domain (AVX512F has no _ps/_pd OR).
template <class A>
XSIMD_INLINE batch<float, A> bitwise_or(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_ps(_mm512_or_si512(_mm512_castps_si512(self), _mm512_castps_si512(other)));
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_or(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_pd(_mm512_or_si512(_mm512_castpd_si512(self), _mm512_castpd_si512(other)));
}
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> bitwise_or(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx512f>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    return register_type(self.data | other.data);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_or(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_or_si512(self, other);
}
// bitwise_xor
template <class A>
XSIMD_INLINE batch<float, A> bitwise_xor(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_ps(_mm512_xor_si512(_mm512_castps_si512(self), _mm512_castps_si512(other)));
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_xor(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_pd(_mm512_xor_si512(_mm512_castpd_si512(self), _mm512_castpd_si512(other)));
}
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> bitwise_xor(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx512f>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    // BUG FIX: previously computed `self.data | other.data` (OR) — XOR of
    // the two mask registers is the correct operation here.
    return register_type(self.data ^ other.data);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_xor(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_xor_si512(self, other);
}
// bitwise_cast
// Reinterprets the 512-bit register without changing any bits; the second
// (unused) parameter only selects the destination type.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<float, A> bitwise_cast(batch<T, A> const& self, batch<float, A> const&, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_ps(self);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<double, A> bitwise_cast(batch<T, A> const& self, batch<double, A> const&, requires_arch<avx512f>) noexcept
{
    return _mm512_castsi512_pd(self);
}
// integer <-> integer: same register type, direct rewrap.
template <class A, class T, class Tp, class = typename std::enable_if<std::is_integral<typename std::common_type<T, Tp>::type>::value, void>::type>
XSIMD_INLINE batch<Tp, A> bitwise_cast(batch<T, A> const& self, batch<Tp, A> const&, requires_arch<avx512f>) noexcept
{
    return batch<Tp, A>(self.data);
}
template <class A>
XSIMD_INLINE batch<double, A> bitwise_cast(batch<float, A> const& self, batch<double, A> const&, requires_arch<avx512f>) noexcept
{
    return _mm512_castps_pd(self);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_cast(batch<float, A> const& self, batch<T, A> const&, requires_arch<avx512f>) noexcept
{
    return _mm512_castps_si512(self);
}
template <class A>
XSIMD_INLINE batch<float, A> bitwise_cast(batch<double, A> const& self, batch<float, A> const&, requires_arch<avx512f>) noexcept
{
    return _mm512_castpd_ps(self);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> bitwise_cast(batch<double, A> const& self, batch<T, A> const&, requires_arch<avx512f>) noexcept
{
    return _mm512_castpd_si512(self);
}
// ceil
// roundscale with scale 0 performs plain rounding toward +infinity.
template <class A>
XSIMD_INLINE batch<float, A> ceil(batch<float, A> const& self, requires_arch<avx512f>) noexcept
{
    return _mm512_roundscale_ps(self, _MM_FROUND_TO_POS_INF);
}
template <class A>
XSIMD_INLINE batch<double, A> ceil(batch<double, A> const& self, requires_arch<avx512f>) noexcept
{
    return _mm512_roundscale_pd(self, _MM_FROUND_TO_POS_INF);
}
// compress
// Packs the lanes selected by `mask` to the low end, zeroing the rest.
template <class A>
XSIMD_INLINE batch<float, A> compress(batch<float, A> const& self, batch_bool<float, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_compress_ps(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<double, A> compress(batch<double, A> const& self, batch_bool<double, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_compress_pd(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<int32_t, A> compress(batch<int32_t, A> const& self, batch_bool<int32_t, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_compress_epi32(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<uint32_t, A> compress(batch<uint32_t, A> const& self, batch_bool<uint32_t, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_compress_epi32(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<int64_t, A> compress(batch<int64_t, A> const& self, batch_bool<int64_t, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_compress_epi64(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<uint64_t, A> compress(batch<uint64_t, A> const& self, batch_bool<uint64_t, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_compress_epi64(mask.mask(), self);
}
// convert namespace detail
{ template <class A>
XSIMD_INLINE batch<float, A> fast_cast(batch<int32_t, A> const& self, batch<float, A> const&, requires_arch<avx512f>) noexcept
{ return _mm512_cvtepi32_ps(self);
}
template <class A>
XSIMD_INLINE batch<int32_t, A> fast_cast(batch<float, A> const& self, batch<int32_t, A> const&, requires_arch<avx512f>) noexcept
{ return _mm512_cvttps_epi32(self);
}
template <class A>
XSIMD_INLINE batch<float, A> fast_cast(batch<uint32_t, A> const& self, batch<float, A> const&, requires_arch<avx512f>) noexcept
{ return _mm512_cvtepu32_ps(self);
}
template <class A>
batch<uint32_t, A> fast_cast(batch<float, A> const& self, batch<uint32_t, A> const&, requires_arch<avx512f>)
{ return _mm512_cvttps_epu32(self);
}
}
// div
template <class A>
XSIMD_INLINE batch<float, A> div(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_div_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> div(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_div_pd(self, other);
}
// eq
// Ordered, non-signaling comparison: NaN compares unequal.
template <class A>
XSIMD_INLINE batch_bool<float, A> eq(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_ps_mask(self, other, _CMP_EQ_OQ);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> eq(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_pd_mask(self, other, _CMP_EQ_OQ);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    return detail::compare_int_avx512f<A, T, _MM_CMPINT_EQ>(self, other);
}
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& self, batch_bool<T, A> const& other, requires_arch<avx512f>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    // ~a ^ b is XNOR: a bit is set exactly where the two masks agree.
    return register_type(~self.data ^ other.data);
}
// expand
// Inverse of compress: scatters the low lanes of `self` into the positions
// selected by `mask`, zeroing unselected lanes.
template <class A>
XSIMD_INLINE batch<float, A> expand(batch<float, A> const& self, batch_bool<float, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_expand_ps(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<double, A> expand(batch<double, A> const& self, batch_bool<double, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_expand_pd(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<int32_t, A> expand(batch<int32_t, A> const& self, batch_bool<int32_t, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_expand_epi32(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<uint32_t, A> expand(batch<uint32_t, A> const& self, batch_bool<uint32_t, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_expand_epi32(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<int64_t, A> expand(batch<int64_t, A> const& self, batch_bool<int64_t, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_expand_epi64(mask.mask(), self);
}
template <class A>
XSIMD_INLINE batch<uint64_t, A> expand(batch<uint64_t, A> const& self, batch_bool<uint64_t, A> const& mask, requires_arch<avx512f>) noexcept
{
    return _mm512_maskz_expand_epi64(mask.mask(), self);
}
// floor
template <class A>
XSIMD_INLINE batch<float, A> floor(batch<float, A> const& self, requires_arch<avx512f>) noexcept
{
    return _mm512_roundscale_ps(self, _MM_FROUND_TO_NEG_INF);
}
template <class A>
XSIMD_INLINE batch<double, A> floor(batch<double, A> const& self, requires_arch<avx512f>) noexcept
{
    return _mm512_roundscale_pd(self, _MM_FROUND_TO_NEG_INF);
}
// fnma: -(x * y) + z
template <class A>
XSIMD_INLINE batch<float, A> fnma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<avx512f>) noexcept
{
    return _mm512_fnmadd_ps(x, y, z);
}
template <class A>
XSIMD_INLINE batch<double, A> fnma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<avx512f>) noexcept
{
    return _mm512_fnmadd_pd(x, y, z);
}
// fma: (x * y) + z
template <class A>
XSIMD_INLINE batch<float, A> fma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<avx512f>) noexcept
{
    return _mm512_fmadd_ps(x, y, z);
}
template <class A>
XSIMD_INLINE batch<double, A> fma(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<avx512f>) noexcept
{
    return _mm512_fmadd_pd(x, y, z);
}
// fms: (x * y) - z
template <class A>
XSIMD_INLINE batch<float, A> fms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<avx512f>) noexcept
{
    return _mm512_fmsub_ps(x, y, z);
}
template <class A>
XSIMD_INLINE batch<double, A> fms(batch<double, A> const& x, batch<double, A> const& y, batch<double, A> const& z, requires_arch<avx512f>) noexcept
{
    return _mm512_fmsub_pd(x, y, z);
}
// from bool
// Converts a mask to a numeric batch of 0/1 per lane.
template <class A, class T>
XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& self, requires_arch<avx512f>) noexcept
{
    return select(self, batch<T, A>(1), batch<T, A>(0));
}
// from_mask
// The AVX512 mask register is a plain integer bitmask, so this is a cast.
// (Fixed fused token: `returnstatic_cast` -> `return static_cast`.)
template <class T, class A>
XSIMD_INLINE batch_bool<T, A> from_mask(batch_bool<T, A> const&, uint64_t mask, requires_arch<avx512f>) noexcept
{
    return static_cast<typename batch_bool<T, A>::register_type>(mask);
}
// gather
// sizeof(T) is a compile-time constant, as required for the gather scale.
template <class T, class A, class U, detail::enable_sized_integral_t<T, 4> = 0, detail::enable_sized_integral_t<U, 4> = 0>
XSIMD_INLINE batch<T, A> gather(batch<T, A> const&, T const* src, batch<U, A> const& index,
                                kernel::requires_arch<avx512f>) noexcept
{
    return _mm512_i32gather_epi32(index, static_cast<void const*>(src), sizeof(T));
}
template <class T, class A, class U, detail::enable_sized_integral_t<T, 8> = 0, detail::enable_sized_integral_t<U, 8> = 0>
XSIMD_INLINE batch<T, A> gather(batch<T, A> const&, T const* src, batch<U, A> const& index,
                                kernel::requires_arch<avx512f>) noexcept
{
    return _mm512_i64gather_epi64(index, static_cast<void const*>(src), sizeof(T));
}
template <class A, class U, detail::enable_sized_integral_t<U, 4> = 0>
XSIMD_INLINE batch<float, A> gather(batch<float, A> const&, float const* src,
                                    batch<U, A> const& index,
                                    kernel::requires_arch<avx512f>) noexcept
{
    return _mm512_i32gather_ps(index, src, sizeof(float));
}
template <class A, class U, detail::enable_sized_integral_t<U, 8> = 0>
XSIMD_INLINE batch<double, A>
gather(batch<double, A> const&, double const* src, batch<U, A> const& index,
       kernel::requires_arch<avx512f>) noexcept
{
    return _mm512_i64gather_pd(index, src, sizeof(double));
}
// gather: handmade conversions
// 16 x double do not fit one register: gather in two 8-lane halves indexed by
// the low/high 8 32-bit indices, narrow each half, then merge.
template <class A, class V, detail::enable_sized_integral_t<V, 4> = 0>
XSIMD_INLINE batch<float, A> gather(batch<float, A> const&, double const* src,
                                    batch<V, A> const& index,
                                    requires_arch<avx512f>) noexcept
{
    const batch<double, A> low(_mm512_i32gather_pd(_mm512_castsi512_si256(index.data), src, sizeof(double)));
    const batch<double, A> high(_mm512_i32gather_pd(_mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castsi512_pd(index.data), 1)), src, sizeof(double)));
    return detail::merge_avx(_mm512_cvtpd_ps(low.data), _mm512_cvtpd_ps(high.data));
}
template <class A, class V, detail::enable_sized_integral_t<V, 4> = 0>
XSIMD_INLINE batch<int32_t, A> gather(batch<int32_t, A> const&, double const* src,
                                      batch<V, A> const& index,
                                      requires_arch<avx512f>) noexcept
{
    const batch<double, A> low(_mm512_i32gather_pd(_mm512_castsi512_si256(index.data), src, sizeof(double)));
    const batch<double, A> high(_mm512_i32gather_pd(_mm256_castpd_si256(_mm512_extractf64x4_pd(_mm512_castsi512_pd(index.data), 1)), src, sizeof(double)));
    return detail::merge_avx(_mm512_cvtpd_epi32(low.data), _mm512_cvtpd_epi32(high.data));
}
// ge
template <class A>
XSIMD_INLINE batch_bool<float, A> ge(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_ps_mask(self, other, _CMP_GE_OQ);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> ge(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_pd_mask(self, other, _CMP_GE_OQ);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    return detail::compare_int_avx512f<A, T, _MM_CMPINT_GE>(self, other);
}
// gt
template <class A>
XSIMD_INLINE batch_bool<float, A> gt(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_ps_mask(self, other, _CMP_GT_OQ);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> gt(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_pd_mask(self, other, _CMP_GT_OQ);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    return detail::compare_int_avx512f<A, T, _MM_CMPINT_GT>(self, other);
}
// haddp template <class A>
XSIMD_INLINE batch<float, A> haddp(batch<float, A> const* row, requires_arch<avx512f>) noexcept
// NOTE(review): this definition appears truncated/garbled in this chunk —
// the line-merge has folded the XSIMD_AVX512_HADDP_STEP1 #define into a `//`
// comment, and the folding steps that use the macro plus the function's
// closing brace are not visible here. Restore from the full file; do not
// edit this span in isolation.
{ // The following folds over the vector once: // tmp1 = [a0..8, b0..8] // tmp2 = [a8..f, b8..f] #define XSIMD_AVX512_HADDP_STEP1(I, a, b) \
batch<float, avx512f> res##I; \
{ \ auto tmp1 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(1, 0, 1, 0)); \ auto tmp2 = _mm512_shuffle_f32x4(a, b, _MM_SHUFFLE(3, 2, 3, 2)); \
res##I = _mm512_add_ps(tmp1, tmp2); \
}
template <class A>
XSIMD_INLINE batch<double, A> ldexp(const batch<double, A>& self, const batch<as_integer_t<double>, A>& other, requires_arch<avx512f>) noexcept
{
    // scalef takes a double exponent, so narrow the 64-bit exponents to
    // 32 bits and widen back to double.
    // FIXME: potential data loss here when converting other elements to
    // int32 before converting them back to double.
    const __m512d exponent = _mm512_cvtepi32_pd(_mm512_cvtepi64_epi32(other));
    return _mm512_scalef_pd(self, exponent);
}
// le
template <class A>
XSIMD_INLINE batch_bool<float, A> le(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_ps_mask(self, other, _CMP_LE_OQ);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> le(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_pd_mask(self, other, _CMP_LE_OQ);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    return detail::compare_int_avx512f<A, T, _MM_CMPINT_LE>(self, other);
}
// load_aligned
// `mem` must be 64-byte aligned.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> load_aligned(T const* mem, convert<T>, requires_arch<avx512f>) noexcept
{
    return _mm512_load_si512((__m512i const*)mem);
}
template <class A>
XSIMD_INLINE batch<float, A> load_aligned(float const* mem, convert<float>, requires_arch<avx512f>) noexcept
{
    return _mm512_load_ps(mem);
}
template <class A>
XSIMD_INLINE batch<double, A> load_aligned(double const* mem, convert<double>, requires_arch<avx512f>) noexcept
{
    return _mm512_load_pd(mem);
}
// load_complex namespace detail
{ template <class A>
XSIMD_INLINE batch<std::complex<float>, A> load_complex(batch<float, A> const& hi, batch<float, A> const& lo, requires_arch<avx512f>) noexcept
{
__m512i real_idx = _mm512_setr_epi32(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30);
__m512i imag_idx = _mm512_setr_epi32(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); auto real = _mm512_permutex2var_ps(hi, real_idx, lo); auto imag = _mm512_permutex2var_ps(hi, imag_idx, lo); return { real, imag };
} template <class A>
XSIMD_INLINE batch<std::complex<double>, A> load_complex(batch<double, A> const& hi, batch<double, A> const& lo, requires_arch<avx512f>) noexcept
{
__m512i real_idx = _mm512_setr_epi64(0, 2, 4, 6, 8, 10, 12, 14);
__m512i imag_idx = _mm512_setr_epi64(1, 3, 5, 7, 9, 11, 13, 15); auto real = _mm512_permutex2var_pd(hi, real_idx, lo); auto imag = _mm512_permutex2var_pd(hi, imag_idx, lo); return { real, imag };
}
}
// load_unaligned
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> load_unaligned(T const* mem, convert<T>, requires_arch<avx512f>) noexcept
{
    return _mm512_loadu_si512((__m512i const*)mem);
}
template <class A>
XSIMD_INLINE batch<float, A> load_unaligned(float const* mem, convert<float>, requires_arch<avx512f>) noexcept
{
    return _mm512_loadu_ps(mem);
}
template <class A>
XSIMD_INLINE batch<double, A> load_unaligned(double const* mem, convert<double>, requires_arch<avx512f>) noexcept
{
    return _mm512_loadu_pd(mem);
}
// lt
template <class A>
XSIMD_INLINE batch_bool<float, A> lt(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_ps_mask(self, other, _CMP_LT_OQ);
}
template <class A>
XSIMD_INLINE batch_bool<double, A> lt(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_cmp_pd_mask(self, other, _CMP_LT_OQ);
}
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    return detail::compare_int_avx512f<A, T, _MM_CMPINT_LT>(self, other);
}
// mask
// The mask register already is an integer bitmask; widen it to uint64_t.
template <class A, class T>
XSIMD_INLINE uint64_t mask(batch_bool<T, A> const& self, requires_arch<avx512f>) noexcept
{
    return self.data;
}
// max
template <class A>
XSIMD_INLINE batch<float, A> max(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_max_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> max(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_max_pd(self, other);
}
// Integral max: AVX512F covers 32/64-bit only; 8/16-bit elements fall back to
// two AVX2 half-registers.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> max(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    if (std::is_signed<T>::value)
    {
        XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
        {
            return _mm512_max_epi32(self, other);
        }
        else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
        {
            return _mm512_max_epi64(self, other);
        }
        else
        {
            return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
                                      { return max(batch<T, avx2>(s), batch<T, avx2>(o)); },
                                      self, other);
        }
    }
    else
    {
        XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
        {
            return _mm512_max_epu32(self, other);
        }
        else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
        {
            return _mm512_max_epu64(self, other);
        }
        else
        {
            return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
                                      { return max(batch<T, avx2>(s), batch<T, avx2>(o)); },
                                      self, other);
        }
    }
}
// min
template <class A>
XSIMD_INLINE batch<float, A> min(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_min_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> min(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_min_pd(self, other);
}
// Integral min: AVX512F covers 32/64-bit only; 8/16-bit elements fall back to
// two AVX2 half-registers.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> min(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    if (std::is_signed<T>::value)
    {
        XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
        {
            return _mm512_min_epi32(self, other);
        }
        else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
        {
            return _mm512_min_epi64(self, other);
        }
        else
        {
            return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
                                      { return min(batch<T, avx2>(s), batch<T, avx2>(o)); },
                                      self, other);
        }
    }
    else
    {
        XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
        {
            return _mm512_min_epu32(self, other);
        }
        else XSIMD_IF_CONSTEXPR(sizeof(T) == 8)
        {
            return _mm512_min_epu64(self, other);
        }
        else
        {
            return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
                                      { return min(batch<T, avx2>(s), batch<T, avx2>(o)); },
                                      self, other);
        }
    }
}
// mul
template <class A>
XSIMD_INLINE batch<float, A> mul(batch<float, A> const& self, batch<float, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_mul_ps(self, other);
}
template <class A>
XSIMD_INLINE batch<double, A> mul(batch<double, A> const& self, batch<double, A> const& other, requires_arch<avx512f>) noexcept
{
    return _mm512_mul_pd(self, other);
}
// Integral mul: only the 32-bit low multiply exists in AVX512F
// (_mm512_mullo_epi64 needs AVX512DQ); other widths use two AVX2 halves.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512f>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 4)
    {
        return _mm512_mullo_epi32(self, other);
    }
    else
    {
        return detail::fwd_to_avx([](__m256i s, __m256i o) noexcept
                                  { return mul(batch<T, avx2>(s), batch<T, avx2>(o)); },
                                  self, other);
    }
}
// nearbyint
// Rounds to integral values using the current rounding direction
// (_MM_FROUND_CUR_DIRECTION), matching std::nearbyint semantics.
template <class A>
XSIMD_INLINE batch<float, A> nearbyint(batch<float, A> const& self, requires_arch<avx512f>) noexcept
{
    return _mm512_roundscale_round_ps(self, _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_CUR_DIRECTION);
}
template <class A>
XSIMD_INLINE batch<double, A> nearbyint(batch<double, A> const& self, requires_arch<avx512f>) noexcept
{
    return _mm512_roundscale_round_pd(self, _MM_FROUND_TO_NEAREST_INT, _MM_FROUND_CUR_DIRECTION);
}
// nearbyint_as_int
// NOTE(review): the original body was truncated here and followed by
// non-code extraction residue (a German website disclaimer), which has been
// removed. Reconstructed with the canonical AVX512F implementation:
// _mm512_cvtps_epi32 rounds using the current rounding mode (round-to-
// nearest-even by default), which matches nearbyint semantics. Confirm
// against the upstream file.
template <class A>
XSIMD_INLINE batch<int32_t, A> nearbyint_as_int(batch<float, A> const& self,
                                                requires_arch<avx512f>) noexcept
{
    return _mm512_cvtps_epi32(self);
}