/***************************************************************************
 * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and        *
 * Martin Renou                                                            *
 * Copyright (c) QuantStack                                                *
 * Copyright (c) Serge Guelton                                             *
 *                                                                         *
 * Distributed under the terms of the BSD 3-Clause License.                *
 *                                                                         *
 * The full license is in the file LICENSE, distributed with this software.*
 ****************************************************************************/
// Build a float batch_bool from a parameter pack of boolean lanes.
// Each true lane becomes an all-ones 32-bit mask, each false lane all zeros.
template <class A, class... Args>
XSIMD_INLINE batch_bool<float, A> set(batch_bool<float, A> const&, requires_arch<neon>, Args... args) noexcept
{
    using register_type = typename batch_bool<float, A>::register_type;
    using unsigned_type = as_unsigned_integer_t<float>;
    // -1 cast to the unsigned lane type yields the all-ones mask.
    return register_type { static_cast<unsigned_type>(args ? -1LL : 0LL)... };
}
/*************
 * from_bool *
 *************/
// Convert a boolean mask batch into a numeric batch: true lanes (all-ones
// masks) become 1, false lanes become 0, by AND-ing the mask with a splat
// of 1. One overload per lane width and signedness.
template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<neon>) noexcept
{
    return vandq_u8(arg, vdupq_n_u8(1));
}

template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<neon>) noexcept
{
    // Masks are stored in unsigned registers; reinterpret to the signed
    // register type before masking.
    return vandq_s8(reinterpret_cast<int8x16_t>(arg.data), vdupq_n_s8(1));
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<neon>) noexcept
{
    return vandq_u16(arg, vdupq_n_u16(1));
}

template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<neon>) noexcept
{
    return vandq_s16(reinterpret_cast<int16x8_t>(arg.data), vdupq_n_s16(1));
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<neon>) noexcept
{
    return vandq_u32(arg, vdupq_n_u32(1));
}

template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<neon>) noexcept
{
    return vandq_s32(reinterpret_cast<int32x4_t>(arg.data), vdupq_n_s32(1));
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<neon>) noexcept
{
    return vandq_u64(arg, vdupq_n_u64(1));
}

template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
XSIMD_INLINE batch<T, A> from_bool(batch_bool<T, A> const& arg, requires_arch<neon>) noexcept
{
    return vandq_s64(reinterpret_cast<int64x2_t>(arg.data), vdupq_n_s64(1));
}

// Float: mask against the bit pattern of 1.0f so true lanes yield 1.0f.
template <class A>
XSIMD_INLINE batch<float, A> from_bool(batch_bool<float, A> const& arg, requires_arch<neon>) noexcept
{
    return vreinterpretq_f32_u32(vandq_u32(arg, vreinterpretq_u32_f32(vdupq_n_f32(1.f))));
}
/********
 * load *
 ********/
// It is not possible to use a call to A::alignment() here, so use an
// immediate instead.
// NOTE(review): in the previous rendering these macro definitions were fused
// into a single `//` comment line (and spelled `#ifdefined`), so the macro
// was never defined; restored to proper preprocessor directives.
#if defined(__clang__) || defined(__GNUC__)
#define xsimd_aligned_load(inst, type, expr) inst((type)__builtin_assume_aligned(expr, 16))
#elif defined(_MSC_VER)
#define xsimd_aligned_load(inst, type, expr) inst##_ex((type)expr, 128)
#else
#define xsimd_aligned_load(inst, type, expr) inst((type)expr)
#endif

// Load one 128-bit register from 16-byte-aligned memory; one overload per
// lane width/signedness. The pointer is cast to the exact lane type the
// vld1q_* intrinsic expects.
template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return xsimd_aligned_load(vld1q_u8, uint8_t*, src);
}

template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return xsimd_aligned_load(vld1q_s8, int8_t*, src);
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return xsimd_aligned_load(vld1q_u16, uint16_t*, src);
}

template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return xsimd_aligned_load(vld1q_s16, int16_t*, src);
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return xsimd_aligned_load(vld1q_u32, uint32_t*, src);
}

template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return xsimd_aligned_load(vld1q_s32, int32_t*, src);
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return xsimd_aligned_load(vld1q_u64, uint64_t*, src);
}

template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
XSIMD_INLINE batch<T, A> load_aligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return xsimd_aligned_load(vld1q_s64, int64_t*, src);
}

template <class A>
XSIMD_INLINE batch<float, A> load_aligned(float const* src, convert<float>, requires_arch<neon>) noexcept
{
    return xsimd_aligned_load(vld1q_f32, float*, src);
}

#undef xsimd_aligned_load
// Load one 128-bit register from memory with no alignment requirement.
// Plain vld1q_* is used directly (it has no alignment precondition).
// Fixed: the float overload's parameter was spelled `floatconst*`, which
// does not compile; restored to `float const*`.
template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return vld1q_u8((uint8_t*)src);
}

template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return vld1q_s8((int8_t*)src);
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return vld1q_u16((uint16_t*)src);
}

template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return vld1q_s16((int16_t*)src);
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return vld1q_u32((uint32_t*)src);
}

template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return vld1q_s32((int32_t*)src);
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return vld1q_u64((uint64_t*)src);
}

template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
XSIMD_INLINE batch<T, A> load_unaligned(T const* src, convert<T>, requires_arch<neon>) noexcept
{
    return vld1q_s64((int64_t*)src);
}

template <class A>
XSIMD_INLINE batch<float, A> load_unaligned(float const* src, convert<float>, requires_arch<neon>) noexcept
{
    return vld1q_f32(src);
}
/*********
 * store *
 *********/
// Store `src` to memory at `dst`; one overload per lane width/signedness.
// The pointer is cast to the exact lane type the vst1q_* intrinsic expects;
// T may be any integral type of the matching size.
template <class A, class T, detail::enable_sized_unsigned_t<T, 1> = 0>
XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<neon>) noexcept
{
vst1q_u8((uint8_t*)dst, src);
}
template <class A, class T, detail::enable_sized_signed_t<T, 1> = 0>
XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<neon>) noexcept
{
vst1q_s8((int8_t*)dst, src);
}
template <class A, class T, detail::enable_sized_unsigned_t<T, 2> = 0>
XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<neon>) noexcept
{
vst1q_u16((uint16_t*)dst, src);
}
template <class A, class T, detail::enable_sized_signed_t<T, 2> = 0>
XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<neon>) noexcept
{
vst1q_s16((int16_t*)dst, src);
}
template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<neon>) noexcept
{
vst1q_u32((uint32_t*)dst, src);
}
template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<neon>) noexcept
{
vst1q_s32((int32_t*)dst, src);
}
template <class A, class T, detail::enable_sized_unsigned_t<T, 8> = 0>
XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<neon>) noexcept
{
vst1q_u64((uint64_t*)dst, src);
}
template <class A, class T, detail::enable_sized_signed_t<T, 8> = 0>
XSIMD_INLINE void store_aligned(T* dst, batch<T, A> const& src, requires_arch<neon>) noexcept
{
vst1q_s64((int64_t*)dst, src);
}
template <class A>
XSIMD_INLINE void store_aligned(float* dst, batch<float, A> const& src, requires_arch<neon>) noexcept
{
vst1q_f32(dst, src);
}
// Unaligned store: NEON vst1q_* imposes no alignment precondition, so it
// simply forwards to the aligned implementation for every lane type.
template <class A, class T>
XSIMD_INLINE void store_unaligned(T* dst, batch<T, A> const& src, requires_arch<neon>) noexcept
{
store_aligned<A>(dst, src, A {});
}
// Element-wise multiplication for every NEON-supported lane type except
// 64-bit integers (excluded by exclude_int64_neon_t). The dispatcher
// selects the vmulq_* intrinsic matching the register type at compile time.
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
XSIMD_INLINE batch<T, A> mul(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
    using register_type = typename batch<T, A>::register_type;
    const detail::excluding_int64_dispatcher::binary dispatcher = {
        std::make_tuple(wrap::vmulq_u8, wrap::vmulq_s8, wrap::vmulq_u16, wrap::vmulq_s16,
                        wrap::vmulq_u32, wrap::vmulq_s32, wrap::vmulq_f32)
    };
    return dispatcher.apply(register_type(lhs), register_type(rhs));
}
/*******
 * div *
 *******/
// Fixed: directives were corrupted (`#ifdefined(...)` and `} #endif` fused
// onto code lines), which does not preprocess; restored to valid form.
#if defined(XSIMD_FAST_INTEGER_DIVISION)
// Fast 32-bit integer division via float32 round-trip. Inexact for values
// that float32 cannot represent exactly, hence the opt-in macro.
template <class A, class T, detail::enable_sized_signed_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> div(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
    return vcvtq_s32_f32(vcvtq_f32_s32(lhs) / vcvtq_f32_s32(rhs));
}

template <class A, class T, detail::enable_sized_unsigned_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> div(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
    return vcvtq_u32_f32(vcvtq_f32_u32(lhs) / vcvtq_f32_u32(rhs));
}
#endif
// Float division via reciprocal estimate + Newton-Raphson refinement
// (armv7 NEON has no vdivq_f32).
// Fixed: the final `return vmulq_f32(lhs, rcp);` had been swallowed by the
// preceding `//` comment, so the function fell off the end without
// returning a value (undefined behavior); the statement is restored.
template <class A>
XSIMD_INLINE batch<float, A> div(batch<float, A> const& lhs, batch<float, A> const& rhs, requires_arch<neon>) noexcept
{
    // from stackoverflow & https://projectne10.github.io/Ne10/doc/NE10__divc_8neon_8c_source.html
    // get an initial estimate of 1/b.
    float32x4_t rcp = reciprocal(rhs);
    // use a couple Newton-Raphson steps to refine the estimate. Depending on your
    // application's accuracy requirements, you may be able to get away with only
    // one refinement (instead of the two used here). Be sure to test!
    rcp = vmulq_f32(vrecpsq_f32(rhs, rcp), rcp);
    rcp = vmulq_f32(vrecpsq_f32(rhs, rcp), rcp);
    // and finally, compute a / b = a * (1 / b)
    return vmulq_f32(lhs, rcp);
}
// Lane-wise equality on values, for every type except 64-bit integers
// (those are handled scalar-wise below).
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
    using register_type = typename batch<T, A>::register_type;
    const detail::excluding_int64_comp_dispatcher::binary dispatcher = {
        std::make_tuple(wrap::vceqq_u8, wrap::vceqq_s8, wrap::vceqq_u16, wrap::vceqq_s16,
                        wrap::vceqq_u32, wrap::vceqq_s32, wrap::vceqq_f32)
    };
    return dispatcher.apply(register_type(lhs), register_type(rhs));
}

// Lane-wise equality on masks. Masks live in unsigned registers, so only
// the three unsigned comparison intrinsics are needed.
template <class A, class T, detail::exclude_int64_neon_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<neon>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    using dispatcher_type = detail::neon_comp_dispatcher_impl<uint8x16_t, uint16x8_t, uint32x4_t>::binary;
    const dispatcher_type dispatcher = {
        std::make_tuple(wrap::vceqq_u8, wrap::vceqq_u16, wrap::vceqq_u32)
    };
    return dispatcher.apply(register_type(lhs), register_type(rhs));
}

// 64-bit integers: compared lane by lane in scalar code (no 64-bit vceqq
// on this architecture level — TODO confirm against the armv7 intrinsic set).
template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
    return batch_bool<T, A>({ lhs.get(0) == rhs.get(0), lhs.get(1) == rhs.get(1) });
}

template <class A, class T, detail::enable_sized_integral_t<T, 8> = 0>
XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<neon>) noexcept
{
    return batch_bool<T, A>({ lhs.get(0) == rhs.get(0), lhs.get(1) == rhs.get(1) });
}
/*************
 * fast_cast *
 *************/
// NOTE(review): the closing brace of `namespace detail` was missing in the
// previous rendering, which would nest batch_bool_cast and every following
// kernel-level function inside kernel::detail; restored here so fast_cast
// stays in detail and batch_bool_cast is at kernel scope.
namespace detail
{
    // Hardware int<->float conversions for 32-bit lanes. vcvtq float->int
    // truncates toward zero, per the ARM intrinsic definitions.
    template <class A>
    XSIMD_INLINE batch<float, A> fast_cast(batch<int32_t, A> const& self, batch<float, A> const&, requires_arch<neon>) noexcept
    {
        return vcvtq_f32_s32(self);
    }

    template <class A>
    XSIMD_INLINE batch<float, A> fast_cast(batch<uint32_t, A> const& self, batch<float, A> const&, requires_arch<neon>) noexcept
    {
        return vcvtq_f32_u32(self);
    }

    template <class A>
    XSIMD_INLINE batch<int32_t, A> fast_cast(batch<float, A> const& self, batch<int32_t, A> const&, requires_arch<neon>) noexcept
    {
        return vcvtq_s32_f32(self);
    }

    template <class A>
    XSIMD_INLINE batch<uint32_t, A> fast_cast(batch<float, A> const& self, batch<uint32_t, A> const&, requires_arch<neon>) noexcept
    {
        return vcvtq_u32_f32(self);
    }
}

// Reinterpret a mask of one lane type as a mask of another; purely a
// register-type conversion, no lane values change.
template <class A, class T_out, class T_in>
XSIMD_INLINE batch_bool<T_out, A> batch_bool_cast(batch_bool<T_in, A> const& self, batch_bool<T_out, A> const&, requires_arch<neon>) noexcept
{
    using register_type = typename batch_bool<T_out, A>::register_type;
    return register_type(self);
}
// Bitwise XOR on value batches, delegated to the detail helper that picks
// the veorq_* intrinsic for the register type.
template <class A, class T, detail::enable_neon_type_t<T> = 0>
XSIMD_INLINE batch<T, A> bitwise_xor(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<neon>) noexcept
{
    using register_type = typename batch<T, A>::register_type;
    return detail::bitwise_xor_neon(register_type(lhs), register_type(rhs));
}

// Bitwise XOR on mask batches; same helper, mask register types.
template <class A, class T, detail::enable_neon_type_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> bitwise_xor(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<neon>) noexcept
{
    using register_type = typename batch_bool<T, A>::register_type;
    return detail::bitwise_xor_neon(register_type(lhs), register_type(rhs));
}
/*******
 * neq *
 *******/
// Mask inequality: two mask lanes differ exactly when their XOR is set,
// so neq is implemented as bitwise_xor.
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> neq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<neon>) noexcept
{
    return bitwise_xor(lhs, rhs, A {});
}
// Fixed: `#ifdef __ARM_FEATURE_FMA` and `#endif` were fused onto code lines,
// which does not preprocess; restored to stand-alone directives.
#ifdef __ARM_FEATURE_FMA
// Fused multiply-add: x * y + z via vfmaq_f32 (accumulator is the first
// operand of the intrinsic).
template <class A>
XSIMD_INLINE batch<float, A> fma(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<neon>) noexcept
{
    return vfmaq_f32(z, x, y);
}

// Fused multiply-subtract: x * y - z, expressed as x * y + (-z).
template <class A>
XSIMD_INLINE batch<float, A> fms(batch<float, A> const& x, batch<float, A> const& y, batch<float, A> const& z, requires_arch<neon>) noexcept
{
    return vfmaq_f32(-z, x, y);
}
#endif
// Low-precision reciprocal estimate via vrecpeq_f32. Callers needing more
// accuracy refine the result with vrecpsq_f32 Newton-Raphson steps, as the
// float div kernel in this file does.
template <class A>
XSIMD_INLINE batch<float, A>
reciprocal(const batch<float, A>& x,
           kernel::requires_arch<neon>) noexcept
{
    return vrecpeq_f32(x);
}
/**********
 * insert *
 **********/
// Replace lane I of `self` with `val`, leaving all other lanes untouched;
// one overload per lane width/signedness, mapping to vsetq_lane_*.
template <class A, class T, size_t I, detail::enable_sized_unsigned_t<T, 1> = 0>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<neon>) noexcept
{
    return vsetq_lane_u8(val, self, I);
}

template <class A, class T, size_t I, detail::enable_sized_signed_t<T, 1> = 0>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<neon>) noexcept
{
    return vsetq_lane_s8(val, self, I);
}

template <class A, class T, size_t I, detail::enable_sized_unsigned_t<T, 2> = 0>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<neon>) noexcept
{
    return vsetq_lane_u16(val, self, I);
}

// Fixed: this overload was declared with batch<int16_t, A>/int16_t instead
// of batch<T, A>/T, leaving template parameter T non-deducible — the
// overload could never be selected. Restored to match its siblings.
template <class A, class T, size_t I, detail::enable_sized_signed_t<T, 2> = 0>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<neon>) noexcept
{
    return vsetq_lane_s16(val, self, I);
}

template <class A, class T, size_t I, detail::enable_sized_unsigned_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<neon>) noexcept
{
    return vsetq_lane_u32(val, self, I);
}

template <class A, class T, size_t I, detail::enable_sized_signed_t<T, 4> = 0>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& self, T val, index<I>, requires_arch<neon>) noexcept
{
    return vsetq_lane_s32(val, self, I);
}
template <class A, class T, size_t I, detail::enable_sized_unsigned_t<T, 8> = 0>
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.60 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.