/*************************************************************************** * Copyright (c) Johan Mabille, Sylvain Corlay, Wolf Vollprecht and * * Martin Renou * * Copyright (c) QuantStack * * Copyright (c) Serge Guelton * * * * Distributed under the terms of the BSD 3-Clause License. * * * * The full license is in the file LICENSE, distributed with this software. *
****************************************************************************/
// abs
// Element-wise absolute value for integral batches.
// Unsigned element types are returned unchanged; signed 8/16-bit elements use
// the dedicated AVX512BW instructions; wider element sizes fall through to the
// avx512dq implementation (same dispatch pattern as mul/sub below).
// NOTE(review): the block as extracted was truncated after the unsigned early
// return — the signed paths and closing brace are restored here.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> abs(batch<T, A> const& self, requires_arch<avx512bw>) noexcept
{
    if (std::is_unsigned<T>::value)
    {
        // abs is the identity on unsigned integers.
        return self;
    }

    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        return _mm512_abs_epi8(self);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        return _mm512_abs_epi16(self);
    }
    else
    {
        // 32/64-bit elements are handled by the avx512dq implementation.
        return abs(self, avx512dq {});
    }
}
// eq
// Element-wise equality comparison for integral batches.
// Returns a batch_bool mask; actual work is done by the shared AVX512BW
// integer-compare helper instantiated with the EQ predicate.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    return detail::compare_int_avx512bw<A, T, _MM_CMPINT_EQ>(self, other);
}
// ge
// Element-wise greater-or-equal comparison for integral batches.
// Signed/unsigned handling is delegated to compare_int_avx512bw — presumably it
// selects the signed vs. unsigned compare intrinsic from T; verify in detail::.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    return detail::compare_int_avx512bw<A, T, _MM_CMPINT_GE>(self, other);
}
// gt
// Element-wise greater-than comparison for integral batches, via the shared
// AVX512BW compare helper with the GT predicate.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    return detail::compare_int_avx512bw<A, T, _MM_CMPINT_GT>(self, other);
}
// le
// Element-wise less-or-equal comparison for integral batches, via the shared
// AVX512BW compare helper with the LE predicate.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    return detail::compare_int_avx512bw<A, T, _MM_CMPINT_LE>(self, other);
}
// lt
// Element-wise less-than comparison for integral batches, via the shared
// AVX512BW compare helper with the LT predicate.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    return detail::compare_int_avx512bw<A, T, _MM_CMPINT_LT>(self, other);
}
// mul
// Element-wise integer multiplication (truncating, keeps the low bits of each
// product). 16-bit elements map directly to a native instruction; 8-bit
// elements have no AVX512BW multiply and are emulated with two 16-bit
// multiplies; wider elements fall through to the avx512dq implementation.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> mul(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        // Odd-position bytes: shift both operands right by 8 so each odd byte
        // sits in the low half of its 16-bit lane, multiply, then shift the
        // low byte of each product back up into the odd position.
        __m512i odd_bytes = _mm512_slli_epi16(
            _mm512_mullo_epi16(_mm512_srli_epi16(self, 8), _mm512_srli_epi16(other, 8)), 8);
        // Even-position bytes: multiply in place and keep only the low byte of
        // each 16-bit product (mask = 0x00ff replicated across every lane).
        __m512i low_byte_mask = _mm512_srli_epi16(_mm512_set1_epi16(-1), 8);
        __m512i even_bytes = _mm512_and_si512(_mm512_mullo_epi16(self, other), low_byte_mask);
        // Merge the two interleaved halves.
        return _mm512_or_si512(even_bytes, odd_bytes);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        return _mm512_mullo_epi16(self, other);
    }
    else
    {
        return mul(self, other, avx512dq {});
    }
}
// neq
// Element-wise inequality comparison for integral batches, via the shared
// AVX512BW compare helper with the NE predicate.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch_bool<T, A> neq(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    return detail::compare_int_avx512bw<A, T, _MM_CMPINT_NE>(self, other);
}
// rotate_left
// Rotates the bytes of the batch left by N positions using the double-register
// byte-alignment instruction (self concatenated with itself).
// NOTE(review): _mm512_alignr_epi8 shifts within each 128-bit lane rather than
// across the whole 512-bit register — confirm callers/tests rely only on the
// lane-wise behavior.
template <size_t N, class A>
XSIMD_INLINE batch<uint16_t, A> rotate_left(batch<uint16_t, A> const& self, requires_arch<avx512bw>) noexcept
{
    return _mm512_alignr_epi8(self, self, N);
}
// Signed variant: same bit pattern as the unsigned rotation, only the element
// type differs, so cast and reuse the uint16_t implementation.
// Fix: dispatch to avx512bw {} (not avx2 {}) — the operand is a 512-bit batch,
// and every other signed/unsigned delegation in this file (see the swizzle
// overloads below) forwards to the avx512bw overload.
template <size_t N, class A>
XSIMD_INLINE batch<int16_t, A> rotate_left(batch<int16_t, A> const& self, requires_arch<avx512bw>) noexcept
{
    return bitwise_cast<int16_t>(rotate_left<N, A>(bitwise_cast<uint16_t>(self), avx512bw {}));
}
// sub
// Element-wise integer subtraction. 8- and 16-bit elements use the native
// AVX512BW instructions; wider element sizes fall through to avx512dq.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> sub(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        return _mm512_sub_epi8(self, other);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        return _mm512_sub_epi16(self, other);
    }
    else
    {
        // 32/64-bit elements are handled by the avx512dq implementation.
        return sub(self, other, avx512dq {});
    }
}
// swizzle (dynamic version)
// Permutes the 16-bit elements of `self` according to the run-time index
// batch `mask`; indices select across the full 512-bit register.
template <class A>
XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch<uint16_t, A> mask, requires_arch<avx512bw>) noexcept
{
    return _mm512_permutexvar_epi16(mask, self);
}
// Signed 16-bit variant: same permutation as the unsigned overload — only the
// element type differs, so cast and delegate.
template <class A>
XSIMD_INLINE batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch<uint16_t, A> mask, requires_arch<avx512bw>) noexcept
{
    return bitwise_cast<int16_t>(swizzle(bitwise_cast<uint16_t>(self), mask, avx512bw {}));
}
// 8-bit dynamic swizzle.
// NOTE(review): _mm512_shuffle_epi8 indexes within each 128-bit lane (and a
// mask byte with its high bit set zeroes the output byte) — confirm callers
// only pass lane-local indices for full-register permutations.
template <class A>
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512bw>) noexcept
{
    return _mm512_shuffle_epi8(self, mask);
}
// Signed 8-bit variant: same permutation as the unsigned overload — only the
// element type differs, so cast and delegate.
template <class A>
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch<uint8_t, A> mask, requires_arch<avx512bw>) noexcept
{
    return bitwise_cast<int8_t>(swizzle(bitwise_cast<uint8_t>(self), mask, avx512bw {}));
}
// swizzle (static version)
// Compile-time mask variant: materializes the constant index pack as a batch
// and forwards to the dynamic implementation above.
template <class A, uint16_t... Vs>
XSIMD_INLINE batch<uint16_t, A> swizzle(batch<uint16_t, A> const& self, batch_constant<uint16_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
{
    return swizzle(self, mask.as_batch(), avx512bw {});
}
// Static-mask swizzle, signed 16-bit elements: materialize the constant mask
// and forward to the dynamic int16_t overload.
template <class A, uint16_t... Vs>
XSIMD_INLINE batch<int16_t, A> swizzle(batch<int16_t, A> const& self, batch_constant<uint16_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
{
    return swizzle(self, mask.as_batch(), avx512bw {});
}
// Static-mask swizzle, unsigned 8-bit elements: materialize the constant mask
// and forward to the dynamic uint8_t overload.
template <class A, uint8_t... Vs>
XSIMD_INLINE batch<uint8_t, A> swizzle(batch<uint8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
{
    return swizzle(self, mask.as_batch(), avx512bw {});
}
// Static-mask swizzle, signed 8-bit elements: materialize the constant mask
// and forward to the dynamic int8_t overload.
template <class A, uint8_t... Vs>
XSIMD_INLINE batch<int8_t, A> swizzle(batch<int8_t, A> const& self, batch_constant<uint8_t, A, Vs...> mask, requires_arch<avx512bw>) noexcept
{
    return swizzle(self, mask.as_batch(), avx512bw {});
}
// zip_hi
// Interleaves the upper halves of `self` and `other` element-wise.
// The unpack intrinsics operate independently per 128-bit lane, so the lane
// results must be reassembled afterwards with extract/insert.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    __m512i lo, hi;
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        lo = _mm512_unpacklo_epi8(self, other);
        hi = _mm512_unpackhi_epi8(self, other);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        lo = _mm512_unpacklo_epi16(self, other);
        hi = _mm512_unpackhi_epi16(self, other);
    }
    else
    {
        // 32/64-bit elements are handled by the avx512f implementation.
        return zip_hi(self, other, avx512f {});
    }
    // Reassemble the per-lane unpack results into the true upper-half
    // interleave: result lanes = [lo.lane2, hi.lane2, lo.lane3, hi.lane3].
    return _mm512_inserti32x4(
        _mm512_inserti32x4(
            _mm512_inserti32x4(hi, _mm512_extracti32x4_epi32(lo, 2), 0),
            _mm512_extracti32x4_epi32(lo, 3),
            2),
        _mm512_extracti32x4_epi32(hi, 2),
        1);
}
// zip_lo
// Interleaves the lower halves of `self` and `other` element-wise.
// The unpack intrinsics operate independently per 128-bit lane, so the lane
// results must be reassembled afterwards with extract/insert.
template <class A, class T, class = typename std::enable_if<std::is_integral<T>::value, void>::type>
XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& self, batch<T, A> const& other, requires_arch<avx512bw>) noexcept
{
    __m512i lo, hi;
    XSIMD_IF_CONSTEXPR(sizeof(T) == 1)
    {
        lo = _mm512_unpacklo_epi8(self, other);
        hi = _mm512_unpackhi_epi8(self, other);
    }
    else XSIMD_IF_CONSTEXPR(sizeof(T) == 2)
    {
        lo = _mm512_unpacklo_epi16(self, other);
        hi = _mm512_unpackhi_epi16(self, other);
    }
    else
    {
        // 32/64-bit elements are handled by the avx512f implementation.
        return zip_lo(self, other, avx512f {});
    }
    // Reassemble the per-lane unpack results into the true lower-half
    // interleave: result lanes = [lo.lane0, hi.lane0, lo.lane1, hi.lane1].
    return _mm512_inserti32x4(
        _mm512_inserti32x4(
            _mm512_inserti32x4(lo, _mm512_extracti32x4_epi32(hi, 0), 1),
            _mm512_extracti32x4_epi32(hi, 1),
            3),
        _mm512_extracti32x4_epi32(lo, 1),
        2);
}
}
}
#endif
Messung V0.5
¤ Dauer der Verarbeitung: 0.15 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.