/***************************************************************************
 * Copyright (c) Rivos Inc.                                                 *
 *                                                                          *
 * Distributed under the terms of the BSD 3-Clause License.                 *
 *                                                                          *
 * The full license is in the file LICENSE, distributed with this software. *
 ****************************************************************************/
// This set of macros allows the synthesis of identifiers using a template and
// variable macro arguments. A single template can then be used by multiple
// macros, or multiple instances of a macro, to define the same logic for
// different data types.
//
// First, some logic to paste text together...
//
#define XSIMD_RVV_JOIN_(x, y) x##y
#define XSIMD_RVV_JOIN(x, y) XSIMD_RVV_JOIN_(x, y)
#define XSIMD_RVV_PREFIX_T(T, S, then) XSIMD_RVV_JOIN(T, then)
#define XSIMD_RVV_PREFIX_S(T, S, then) XSIMD_RVV_JOIN(S, then)
#define XSIMD_RVV_PREFIX_M(T, S, then) XSIMD_RVV_JOIN(m1, then)
#define XSIMD_RVV_PREFIX(T, S, then) then
//
// XSIMD_RVV_IDENTIFIER accepts type and size parameters, and a template for
// the identifier. The template is a comma-separated list of alternating
// literal and parameter segments. Each parameter is appended to
// XSIMD_RVV_PREFIX to form a new macro name which decides which parameter
// should be inserted. Then a literal segment is inserted after that. Empty
// literals are used to join two or more variables together.
//
#define XSIMD_RVV_IDENTIFIER9(T, S, t, ...) t
#define XSIMD_RVV_IDENTIFIER8(T, S, t, p, ...) \
    XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER9(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER7(T, S, t, p, ...) \
    XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER8(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER6(T, S, t, p, ...) \
    XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER7(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER5(T, S, t, p, ...) \
    XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER6(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER4(T, S, t, p, ...) \
    XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER5(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER3(T, S, t, p, ...) \
    XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER4(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER2(T, S, t, p, ...) \
    XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER3(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER1(T, S, t, p, ...) \
    XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER2(T, S, __VA_ARGS__)))
#define XSIMD_RVV_IDENTIFIER0(T, S, t, p, ...) \
    XSIMD_RVV_JOIN(t, XSIMD_RVV_PREFIX##p(T, S, XSIMD_RVV_IDENTIFIER1(T, S, __VA_ARGS__)))
//
// UNBRACKET and REPARSE force the preprocessor to handle expansion in a
// specific order. XSIMD_RVV_UNBRACKET strips the parentheses from the
// template (which were necessary to keep the template as a single, named
// macro parameter up to this point). XSIMD_RVV_ARG_LIST then forms the new
// parameter list to pass to XSIMD_RVV_IDENTIFIER0, with trailing commas to
// ensure the unrolled XSIMD_RVV_IDENTIFIER loop runs to completion adding
// empty strings.
//
// However, XSIMD_RVV_IDENTIFIER0 is not expanded immediately because it does
// not match a function-like macro in this pass. XSIMD_RVV_REPARSE forces
// another evaluation after the expansion of XSIMD_RVV_ARG_LIST, where
// XSIMD_RVV_IDENTIFIER0 will now match as a function-like macro, and the
// cycle of substitutions and insertions can begin.
//
#define XSIMD_RVV_REPARSE(v) (v)
#define XSIMD_RVV_UNBRACKET(...) __VA_ARGS__
#define XSIMD_RVV_ARG_LIST(T, S, name) (T, S, XSIMD_RVV_UNBRACKET name, , , , , , , , , , , , , , , , , , , , , )
#define XSIMD_RVV_IDENTIFIER(T, S, name) \
    XSIMD_RVV_REPARSE(XSIMD_RVV_IDENTIFIER0 XSIMD_RVV_ARG_LIST(T, S, name))
//
// To avoid comma-counting bugs, replace the variable references with macros
// which include enough commas to keep proper phase, and then use no commas
// at all in the templates.
//
#define XSIMD_RVV_T , _T,
#define XSIMD_RVV_S , _S,
#define XSIMD_RVV_M , _M,
#define XSIMD_RVV_TSM XSIMD_RVV_T XSIMD_RVV_S XSIMD_RVV_M
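//
// For example (an illustrative expansion, not a definition from this file),
// with the type letter T=i and the element size S=32, a template such as
//
//     XSIMD_RVV_IDENTIFIER(i, 32, (__riscv_vmv_v_x_ XSIMD_RVV_TSM))
//
// pastes the literal prefix, then T, then S, then the m1 LMUL suffix
// together, producing the intrinsic name __riscv_vmv_v_x_i32m1.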
// XSIMD_RVV_OVERLOAD, below, expands to a head section, a number of body
// sections (depending on which types are supported), and a tail section.
// Different variants of these sections are implemented with different
// suffixes on the three macro names XSIMD_RVV_WRAPPER_HEAD, XSIMD_RVV_WRAPPER,
// and XSIMD_RVV_WRAPPER_TAIL, and are specified as an argument to
// XSIMD_RVV_OVERLOAD (the empty string is the default, but it still needs an
// extra comma to hold its place).
//
// The default XSIMD_RVV_WRAPPER_HEAD provides a class containing convenient
// names for the function signature argument(s) to XSIMD_RVV_OVERLOAD. That
// signature can also reference the template argument T, because it's a text
// substitution into the template.
#define XSIMD_RVV_WRAPPER_HEAD(NAME, SIGNATURE, ...) \
    namespace NAME##_cruft \
    { \
        template <class T> \
        struct ctx \
        { \
            static constexpr size_t width = XSIMD_RVV_BITS; \
            static constexpr size_t vl = width / (sizeof(T) * 8); \
            using vec = rvv_reg_t<T, width>; \
            using uvec = rvv_reg_t<as_unsigned_relaxed_t<T>, width>; \
            using svec = rvv_reg_t<as_signed_relaxed_t<T>, width>; \
            using fvec = rvv_reg_t<as_float_relaxed_t<T>, width>; \
            using bvec = rvv_bool_t<T, width>; \
            using scalar_vec = rvv_reg_t<T, types::detail::rvv_width_m1>; \
            using wide_vec = rvv_reg_t<T, width * 2>; \
            using narrow_vec = rvv_reg_t<T, width / 2>; \
            using type = SIGNATURE; \
        }; \
        template <class T> \
        using sig_t = typename ctx<T>::type; \
        template <class K, class T> \
        struct impl \
        { \
            void operator()() const noexcept {} \
        }; \
        template <class K> \
        using impl_t = impl<K, sig_t<K>>;
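// For a concrete sense of ctx (an illustrative sketch, assuming
// XSIMD_RVV_BITS is 128): ctx<float>::vl is 4, ctx<float>::vec is
// rvv_reg_t<float, 128>, and ctx<float>::bvec is the matching predicate
// type rvv_bool_t<float, 128>.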
// The body of the wrapper defines a functor (because partial specialisation
// of functions is not legal) which forwards its arguments to the named
// intrinsic with a few manipulations. In general, vector types are handled
// as rvv_reg_t<> and rely on the conversion operators in that class for
// compatibility with the intrinsics.
//
// The function signature is not mentioned here. Instead it's provided in the
// tail code as the template argument for which this is a specialisation,
// which overcomes the problem of converting a function signature type to an
// argument list to pass to another function.
//
#define XSIMD_RVV_WRAPPER(KEY, CALLEE, ...) \
    template <class Ret, class... Args> \
    struct impl<KEY, Ret(Args...)> \
    { \
        using ctx = ctx<KEY>; \
        constexpr Ret operator()(Args... args) const noexcept \
        { \
            return CALLEE(args..., ctx::vl); \
        } \
    };
#define XSIMD_RVV_WRAPPER_NOVL(KEY, CALLEE, ...) \
    template <class Ret, class... Args> \
    struct impl<KEY, Ret(Args...)> \
    { \
        constexpr Ret operator()(Args... args) const noexcept \
        { \
            return CALLEE(args...); \
        } \
    };
#define XSIMD_RVV_WRAPPER_DROP_1ST(KEY, CALLEE, ...) \
    template <class Ret, class First, class... Args> \
    struct impl<KEY, Ret(First, Args...)> \
    { \
        using ctx = ctx<KEY>; \
        constexpr Ret operator()(First, Args... args) const noexcept \
        { \
            return CALLEE(args..., ctx::vl); \
        } \
    };
#define XSIMD_RVV_WRAPPER_DROP_1ST_CUSTOM_ARGS(KEY, CALLEE, SIGNATURE, ...) \
    template <class Ret, class First, class... Args> \
    struct impl<KEY, Ret(First, Args...)> \
    { \
        using ctx = ctx<KEY>; \
        constexpr Ret operator()(First, Args... args) const noexcept \
        { \
            return CALLEE(__VA_ARGS__, ctx::vl); \
        } \
    };
#define XSIMD_RVV_WRAPPER_DROP_1ST_CUSTOM_ARGS_NOVL(KEY, CALLEE, SIGNATURE, ...) \
    template <class Ret, class First, class... Args> \
    struct impl<KEY, Ret(First, Args...)> \
    { \
        constexpr Ret operator()(First, Args... args) const noexcept \
        { \
            return CALLEE(__VA_ARGS__); \
        } \
    };
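// For instance (an illustrative expansion): XSIMD_RVV_WRAPPER(int32_t,
// __riscv_vadd, ...) specialises impl<int32_t, Ret(Args...)> so that
// operator()(a, b) returns __riscv_vadd(a, b, ctx<int32_t>::vl); appending
// the vector length is the main manipulation these wrappers perform.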
// This part folds all the above templates down into a single functor
// instance with all the different function signatures available under the
// one name. Not all of the base classes necessarily contain useful code,
// but there's a default implementation so that filtering them out isn't
// really necessary.
#define XSIMD_RVV_WRAPPER_TAIL(NAME, ...) \
    } /* namespace NAME##_cruft */ \
    static constexpr struct : NAME##_cruft::impl_t<int8_t>, \
                              NAME##_cruft::impl_t<uint8_t>, \
                              NAME##_cruft::impl_t<int16_t>, \
                              NAME##_cruft::impl_t<uint16_t>, \
                              NAME##_cruft::impl_t<int32_t>, \
                              NAME##_cruft::impl_t<uint32_t>, \
                              NAME##_cruft::impl_t<int64_t>, \
                              NAME##_cruft::impl_t<uint64_t>, \
                              NAME##_cruft::impl_t<float>, \
                              NAME##_cruft::impl_t<double> \
    { \
        using NAME##_cruft::impl_t<int8_t>::operator(); \
        using NAME##_cruft::impl_t<uint8_t>::operator(); \
        using NAME##_cruft::impl_t<int16_t>::operator(); \
        using NAME##_cruft::impl_t<uint16_t>::operator(); \
        using NAME##_cruft::impl_t<int32_t>::operator(); \
        using NAME##_cruft::impl_t<uint32_t>::operator(); \
        using NAME##_cruft::impl_t<int64_t>::operator(); \
        using NAME##_cruft::impl_t<uint64_t>::operator(); \
        using NAME##_cruft::impl_t<float>::operator(); \
        using NAME##_cruft::impl_t<double>::operator(); \
    } NAME {};
#define XSIMD_RVV_WRAPPER_TAIL_NOVL(...) XSIMD_RVV_WRAPPER_TAIL(__VA_ARGS__)
#define XSIMD_RVV_WRAPPER_TAIL_DROP_1ST(...) XSIMD_RVV_WRAPPER_TAIL(__VA_ARGS__)
#define XSIMD_RVV_WRAPPER_TAIL_DROP_1ST_CUSTOM_ARGS(...) XSIMD_RVV_WRAPPER_TAIL(__VA_ARGS__)
#define XSIMD_RVV_WRAPPER_TAIL_DROP_1ST_CUSTOM_ARGS_NOVL(...) XSIMD_RVV_WRAPPER_TAIL(__VA_ARGS__)
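// Putting head, body, and tail together (a sketch; XSIMD_RVV_OVERLOAD and
// its helpers follow the pattern of XSIMD_RVV_OVERLOAD2 below):
//
//     XSIMD_RVV_OVERLOAD(rvvadd, (__riscv_vadd), , vec(vec, vec))
//
// declares a single constexpr functor `rvvadd` whose operator() is
// overloaded for every supported element type, so generic code can simply
// call detail::rvvadd(lhs, rhs), as the kernels later in this file do.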
// Use these to create function (actually functor, sorry) wrappers overloaded
// for whichever types are supported. Being functors means they can't take a
// template argument (until C++14), so if a type can't be deduced then a junk
// value can be passed as the first argument and discarded by using the
// _DROP_1ST variant, instead.
//
// The wrappers use the rvv_reg_t<> types for template accessibility, and
// because some types (e.g., vfloat64mf2_t) don't exist and need extra
// abstraction to emulate.
//
// In many cases the intrinsic names are different for the signed, unsigned,
// and float variants, so the macros OVERLOAD2 and OVERLOAD3 (depending on
// whether or not a float variant exists) take multiple intrinsic names and
// bring them together under a single overloaded identifier where they can be
// used within templates.
//
#define XSIMD_RVV_OVERLOAD2(my_name, name_i, name_u, variant, ...) \
XSIMD_RVV_OVERLOAD_head(my_name, variant, __VA_ARGS__) \
XSIMD_RVV_OVERLOAD_i(name_i, variant, __VA_ARGS__) \
XSIMD_RVV_OVERLOAD_u(name_u, variant, __VA_ARGS__) \
XSIMD_RVV_OVERLOAD_tail(my_name, variant, __VA_ARGS__)
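// For example (taken from the fast_cast section later in this file):
//
//     XSIMD_RVV_OVERLOAD2(rvvfcvt_rtz, (__riscv_vfcvt_rtz_x),
//                         (__riscv_vfcvt_rtz_xu), _DROP_1ST, vec(T, fvec))
//
// binds the signed and unsigned truncating float-to-integer intrinsics under
// the one name rvvfcvt_rtz, selected by the (otherwise discarded) type of
// the first argument.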
template <size_t>
struct as_signed_relaxed;
template <>
struct as_signed_relaxed<1>
{
    using type = int8_t;
};
template <>
struct as_signed_relaxed<2>
{
    using type = int16_t;
};
template <>
struct as_signed_relaxed<4>
{
    using type = int32_t;
};
template <>
struct as_signed_relaxed<8>
{
    using type = int64_t;
};
template <class T>
using as_signed_relaxed_t = typename as_signed_relaxed<sizeof(T)>::type;

template <size_t>
struct as_unsigned_relaxed;
template <>
struct as_unsigned_relaxed<1>
{
    using type = uint8_t;
};
template <>
struct as_unsigned_relaxed<2>
{
    using type = uint16_t;
};
template <>
struct as_unsigned_relaxed<4>
{
    using type = uint32_t;
};
template <>
struct as_unsigned_relaxed<8>
{
    using type = uint64_t;
};
template <class T>
using as_unsigned_relaxed_t = typename as_unsigned_relaxed<sizeof(T)>::type;

template <size_t>
struct as_float_relaxed;
template <>
struct as_float_relaxed<1>
{
    using type = int8_t;
};
template <>
struct as_float_relaxed<2>
{
    using type = int16_t;
};
template <>
struct as_float_relaxed<4>
{
    using type = float;
};
template <>
struct as_float_relaxed<8>
{
    using type = double;
};
template <class T>
using as_float_relaxed_t = typename as_float_relaxed<sizeof(T)>::type;
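// The mappings above in brief (a sketch, not part of the library): types map
// by size, and sizes with no float counterpart "relax" to integers.
//
//     static_assert(std::is_same<as_signed_relaxed_t<uint32_t>, int32_t>::value, "");
//     static_assert(std::is_same<as_float_relaxed_t<uint64_t>, double>::value, "");
//     static_assert(std::is_same<as_float_relaxed_t<int16_t>, int16_t>::value, "");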
template <class T, class U>
rvv_reg_t<T, U::width> rvvreinterpret(U const& arg) noexcept
{
    return rvv_reg_t<T, U::width>(arg, types::detail::XSIMD_RVV_BITCAST);
}
template <class T, class A, class U>
rvv_reg_t<T, A::width> rvvreinterpret(batch<U, A> const& arg) noexcept
{
    typename batch<U, A>::register_type r = arg;
    return rvvreinterpret<T>(r);
}
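// rvvreinterpret is a bit-level cast (in the spirit of C++20 std::bit_cast),
// not a value conversion. For example (illustrative; some_float_batch is a
// hypothetical name):
//
//     auto bits = rvvreinterpret<uint32_t>(some_float_batch);
//
// yields a register whose uint32_t lanes hold the IEEE-754 bit patterns of
// the original float lanes.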
template <class A, class T, class U = as_unsigned_integer_t<T>>
XSIMD_INLINE batch<U, A> rvv_to_unsigned_batch(batch<T, A> const& arg) noexcept
{
    return rvvreinterpret<U>(arg.data);
}
template <class A, class T, size_t offset = 0, int shift = 0>
XSIMD_INLINE rvv_reg_t<T, A::width> vindex() noexcept
{
    auto index = rvvid(T {});
    if (shift < 0)
        index = __riscv_vsrl(index, -shift, batch<T, A>::size);
    else
        index = __riscv_vsll(index, shift, batch<T, A>::size);
    return __riscv_vadd(index, T(offset), batch<T, A>::size);
}
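// For example (illustrative): vindex<A, uint32_t, 0, -1>() shifts the lane
// id right by one, producing {0, 0, 1, 1, 2, 2, ...}, and
// vindex<A, uint32_t, 4, -1>() produces {4, 4, 5, 5, ...}; zip_lo and zip_hi
// below rely on exactly these sequences as gather indices.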
// enable for signed integers
template <class T>
using rvv_enable_signed_int_t = typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, int>::type;

// enable for unsigned integers
template <class T>
using rvv_enable_unsigned_int_t = typename std::enable_if<std::is_integral<T>::value && std::is_unsigned<T>::value, int>::type;

// enable for floating points
template <class T>
using rvv_enable_floating_point_t = typename std::enable_if<std::is_floating_point<T>::value, int>::type;

// enable for signed integers or floating points
template <class T>
using rvv_enable_signed_int_or_floating_point_t = typename std::enable_if<std::is_signed<T>::value, int>::type;

// enable for all RVV-supported types
template <class T>
using rvv_enable_all_t = typename std::enable_if<std::is_arithmetic<T>::value, int>::type;
} // namespace detail
/********************
 * Scalar to vector *
 ********************/
namespace detail
{
    template <class T, size_t Width>
    XSIMD_INLINE detail::rvv_reg_t<T, Width> broadcast(T arg) noexcept
    {
        // A bit of a dance, here, because rvvmv_splat has no other
        // argument from which to deduce type, and T=char is not
        // supported.
        detail::rvv_fix_char_t<T> arg_not_char(arg);
        const auto splat = detail::rvvmv_splat(arg_not_char);
        return detail::rvv_reg_t<T, Width>(splat.get_bytes(), types::detail::XSIMD_RVV_BITCAST);
    }
} // namespace detail
// broadcast
template <class A, class T>
XSIMD_INLINE batch<T, A> broadcast(T arg, requires_arch<rvv>) noexcept
{
    return detail::broadcast<T, A::width>(arg);
}
// scatter
template <class A, class T, class U, detail::rvv_enable_sg_t<T, U> = 0>
XSIMD_INLINE void scatter(batch<T, A> const& vals, T* dst, batch<U, A> const& index, kernel::requires_arch<rvv>) noexcept
{
    using UU = as_unsigned_integer_t<U>;
    const auto uindex = detail::rvv_to_unsigned_batch(index);
    auto* base = reinterpret_cast<detail::rvv_fix_char_t<T>*>(dst);
    // or rvvsuxei
    const auto bi = detail::rvvmul_splat(uindex, sizeof(T));
    detail::rvvsoxei(base, bi, vals);
}
// gather
template <class A, class T, class U, detail::rvv_enable_sg_t<T, U> = 0>
XSIMD_INLINE batch<T, A> gather(batch<T, A> const&, T const* src, batch<U, A> const& index, kernel::requires_arch<rvv>) noexcept
{
    using UU = as_unsigned_integer_t<U>;
    const auto uindex = detail::rvv_to_unsigned_batch(index);
    auto const* base = reinterpret_cast<detail::rvv_fix_char_t<T> const*>(src);
    // or rvvluxei
    const auto bi = detail::rvvmul_splat(uindex, sizeof(T));
    return detail::rvvloxei(base, bi);
}
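// Note on the index arithmetic above: the indexed load/store intrinsics
// (vloxei/vsoxei) take byte offsets rather than element indices, hence the
// rvvmul_splat by sizeof(T). For example (illustrative), gathering doubles
// with index {0, 2, 4} reads from byte offsets {0, 16, 32} relative to the
// base pointer.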
// add
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> add(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvadd(lhs, rhs);
}

// sadd
template <class A, class T, detail::enable_integral_t<T> = 0>
XSIMD_INLINE batch<T, A> sadd(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvsadd(lhs, rhs);
}

// sub
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> sub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvsub(lhs, rhs);
}

// ssub
template <class A, class T, detail::enable_integral_t<T> = 0>
XSIMD_INLINE batch<T, A> ssub(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvssub(lhs, rhs);
}
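// A note on sadd/ssub above: these map to the saturating vsadd/vssub forms,
// so results clamp at the type's limits instead of wrapping. For example
// (illustrative), for uint8_t: sadd(250, 10) == 255 and ssub(5, 10) == 0.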
// mul
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> mul(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmul(lhs, rhs);
}

// div
template <class A, class T, typename detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> div(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvdiv(lhs, rhs);
}

// max
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> max(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmax(lhs, rhs);
}

// min
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> min(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmin(lhs, rhs);
}
// neg
template <class A, class T, detail::rvv_enable_unsigned_int_t<T> = 0>
XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<rvv>) noexcept
{
    using S = as_signed_integer_t<T>;
    const auto as_signed = detail::rvvreinterpret<S>(arg);
    const auto result = detail::rvvneg(as_signed);
    return detail::rvvreinterpret<T>(result);
}

template <class A, class T, detail::rvv_enable_signed_int_or_floating_point_t<T> = 0>
XSIMD_INLINE batch<T, A> neg(batch<T, A> const& arg, requires_arch<rvv>) noexcept
{
    return detail::rvvneg(arg);
}
// abs
template <class A, class T, detail::rvv_enable_unsigned_int_t<T> = 0>
XSIMD_INLINE batch<T, A> abs(batch<T, A> const& arg, requires_arch<rvv>) noexcept
{
    return arg;
}

template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
XSIMD_INLINE batch<T, A> abs(batch<T, A> const& arg, requires_arch<rvv>) noexcept
{
    return detail::rvvabs(arg);
}
// fma: x * y + z
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> fma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<rvv>) noexcept
{
    // also detail::rvvmadd(x, y, z);
    return detail::rvvmacc(z, x, y);
}

// fnma: z - x * y
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> fnma(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<rvv>) noexcept
{
    // also detail::rvvnmsub(x, y, z);
    return detail::rvvnmsac(z, x, y);
}

// fms: x * y - z
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> fms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<rvv>) noexcept
{
    // also vfmsac(z, x, y), but lacking integer version
    // also vfmsub(x, y, z), but lacking integer version
    return -fnma(x, y, z);
}

// fnms: - x * y - z
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> fnms(batch<T, A> const& x, batch<T, A> const& y, batch<T, A> const& z, requires_arch<rvv>) noexcept
{
    // also vfnmacc(z, x, y), but lacking integer version
    // also vfnmadd(x, y, z), but lacking integer version
    return -fma(x, y, z);
}
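// Summary of the four fused forms above (a worked check, illustrative), with
// x = 2, y = 3, z = 5 in every lane:
//     fma  =  x * y + z =  11
//     fnma =  z - x * y =  -1
//     fms  =  x * y - z =   1    (implemented as -fnma)
//     fnms = -x * y - z = -11    (implemented as -fma)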
namespace detail
{
    template <class A, class T>
    XSIMD_INLINE T reduce_scalar(rvv_reg_t<T, types::detail::rvv_width_m1> const& arg)
    {
        return detail::rvvmv_lane0(rvv_reg_t<T, A::width>(arg.get_bytes(), types::detail::XSIMD_RVV_BITCAST));
    }
} // namespace detail

// reduce_add
template <class A, class T, class V = typename batch<T, A>::value_type, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE V reduce_add(batch<T, A> const& arg, requires_arch<rvv>) noexcept
{
    const auto zero = detail::broadcast<T, types::detail::rvv_width_m1>(T(0));
    const auto r = detail::rvvredsum(arg, zero);
    return detail::reduce_scalar<A, T>(r);
}
// reduce_max
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE T reduce_max(batch<T, A> const& arg, requires_arch<rvv>) noexcept
{
    const auto lowest = detail::broadcast<T, types::detail::rvv_width_m1>(std::numeric_limits<T>::lowest());
    const auto r = detail::rvvredmax(arg, lowest);
    return detail::reduce_scalar<A, T>(r);
}

// reduce_min
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE T reduce_min(batch<T, A> const& arg, requires_arch<rvv>) noexcept
{
    const auto max = detail::broadcast<T, types::detail::rvv_width_m1>(std::numeric_limits<T>::max());
    const auto r = detail::rvvredmin(arg, max);
    return detail::reduce_scalar<A, T>(r);
}
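// The reductions above follow the usual RVV pattern: the scalar seed travels
// in lane 0 of an LMUL=1 register and must be the operation's identity
// value. For example (illustrative), reduce_add({1, 2, 3, 4}) seeds with 0
// and returns 10; reduce_max seeds with numeric_limits<T>::lowest() so no
// real element can lose to the seed.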
// haddp
template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
XSIMD_INLINE batch<T, A> haddp(const batch<T, A>* row, requires_arch<rvv>) noexcept
{
    constexpr std::size_t size = batch<T, A>::size;
    T sums[size];
#pragma unroll size
    for (std::size_t i = 0; i < size; ++i)
    {
        sums[i] = reduce_add(row[i], rvv {});
    }
    return load_aligned<A>(sums, convert<T>(), rvv {});
}
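// In other words (illustrative): lane i of the result is the horizontal sum
// of row[i]. With two-lane batches a = {1, 2} and b = {3, 4}, the row array
// {a, b} yields {3, 7}.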
/***************
 * Comparisons *
 ***************/
// eq
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> eq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmseq(lhs, rhs);
}

template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> eq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    const auto neq_result = detail::rvvmxor(lhs, rhs);
    return detail::rvvmnot(neq_result);
}

// neq
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> neq(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmsne(lhs, rhs);
}

template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> neq(batch_bool<T, A> const& lhs, batch_bool<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmxor(lhs, rhs);
}

// lt
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> lt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmslt(lhs, rhs);
}

// le
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> le(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmsle(lhs, rhs);
}

// gt
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> gt(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmsgt(lhs, rhs);
}

// ge
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch_bool<T, A> ge(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    return detail::rvvmsge(lhs, rhs);
}
/*************
 * Selection *
 *************/
namespace detail
{
    XSIMD_RVV_OVERLOAD(rvvcompress, (__riscv_vcompress_tu), , vec(vec, vec, bvec))
} // namespace detail

// compress
template <class A, class T>
XSIMD_INLINE batch<T, A> compress(batch<T, A> const& x, batch_bool<T, A> const& mask, requires_arch<rvv>) noexcept
{
    auto zero = broadcast<A>(T(0), rvv {});
    return detail::rvvcompress(zero, x, mask);
}
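// For example (illustrative): with x = {1, 2, 3, 4} and mask
// {true, false, true, false}, vcompress packs the selected elements to the
// front and, because the tail-undisturbed (_tu) form starts from a zero
// destination, the result is {1, 3, 0, 0}.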
// swizzle
template <class A, class T, class I, I... idx>
XSIMD_INLINE batch<T, A> swizzle(batch<T, A> const& arg, batch_constant<I, A, idx...>, requires_arch<rvv>) noexcept
{
    static_assert(batch<T, A>::size == sizeof...(idx), "invalid swizzle indices");
    const batch<I, A> indices { idx... };
    return detail::rvvrgather(arg, indices);
}

template <class A, class T, class I, I... idx>
XSIMD_INLINE batch<std::complex<T>, A> swizzle(batch<std::complex<T>, A> const& self,
                                               batch_constant<I, A, idx...>,
                                               requires_arch<rvv>) noexcept
{
    const auto real = swizzle(self.real(), batch_constant<I, A, idx...> {}, rvv {});
    const auto imag = swizzle(self.imag(), batch_constant<I, A, idx...> {}, rvv {});
    return batch<std::complex<T>>(real, imag);
}
/*************
 * Selection *
 *************/
// extract_pair
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> extract_pair(batch<T, A> const& lhs, batch<T, A> const& rhs, size_t n, requires_arch<rvv>) noexcept
{
    const auto tmp = detail::rvvslidedown(rhs, n);
    return detail::rvvslideup(tmp, lhs, lhs.size - n);
}
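// For example (illustrative): with lhs = {a, b, c, d}, rhs = {e, f, g, h}
// and n = 1, the slide-down gives {f, g, h, .} and the slide-up overlays lhs
// starting at lane 3, producing {f, g, h, a}.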
// select
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> select(batch_bool<T, A> const& cond, batch<T, A> const& a, batch<T, A> const& b, requires_arch<rvv>) noexcept
{
    return detail::rvvmerge(b, a, cond);
}

template <class A, class T, bool... b>
XSIMD_INLINE batch<T, A> select(batch_bool_constant<T, A, b...> const&, batch<T, A> const& true_br, batch<T, A> const& false_br, requires_arch<rvv>) noexcept
{
    return select(batch_bool<T, A> { b... }, true_br, false_br, rvv {});
}
// zip_lo
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> zip_lo(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    const auto index = detail::vindex<A, as_unsigned_integer_t<T>, 0, -1>();
    const auto mask = detail::pmask8<T, A::width>(0xaa);
    return detail::rvvmerge(detail::rvvrgather(lhs, index),
                            detail::rvvrgather(rhs, index),
                            mask);
}

// zip_hi
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> zip_hi(batch<T, A> const& lhs, batch<T, A> const& rhs, requires_arch<rvv>) noexcept
{
    const auto index = detail::vindex<A, as_unsigned_integer_t<T>, batch<T, A>::size / 2, -1>();
    const auto mask = detail::pmask8<T, A::width>(0xaa);
    return detail::rvvmerge(detail::rvvrgather(lhs, index),
                            detail::rvvrgather(rhs, index),
                            mask);
}
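// How the zips work (illustrative): the gather index {0, 0, 1, 1, ...}
// duplicates each source lane, and the 0xaa mask (binary ...10101010)
// selects the rhs gather in odd lanes and the lhs gather in even lanes, so
// zip_lo({a, b, c, d}, {e, f, g, h}) == {a, e, b, f} and zip_hi gives
// {c, g, d, h}.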
// store_complex
template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
XSIMD_INLINE void store_complex_aligned(std::complex<T>* dst, batch<std::complex<T>, A> const& src, requires_arch<rvv>) noexcept
{
    const auto lo = zip_lo(src.real(), src.imag());
    const auto hi = zip_hi(src.real(), src.imag());
    T* buf = reinterpret_cast<T*>(dst);
    store_aligned(buf, lo, rvv {});
    store_aligned(buf + lo.size, hi, rvv {});
}
template <class A, class T, detail::rvv_enable_floating_point_t<T> = 0>
XSIMD_INLINE void store_complex_unaligned(std::complex<T>* dst, batch<std::complex<T>, A> const& src, requires_arch<rvv>) noexcept
{
store_complex_aligned(dst, src, rvv {});
}
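// The complex batch is stored in its natural interleaved layout
// (illustrative): real = {r0, r1, r2, r3} and imag = {i0, i1, i2, i3} become
// the memory sequence r0, i0, r1, i1, r2, i2, r3, i3, with zip_lo providing
// the first half and zip_hi the second.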
// fast_cast
namespace detail
{
XSIMD_RVV_OVERLOAD2(rvvfcvt_rtz, // truncating conversion, like C.
(__riscv_vfcvt_rtz_x),
(__riscv_vfcvt_rtz_xu), _DROP_1ST, vec(T, fvec))
XSIMD_RVV_OVERLOAD2(rvvfcvt_rne, // round to nearest, ties to even
(__riscv_vfcvt_x),
(__riscv_vfcvt_xu), _DROP_1ST_CUSTOM_ARGS, vec(T, fvec), args..., __RISCV_FRM_RNE)
XSIMD_RVV_OVERLOAD2(rvvfcvt_rmm, // round to nearest, ties to max magnitude
(__riscv_vfcvt_x),
(__riscv_vfcvt_xu), _DROP_1ST_CUSTOM_ARGS, vec(T, fvec), args..., __RISCV_FRM_RMM)
XSIMD_RVV_OVERLOAD2(rvvfcvt, // round to current rounding mode.
(__riscv_vfcvt_x),
(__riscv_vfcvt_xu), _DROP_1ST, vec(T, fvec))
XSIMD_RVV_OVERLOAD_INTS(rvvfcvt_f, (__riscv_vfcvt_f), , fvec(vec))
    template <class T, class U>
    using rvv_enable_ftoi_t = typename std::enable_if<(sizeof(T) == sizeof(U) && std::is_floating_point<T>::value && !std::is_floating_point<U>::value), int>::type;
    template <class T, class U>
    using rvv_enable_itof_t = typename std::enable_if<(sizeof(T) == sizeof(U) && !std::is_floating_point<T>::value && std::is_floating_point<U>::value), int>::type;
    template <class A, class T, class U, rvv_enable_ftoi_t<T, U> = 0>
    XSIMD_INLINE batch<U, A> fast_cast(batch<T, A> const& arg, batch<U, A> const&, requires_arch<rvv>) noexcept
    {
        return rvvfcvt_rtz(U {}, arg);
    }

    template <class A, class T, class U, rvv_enable_itof_t<T, U> = 0>
    XSIMD_INLINE batch<U, A> fast_cast(batch<T, A> const& arg, batch<U, A> const&, requires_arch<rvv>) noexcept
    {
        return rvvfcvt_f(arg);
    }
} // namespace detail
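// Float-to-integer fast_cast uses the _rtz (round-towards-zero) form, which
// matches C's truncating cast semantics. For example (illustrative),
// fast_cast of {-1.7f, 2.9f} to int32_t yields {-1, 2}, not {-2, 3}.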
/*********
 * Miscs *
 *********/
// set
template <class A, class T, class... Args>
XSIMD_INLINE batch<T, A> set(batch<T, A> const&, requires_arch<rvv>, Args... args) noexcept
{
    const std::array<T, batch<T, A>::size> tmp { args... };
    return load_unaligned<A>(tmp.data(), convert<T>(), rvv {});
}
template <class A, class T, class... Args>
XSIMD_INLINE batch<std::complex<T>, A> set(batch<std::complex<T>, A> const&, requires_arch<rvv>,
                                           Args... args_complex) noexcept
{
    return batch<std::complex<T>>(set(batch<T, rvv> {}, rvv {}, args_complex.real()...),
                                  set(batch<T, rvv> {}, rvv {}, args_complex.imag()...));
}
template <class A, class T, class... Args>
XSIMD_INLINE batch_bool<T, A> set(batch_bool<T, A> const&, requires_arch<rvv>, Args... args) noexcept
{
    using U = as_unsigned_integer_t<T>;
    const auto values = set(batch<U, rvv> {}, rvv {}, static_cast<U>(args)...);
    const auto zero = broadcast<A>(U(0), rvv {});
    detail::rvv_bool_t<T> result = detail::rvvmsne(values, zero);
    return result;
}
// insert
template <class A, class T, size_t I, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE batch<T, A> insert(batch<T, A> const& arg, T val, index<I>, requires_arch<rvv>) noexcept
{
    const auto mask = detail::pmask<T, A::width>(uint64_t(1) << I);
    return detail::rvvmerge_splat(arg, val, mask);
}
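// For example (illustrative): insert({1, 2, 3, 4}, 9, index<2> {}) builds
// the one-hot mask 0b0100 and merges the splatted scalar into lane 2 only,
// giving {1, 2, 9, 4}.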
// get
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE T get(batch<T, A> const& arg, size_t i, requires_arch<rvv>) noexcept
{
    const auto tmp = detail::rvvslidedown(arg, i);
    return detail::rvvmv_lane0(tmp);
}

template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE std::complex<T> get(batch<std::complex<T>, A> const& arg, size_t i, requires_arch<rvv>) noexcept
{
    const auto tmpr = detail::rvvslidedown(arg.real(), i);
    const auto tmpi = detail::rvvslidedown(arg.imag(), i);
    return std::complex<T> { detail::rvvmv_lane0(tmpr), detail::rvvmv_lane0(tmpi) };
}
// all
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE bool all(batch_bool<T, A> const& arg, requires_arch<rvv>) noexcept
{
    return detail::rvvcpop(arg) == batch_bool<T, A>::size;
}

// any
template <class A, class T, detail::rvv_enable_all_t<T> = 0>
XSIMD_INLINE bool any(batch_bool<T, A> const& arg, requires_arch<rvv>) noexcept
{
    return detail::rvvcpop(arg) > 0;
}
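// Both predicates lean on vcpop.m, which counts the set bits in the mask, so
// no per-lane scalar loop is needed. For example (illustrative): a mask
// {1, 0, 1, 1} has a population count of 3, so any() is true while all() is
// false.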
// bitwise_cast
template <class A, class T, class R, detail::rvv_enable_all_t<T> = 0, detail::rvv_enable_all_t<R> = 0>
XSIMD_INLINE batch<R, A> bitwise_cast(batch<T, A> const& arg, batch<R, A> const&, requires_arch<rvv>) noexcept
{
    return detail::rvv_reg_t<R, A::width>(arg.data.get_bytes(), types::detail::XSIMD_RVV_BITCAST);
}

// batch_bool_cast
template <class A, class T_out, class T_in, detail::rvv_enable_all_t<T_in> = 0>