//****************************************************************************// // Copyright (C) 2016-2018 Florent Hivert <Florent.Hivert@lri.fr>, // // // // Distributed under the terms of the GNU General Public License (GPL) // // // // This code is distributed in the hope that it will be useful, // // but WITHOUT ANY WARRANTY; without even the implied warranty of // // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // // General Public License for more details. // // // // The full text of the GPL is available at: // // // // http://www.gnu.org/licenses/ // //****************************************************************************//
inline HPCOMBI_CONSTEXPR TPU operator()(type_elem c) const { return make_helper(ConstFun<type_elem>(c),
std::make_index_sequence<size>{});
} // explicit overloading for int constants inline HPCOMBI_CONSTEXPR TPU operator()(int c) const { returnoperator()(uint8_t(c));
} inline HPCOMBI_CONSTEXPR TPU operator()(size_t c) const { returnoperator()(uint8_t(c));
}
};
// The following functions should be constexpr lambdas writen directly in // their corresponding methods. However until C++17, constexpr lambda are // forbidden. So we put them here. /// The image of i by the identity function
HPCOMBI_CONSTEXPR uint8_t id_fun(uint8_t i) { return i; } /// The image of i by the left cycle function
HPCOMBI_CONSTEXPR uint8_t left_cycle_fun(uint8_t i) { return (i + 15) % 16; } /// The image of i by the right cycle function
HPCOMBI_CONSTEXPR
uint8_t right_cycle_fun(uint8_t i) { return (i + 1) % 16; } /// The image of i by a left shift duplicating the hole
HPCOMBI_CONSTEXPR
uint8_t left_dup_fun(uint8_t i) { return i == 15 ? 15 : i + 1; } /// The image of i by a right shift duplicating the hole
HPCOMBI_CONSTEXPR
uint8_t right_dup_fun(uint8_t i) { return i == 0 ? 0 : i - 1; } /// The complement of i to 15
HPCOMBI_CONSTEXPR
uint8_t complement_fun(uint8_t i) { return 15 - i; }
HPCOMBI_CONSTEXPR uint8_t popcount4_fun(uint8_t i) { return ((i & 1) != 0 ? 1 : 0)
+ ((i & 2) != 0 ? 1 : 0)
+ ((i & 4) != 0 ? 1 : 0)
+ ((i & 8) != 0 ? 1 : 0);
}
} // Anonymous namespace
/// Factory object for various SIMD constants in particular constexpr
TPUBuild<epu8> Epu8;
/// The indentity #HPCombi::epu8
HPCOMBI_CONSTEXPR epu8 epu8id = Epu8(id_fun); /// The reverted identity #HPCombi::epu8
HPCOMBI_CONSTEXPR epu8 epu8rev = Epu8(complement_fun); /// Left cycle #HPCombi::epu8 permutation
HPCOMBI_CONSTEXPR epu8 left_cycle = Epu8(left_cycle_fun); /// Right cycle #HPCombi::epu8 permutation
HPCOMBI_CONSTEXPR epu8 right_cycle = Epu8(right_cycle_fun); /// Left shift #HPCombi::epu8, duplicating the rightmost entry
HPCOMBI_CONSTEXPR epu8 left_dup = Epu8(left_dup_fun); /// Right shift #HPCombi::epu8, duplicating the leftmost entry
HPCOMBI_CONSTEXPR epu8 right_dup = Epu8(right_dup_fun); /// Popcount #HPCombi::epu8: the ith entry contains the number of bits set in i
HPCOMBI_CONSTEXPR epu8 popcount4 = Epu8(popcount4_fun);
/** Cast a #HPCombi::epu8 to a c++ \c std::array * * This is usually faster for algorithm using a lot of indexed acces.
*/ inline TPUBuild<epu8>::array &as_array(epu8 &v) { returnreinterpret_cast<typename TPUBuild<epu8>::array &>(v);
} /** Cast a constant #HPCombi::epu8 to a C++ \c std::array * * This is usually faster for algorithm using a lot of indexed acces.
*/ inlineconst TPUBuild<epu8>::array &as_array(const epu8 &v) { returnreinterpret_cast<consttypename TPUBuild<epu8>::array &>(v);
} /** Cast a C++ \c std::array to a #HPCombi::epu8 */ // Passing the argument by reference triggers a segfault in gcc // Since vector types doesn't belongs to the standard, I didn't manage // to know if I'm using undefined behavior here. inline epu8 from_array(TPUBuild<epu8>::array a) { returnreinterpret_cast<const epu8 &>(a);
}
/** Cast a #HPCombi::epu8 to a c++ #HPCombi::VectGeneric * * This is usually faster for algorithm using a lot of indexed acces.
*/ inline VectGeneric<16> &as_VectGeneric(epu8 &v) { returnreinterpret_cast<VectGeneric<16> &>(as_array(v));
}
/** Cast a #HPCombi::epu8 to a c++ #HPCombi::VectGeneric * * This is usually faster for algorithm using a lot of indexed acces.
*/ inlineconst VectGeneric<16> &as_VectGeneric(const epu8 &v) { returnreinterpret_cast<const VectGeneric<16> &>(as_array(v));
}
/** Test whether all the entries of a #HPCombi::epu8 are zero */ inlinebool is_all_zero(epu8 a) { return _mm_testz_si128(a, a); } /** Test whether all the entries of a #HPCombi::epu8 are one */ inlinebool is_all_one(epu8 a) { return _mm_testc_si128(a, Epu8(0xFF)); }
/** Equality of #HPCombi::epu8 */ inlinebool equal(epu8 a, epu8 b) { return is_all_zero(_mm_xor_si128(a, b)); } /** Non equality of #HPCombi::epu8 */ inlinebool not_equal(epu8 a, epu8 b) { returnnot equal(a, b); }
/** Permuting a #HPCombi::epu8 */ inline epu8 permuted(epu8 a, epu8 b) { return _mm_shuffle_epi8(a, b); } /** Left shifted of a #HPCombi::epu8 inserting a 0 * @warning we use the convention that the 0 entry is on the left !
*/ inline epu8 shifted_right(epu8 a) { return _mm_bslli_si128(a, 1); } /** Right shifted of a #HPCombi::epu8 inserting a 0 * @warning we use the convention that the 0 entry is on the left !
*/ inline epu8 shifted_left(epu8 a) { return _mm_bsrli_si128(a, 1); } /** Reverting a #HPCombi::epu8 */ inline epu8 reverted(epu8 a) { return permuted(a, epu8rev); }
/** Vector min between two #HPCombi::epu8 0 */ inline epu8 min(epu8 a, epu8 b) { return _mm_min_epu8(a, b); } /** Vector max between two #HPCombi::epu8 0 */ inline epu8 max(epu8 a, epu8 b) { return _mm_max_epu8(a, b); }
/** Testing if a #HPCombi::epu8 is sorted */ inlinebool is_sorted(epu8 a); /** Return a sorted #HPCombi::epu8 * @details * @par Algorithm: * Uses the 9 stages sorting network #sorting_rounds
*/ inline epu8 sorted(epu8 a); /** Return a #HPCombi::epu8 with the two half sorted * @details * @par Algorithm: Uses a 6 stages sorting network #sorting_rounds8
*/ inline epu8 sorted8(epu8 a); /** Return a reverse sorted #HPCombi::epu8 * @details * @par Algorithm: * Uses the 9 stages sorting network #sorting_rounds
*/ inline epu8 revsorted(epu8 a); /** Return a #HPCombi::epu8 with the two half reverse sorted * @details * @par Algorithm: Uses a 6 stages sorting network #sorting_rounds8
*/ inline epu8 revsorted8(epu8 a);
/** Sort \c this and return the sorting permutation * @details * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8
*/ inline epu8 sort_perm(epu8 & a); /** Sort \c this and return the sorting permutation * @details * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8
*/ inline epu8 sort8_perm(epu8 & a);
/** Find if a vector is a permutation of one other * @details * @param a, b: two #HPCombi::epu8 * @returns a #HPCombi::epu8 * For each @f$0 \leq i < 16@f$, \c res[i] is the position in \c a of \c b[i] if \c b[i] appears exactly once in \c a, or undefined if not.
*/ inline epu8 permutation_of(epu8 a, epu8 b);
/** A prime number good for hashing */ const uint64_t prime = 0x9e3779b97f4a7bb9;
/** A random #HPCombi::epu8 * @details * @param bnd : the upper bound for the value of the entries * @returns a random #HPCombi::epu8 with value in the interval * @f$[0, 1, 2, ..., bnd-1]@f$.
*/ inline epu8 random_epu8(uint16_t bnd);
/** Remove duplicates in a sorted #HPCombi::epu8 * @details * @param a: supposed to be sorted * @param repl: the value replacing the duplicate entries (default to 0) * @return a where repeated occurences of entries are replaced by \c repl
*/ inline epu8 remove_dups(epu8 a, uint8_t repl = 0);
/** @class common_horiz_sum * @brief Horizontal sum of a #HPCombi::epu8 * @details * @returns the horizontal sum of the input * @par Example: * @code * horiz_sum(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15}); * @endcode * Returns `110` * @warning The result is supposed to fit in a \c uint8_t
*/ /** @copydoc common_horiz_sum * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access
*/ inline uint8_t horiz_sum_ref(epu8); /** @copydoc common_horiz_sum * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric
*/ inline uint8_t horiz_sum_gen(epu8); /** @copydoc common_horiz_sum * @par Algorithm: * 4-stages paralell algorithm
*/ inline uint8_t horiz_sum4(epu8); /** @copydoc common_horiz_sum * @par Algorithm: * 3-stages paralell algorithm + indexed access
*/ inline uint8_t horiz_sum3(epu8); /** @copydoc common_horiz_sum */ inline uint8_t horiz_sum(epu8 v) { return horiz_sum3(v); }
/** @class common_eval16 * @brief Evaluation of a #HPCombi::epu8 * @details * @param v : a #HPCombi::epu8 * @returns the evaluation, that is the #HPCombi::epu8 \c r such that * \c r[i] is the number of occurrence of \c i in the input \c v * @par Example: * @code * eval16(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15}); * @endcode * Returns `{ 1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1}` * @warning The entries larger than 15 are ignored
*/ /** @copydoc common_eval16 * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access
*/ inline epu8 eval16_ref(epu8 v); /** @copydoc common_eval16 * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and cast to array
*/ inline epu8 eval16_arr(epu8 v); /** @copydoc common_eval16 * @par Algorithm: * Vector @f$O(n)@f$ using cyclic shifting
*/ inline epu8 eval16_cycle(epu8 v); /** @copydoc common_eval16 * @par Algorithm: * Vector @f$O(n)@f$ using popcount
*/ inline epu8 eval16_popcount(epu8 v); /** @copydoc common_eval16 */ inline epu8 eval16(epu8 v) { return eval16_cycle(v); }
/** @class common_first_diff * @brief The first difference between two #HPCombi::epu8 * @details * @param a, b : two #HPCombi::epu8 * @param bound : a \c size_t * @returns the smallest index @f$i<bound@f$ such that \c a[i] and \c b[i] * differ, 16 if there is no differences before bound. * @par Example: * @code * epu8 a { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15}; * epu8 b { 5, 5, 2, 9, 1, 6,12, 4, 0, 4, 4, 4,12,13,14,15}; * @endcode * then `first_diff(a, b)` returns `3`, * `first_diff(a, b, 3)` returns `16`, * `first_diff(a, b, 4)` returns `3`, * `first_diff(a, b, 7)` returns `3`. * @warning `bound` is assumed to be smaller or equal than 16
*/ /** @copydoc common_first_diff * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access
*/ inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16); /** @copydoc common_first_diff * @par Algorithm: * Using \c cmpestri instruction
*/ inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); /** @copydoc common_first_diff * @par Algorithm: * Using vector comparison and mask
*/ inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16); /** @copydoc common_first_diff */ inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) { return first_diff_mask(a, b, bound);
}
/** @class common_last_diff * @brief The last difference between two #HPCombi::epu8 * @details * @param a, b : two #HPCombi::epu8 * @param bound : a \c size_t * @returns the largest index @f$i<bound@f$ such that \c a[i] and \c b[i] * differ, 16 if there is no differences before bound. * @par Example: * @code * epu8 a { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15}; * epu8 b { 5, 5, 2, 9, 1, 6,12, 4, 0, 4, 4, 4,12,13,14,15}; * @endcode * then `last_diff(a, b)` returns `11`, * `last_diff(a, b, 3)` returns `16`, * `last_diff(a, b, 4)` returns `3`, * `last_diff(a, b, 7)` returns `3`. * @warning `bound` is assumed to be smaller or equal than 16
*/ /** @copydoc common_last_diff * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access
*/ inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16); /** @copydoc common_last_diff * @par Algorithm: * Using \c cmpestri instruction
*/ inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); /** @copydoc common_last_diff * @par Algorithm: * Using vector comparison and mask
*/ inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16); /** @copydoc common_last_diff */ inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) { return last_diff_mask(a, b, bound);
}
/** Lexicographic comparison between two #HPCombi::epu8 */ inlinebool less(epu8 a, epu8 b); /** Partial lexicographic comparison between two #HPCombi::epu8 * @param a, b : the vectors to compare * @param k : the bound for the lexicographic comparison * @return a positive, negative or zero char depending on the result
*/ inlinechar less_partial(epu8 a, epu8 b, int k);
/** return the index of the first zero entry or 16 if there are none * Only index smaller than bound are taken into account.
*/ inline uint64_t first_zero(epu8 v, int bnd); /** return the index of the last zero entry or 16 if there are none * Only index smaller than bound are taken into account.
*/ inline uint64_t last_zero(epu8 v, int bnd); /** return the index of the first non zero entry or 16 if there are none * Only index smaller than bound are taken into account.
*/ inline uint64_t first_non_zero(epu8 v, int bnd); /** return the index of the last non zero entry or 16 if there are none * Only index smaller than bound are taken into account.
*/ inline uint64_t last_non_zero(epu8 v, int bnd);
/** a vector popcount function
*/ inline epu8 popcount16(epu8 v);
/** Test for partial transformation * @details * @returns whether \c v is a partial transformation. * @param v the vector to test * @param k the size of \c *this (default 16) * * Points where the function is undefined are mapped to \c 0xff. If \c *this * is a tranformation of @f$0\dots n-1@f$ for @f$n<16@f$, it should be completed * to a transformation of @f$0\dots 15@f$ by adding fixed points. That is the * values @f$i\geq n@f$ should be mapped to themself. * @par Example: * The partial tranformation * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 4 \end{matrix}@f$ * is encoded by the array {2,0,5,0xff,0xff,4,6,7,8,9,10,11,12,13,14,15}
*/ inlinebool is_partial_transformation(epu8 v, const size_t k = 16);
/** Test for transformation * @details * @returns whether \c *this is a transformation. * @param v the vector to test * @param k the size of \c *this (default 16) * * If \c *this is a tranformation of @f$0\dots n-1@f$ for @f$n<16@f$, * it should be completed to a transformation of @f$0\dots 15@f$ * by adding fixed points. That is the values @f$i\geq n@f$ should be * mapped to themself. * @par Example: * The tranformation * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 2 1 4 \end{matrix}@f$ * is encoded by the array {2,0,5,2,1,4,6,7,8,9,10,11,12,13,14,15}
*/ inlinebool is_transformation(epu8 v, const size_t k = 16);
/** Test for partial permutations * @details * @returns whether \c *this is a partial permutation. * @param v the vector to test * @param k the size of \c *this (default 16) * * Points where the function is undefined are mapped to \c 0xff. * If \c *this is a partial permutation of @f$0\dots n-1@f$ for @f$n<16@f$, * it should be completed to a partial permutation of @f$0\dots 15@f$ * by adding fixed points. That is the values @f$i\geq n@f$ should be * mapped to themself. * @par Example: * The permutation * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 4 \end{matrix}@f$ * is encoded by the array {2,0,5,0xFF,0xFF,4,6,7,8,9,10,11,12,13,14,15}
*/ inlinebool is_partial_permutation(epu8 v, const size_t k = 16);
/** Test for permutations * @details * @returns whether \c *this is a permutation. * @param v the vector to test * @param k the size of \c *this (default 16) * * If \c *this is a permutation of @f$0\dots n-1@f$ for @f$n<16@f$, * it should be completed to a permutation of @f$0\dots 15@f$ * by adding fixed points. That is the values @f$i\geq n@f$ should be * mapped to themself. * @par Example: * The permutation * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 3 1 4 \end{matrix}@f$ * is encoded by the array {2,0,5,3,1,4,6,7,8,9,10,11,12,13,14,15}
*/ inlinebool is_permutation(epu8 v, const size_t k = 16);
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.